From 29f94c72014eaa5d0d3b920686e689e79759cacb Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Sat, 24 May 2014 12:50:23 +0000
Subject: [PATCH] AArch64/ARM64: move ARM64 into AArch64's place

This commit starts with a "git mv ARM64 AArch64" and continues out from
there, renaming the C++ classes, intrinsics, and other target-local
objects for consistency.

"ARM64" test directories are also moved, and tests that began their
life in ARM64 use an arm64 triple, while those from AArch64 use an
aarch64 triple. Both should be equivalent, though.

This finishes the AArch64 merge, and everyone should feel free to
continue committing as normal now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209577 91177308-0d34-0410-b5e6-96231b3b80d8
---
 CMakeLists.txt | 4 +-
 autoconf/configure.ac | 18 +-
 cmake/config-ix.cmake | 2 +-
 configure | 18 +-
 docs/LangRef.rst | 2 +-
 include/llvm/IR/Intrinsics.td | 2 +-
 ...ntrinsicsARM64.td => IntrinsicsAArch64.td} | 388 +--
 .../RuntimeDyld/RuntimeDyldMachO.cpp | 7 +-
 .../RuntimeDyld/RuntimeDyldMachO.h | 2 +-
 lib/LTO/LTOCodeGenerator.cpp | 3 +-
 lib/LTO/LTOModule.cpp | 3 +-
 lib/MC/MCObjectFileInfo.cpp | 10 +-
 lib/Target/AArch64/AArch64.h | 49 +
 .../{ARM64/ARM64.td => AArch64/AArch64.td} | 26 +-
 .../AArch64AddressTypePromotion.cpp} | 53 +-
 .../AArch64AdvSIMDScalarPass.cpp} | 84 +-
 .../AArch64AsmPrinter.cpp} | 175 +-
 .../AArch64BranchRelaxation.cpp} | 167 +-
 .../AArch64CallingConv.h} | 42 +-
 .../AArch64CallingConvention.td} | 34 +-
 .../AArch64CleanupLocalDynamicTLSPass.cpp} | 38 +-
 .../AArch64CollectLOH.cpp} | 166 +-
 .../AArch64ConditionalCompares.cpp} | 202 +-
 .../AArch64DeadRegisterDefinitionsPass.cpp} | 38 +-
 .../AArch64ExpandPseudoInsts.cpp} | 246 +-
 .../AArch64FastISel.cpp} | 472 ++--
 .../AArch64FrameLowering.cpp} | 227 +-
 .../AArch64FrameLowering.h} | 18 +-
 .../AArch64ISelDAGToDAG.cpp} | 1163 ++++-----
 .../AArch64ISelLowering.cpp} | 2075 +++++++++--------
 .../AArch64ISelLowering.h} | 30 +-
 .../AArch64InstrAtomics.td} | 40 +-
 .../AArch64InstrFormats.td} | 172 +-
 .../AArch64InstrInfo.cpp} | 1408 +++++------
 .../AArch64InstrInfo.h} | 84 +-
 .../AArch64InstrInfo.td} | 1368 +++++------
 .../AArch64LoadStoreOptimizer.cpp} | 360 ++-
 .../AArch64MCInstLower.cpp} | 111 +-
 .../AArch64MCInstLower.h} | 12 +-
 .../AArch64MachineFunctionInfo.h} | 20 +-
 .../AArch64PerfectShuffle.h} | 2 +-
 .../AArch64PromoteConstant.cpp} | 62 +-
 .../AArch64RegisterInfo.cpp} | 210 +-
 .../AArch64RegisterInfo.h} | 24 +-
 lib/Target/AArch64/AArch64RegisterInfo.td | 593 +++++
 .../AArch64SchedA53.td} | 2 +-
 .../AArch64SchedCyclone.td} | 10 +-
 .../AArch64Schedule.td} | 8 +-
 .../AArch64SelectionDAGInfo.cpp} | 21 +-
 .../AArch64SelectionDAGInfo.h} | 16 +-
 .../AArch64StorePairSuppress.cpp} | 40 +-
 .../AArch64Subtarget.cpp} | 45 +-
 .../AArch64Subtarget.h} | 18 +-
 .../AArch64TargetMachine.cpp} | 136 +-
 lib/Target/AArch64/AArch64TargetMachine.h | 94 +
 .../AArch64TargetObjectFile.cpp} | 14 +-
 .../AArch64TargetObjectFile.h} | 14 +-
 .../AArch64TargetTransformInfo.cpp} | 73 +-
 .../AsmParser/AArch64AsmParser.cpp} | 1129 ++++-----
 .../AsmParser/CMakeLists.txt | 4 +-
 .../AsmParser/LLVMBuild.txt | 10 +-
 .../{ARM64 => AArch64}/AsmParser/Makefile | 4 +-
 lib/Target/AArch64/CMakeLists.txt | 51 +
 .../Disassembler/AArch64Disassembler.cpp} | 955 ++++----
 .../Disassembler/AArch64Disassembler.h} | 12 +-
 .../AArch64ExternalSymbolizer.cpp} | 43 +-
 .../Disassembler/AArch64ExternalSymbolizer.h} | 23 +-
 .../Disassembler/CMakeLists.txt | 8 +-
 .../Disassembler/LLVMBuild.txt | 10 +-
 .../{ARM64 => AArch64}/Disassembler/Makefile | 4 +-
.../InstPrinter/AArch64InstPrinter.cpp | 1316 +++++++++++ .../InstPrinter/AArch64InstPrinter.h} | 24 +- lib/Target/AArch64/InstPrinter/CMakeLists.txt | 7 + .../InstPrinter/LLVMBuild.txt | 10 +- .../{ARM64 => AArch64}/InstPrinter/Makefile | 4 +- lib/Target/{ARM64 => AArch64}/LLVMBuild.txt | 12 +- .../MCTargetDesc/AArch64AddressingModes.h} | 112 +- .../MCTargetDesc/AArch64AsmBackend.cpp} | 264 +-- .../MCTargetDesc/AArch64ELFObjectWriter.cpp} | 184 +- .../MCTargetDesc/AArch64ELFStreamer.cpp} | 20 +- .../MCTargetDesc/AArch64ELFStreamer.h} | 12 +- .../AArch64/MCTargetDesc/AArch64FixupKinds.h | 76 + .../MCTargetDesc/AArch64MCAsmInfo.cpp} | 18 +- .../MCTargetDesc/AArch64MCAsmInfo.h} | 16 +- .../MCTargetDesc/AArch64MCCodeEmitter.cpp} | 250 +- .../MCTargetDesc/AArch64MCExpr.cpp} | 20 +- .../MCTargetDesc/AArch64MCExpr.h} | 16 +- .../MCTargetDesc/AArch64MCTargetDesc.cpp | 225 ++ .../MCTargetDesc/AArch64MCTargetDesc.h} | 40 +- .../MCTargetDesc/AArch64MachObjectWriter.cpp} | 61 +- .../AArch64/MCTargetDesc/CMakeLists.txt | 14 + .../MCTargetDesc/LLVMBuild.txt | 10 +- .../{ARM64 => AArch64}/MCTargetDesc/Makefile | 4 +- lib/Target/AArch64/Makefile | 25 + .../TargetInfo/AArch64TargetInfo.cpp} | 16 +- lib/Target/AArch64/TargetInfo/CMakeLists.txt | 7 + .../TargetInfo}/LLVMBuild.txt | 8 +- .../{ARM64 => AArch64}/TargetInfo/Makefile | 4 +- .../Utils/AArch64BaseInfo.cpp} | 102 +- .../Utils/AArch64BaseInfo.h} | 416 ++-- lib/Target/AArch64/Utils/CMakeLists.txt | 3 + .../Utils}/LLVMBuild.txt | 8 +- lib/Target/{ARM64 => AArch64}/Utils/Makefile | 7 +- lib/Target/ARM64/ARM64.h | 48 - lib/Target/ARM64/ARM64RegisterInfo.td | 593 ----- lib/Target/ARM64/ARM64TargetMachine.h | 92 - lib/Target/ARM64/CMakeLists.txt | 51 - .../ARM64/InstPrinter/ARM64InstPrinter.cpp | 1312 ----------- lib/Target/ARM64/InstPrinter/CMakeLists.txt | 7 - .../ARM64/MCTargetDesc/ARM64FixupKinds.h | 76 - .../ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp | 210 -- lib/Target/ARM64/MCTargetDesc/CMakeLists.txt | 14 - lib/Target/ARM64/Makefile | 25 - lib/Target/ARM64/TargetInfo/CMakeLists.txt | 7 - lib/Target/ARM64/Utils/CMakeLists.txt | 3 - lib/Target/LLVMBuild.txt | 2 +- .../InstCombine/InstCombineCalls.cpp | 6 +- .../{ARM64 => AArch64}/lit.local.cfg | 2 +- .../CostModel/{ARM64 => AArch64}/select.ll | 0 .../CostModel/{ARM64 => AArch64}/store.ll | 0 test/CodeGen/AArch64/128bit_load_store.ll | 10 +- .../aarch64-neon-v1i1-setcc.ll | 0 test/CodeGen/AArch64/addsub.ll | 2 +- test/CodeGen/AArch64/addsub_ext.ll | 2 +- test/CodeGen/AArch64/alloca.ll | 36 +- test/CodeGen/AArch64/analyze-branch.ll | 2 +- .../arm64-2011-03-09-CPSRSpill.ll} | 0 .../arm64-2011-03-17-AsmPrinterCrash.ll} | 0 ...arm64-2011-03-21-Unaligned-Frame-Index.ll} | 0 .../arm64-2011-04-21-CPSRBug.ll} | 0 .../arm64-2011-10-18-LdStOptBug.ll} | 0 .../arm64-2012-01-11-ComparisonDAGCrash.ll} | 0 ...m64-2012-05-07-DAGCombineVectorExtract.ll} | 0 .../arm64-2012-05-07-MemcpyAlignBug.ll} | 0 .../arm64-2012-05-09-LOADgot-bug.ll} | 0 .../arm64-2012-05-22-LdStOptBug.ll} | 0 .../arm64-2012-06-06-FPToUI.ll} | 0 .../arm64-2012-07-11-InstrEmitterBug.ll} | 0 .../arm64-2013-01-13-ffast-fcmp.ll} | 4 +- .../arm64-2013-01-23-frem-crash.ll} | 0 .../arm64-2013-01-23-sext-crash.ll} | 0 .../arm64-2013-02-12-shufv8i8.ll} | 2 +- ...-2014-04-16-AnInfiniteLoopInDAGCombine.ll} | 0 ...rm64-2014-04-28-sqshl-uqshl-i64Contant.ll} | 8 +- .../arm64-2014-04-29-EXT-undef-mask.ll} | 2 +- .../arm64-AdvSIMD-Scalar.ll} | 4 +- .../aapcs.ll => AArch64/arm64-aapcs.ll} | 0 .../arm64-abi-varargs.ll} | 0 .../{ARM64/abi.ll => AArch64/arm64-abi.ll} | 
0 .../arm64-abi_align.ll} | 0 .../{ARM64/addp.ll => AArch64/arm64-addp.ll} | 2 +- .../arm64-addr-mode-folding.ll} | 0 .../arm64-addr-type-promotion.ll} | 0 .../addrmode.ll => AArch64/arm64-addrmode.ll} | 0 .../arm64-alloc-no-stack-realign.ll} | 0 .../arm64-alloca-frame-pointer-offset.ll} | 0 .../arm64-andCmpBrToTBZ.ll} | 0 .../arm64-ands-bad-peephole.ll} | 0 .../arm64-anyregcc-crash.ll} | 0 .../anyregcc.ll => AArch64/arm64-anyregcc.ll} | 0 .../arm64-arith-saturating.ll} | 60 +- .../arith.ll => AArch64/arm64-arith.ll} | 16 +- .../arm64-arm64-dead-def-elimination-flag.ll} | 2 +- .../arm64-atomic-128.ll} | 0 .../atomic.ll => AArch64/arm64-atomic.ll} | 0 .../arm64-basic-pic.ll} | 0 .../arm64-big-endian-bitconverts.ll} | 4 +- .../arm64-big-endian-eh.ll} | 0 .../arm64-big-endian-varargs.ll} | 0 .../arm64-big-endian-vector-callee.ll} | 4 +- .../arm64-big-endian-vector-caller.ll} | 4 +- .../arm64-big-imm-offsets.ll} | 0 .../arm64-big-stack.ll} | 0 .../arm64-bitfield-extract.ll} | 0 .../arm64-blockaddress.ll} | 0 .../arm64-build-vector.ll} | 2 +- .../arm64-call-tailcalls.ll} | 0 .../cast-opt.ll => AArch64/arm64-cast-opt.ll} | 0 .../arm64-ccmp-heuristics.ll} | 2 +- .../{ARM64/ccmp.ll => AArch64/arm64-ccmp.ll} | 2 +- .../clrsb.ll => AArch64/arm64-clrsb.ll} | 0 .../arm64-coalesce-ext.ll} | 0 .../arm64-code-model-large-abs.ll} | 0 .../arm64-collect-loh-garbage-crash.ll} | 2 +- .../arm64-collect-loh-str.ll} | 2 +- .../arm64-collect-loh.ll} | 4 +- .../arm64-complex-copy-noneon.ll} | 0 .../arm64-complex-ret.ll} | 0 .../arm64-const-addr.ll} | 0 .../arm64-convert-v2f64-v2i32.ll} | 2 +- .../arm64-convert-v2i32-v2f64.ll} | 2 +- .../arm64-copy-tuple.ll} | 58 +- .../crc32.ll => AArch64/arm64-crc32.ll} | 32 +- .../crypto.ll => AArch64/arm64-crypto.ll} | 62 +- .../{ARM64/cse.ll => AArch64/arm64-cse.ll} | 0 .../{ARM64/csel.ll => AArch64/arm64-csel.ll} | 0 .../{ARM64/cvt.ll => AArch64/arm64-cvt.ll} | 162 +- .../arm64-dagcombiner-convergence.ll} | 0 .../arm64-dagcombiner-dead-indexed-load.ll} | 0 .../arm64-dagcombiner-indexed-load.ll} | 0 .../arm64-dagcombiner-load-slicing.ll} | 0 .../arm64-dead-def-frame-index.ll} | 0 .../arm64-dead-register-def-bug.ll} | 0 .../{ARM64/dup.ll => AArch64/arm64-dup.ll} | 2 +- .../arm64-early-ifcvt.ll} | 0 .../arm64-elf-calls.ll} | 0 .../arm64-elf-constpool.ll} | 0 .../arm64-elf-globals.ll} | 0 .../{ARM64/ext.ll => AArch64/arm64-ext.ll} | 2 +- .../arm64-extend-int-to-fp.ll} | 2 +- .../extend.ll => AArch64/arm64-extend.ll} | 0 .../arm64-extern-weak.ll} | 0 .../arm64-extload-knownzero.ll} | 0 .../extract.ll => AArch64/arm64-extract.ll} | 2 +- .../arm64-extract_subvector.ll} | 2 +- .../arm64-fast-isel-addr-offset.ll} | 0 .../arm64-fast-isel-alloca.ll} | 0 .../arm64-fast-isel-br.ll} | 0 .../arm64-fast-isel-call.ll} | 0 .../arm64-fast-isel-conversion.ll} | 0 .../arm64-fast-isel-fcmp.ll} | 0 .../arm64-fast-isel-gv.ll} | 0 .../arm64-fast-isel-icmp.ll} | 0 .../arm64-fast-isel-indirectbr.ll} | 0 .../arm64-fast-isel-intrinsic.ll} | 0 .../arm64-fast-isel-materialize.ll} | 0 .../arm64-fast-isel-noconvert.ll} | 0 .../arm64-fast-isel-rem.ll} | 0 .../arm64-fast-isel-ret.ll} | 0 .../arm64-fast-isel-select.ll} | 0 .../arm64-fast-isel.ll} | 0 .../arm64-fastcc-tailcall.ll} | 0 .../arm64-fastisel-gep-promote-before-add.ll} | 0 .../fcmp-opt.ll => AArch64/arm64-fcmp-opt.ll} | 2 +- .../arm64-fcopysign.ll} | 0 ...rm64-fixed-point-scalar-cvt-dagcombine.ll} | 6 +- .../fmadd.ll => AArch64/arm64-fmadd.ll} | 0 .../{ARM64/fmax.ll => AArch64/arm64-fmax.ll} | 0 test/CodeGen/AArch64/arm64-fminv.ll | 101 + 
.../fmuladd.ll => AArch64/arm64-fmuladd.ll} | 2 +- .../arm64-fold-address.ll} | 0 .../fold-lsl.ll => AArch64/arm64-fold-lsl.ll} | 2 +- .../arm64-fp-contract-zero.ll} | 0 .../fp-imm.ll => AArch64/arm64-fp-imm.ll} | 0 .../{ARM64/fp.ll => AArch64/arm64-fp.ll} | 0 .../arm64-fp128-folding.ll} | 0 .../fp128.ll => AArch64/arm64-fp128.ll} | 0 .../arm64-frame-index.ll} | 0 .../arm64-frameaddr.ll} | 0 .../arm64-global-address.ll} | 0 .../hello.ll => AArch64/arm64-hello.ll} | 0 .../arm64-i16-subreg-extract.ll} | 2 +- .../icmp-opt.ll => AArch64/arm64-icmp-opt.ll} | 0 .../arm64-illegal-float-ops.ll} | 0 .../arm64-indexed-memory.ll} | 2 +- .../arm64-indexed-vector-ldst-2.ll} | 0 .../arm64-indexed-vector-ldst.ll} | 1518 ++++++------ .../arm64-inline-asm-error-I.ll} | 0 .../arm64-inline-asm-error-J.ll} | 0 .../arm64-inline-asm-error-K.ll} | 0 .../arm64-inline-asm-error-L.ll} | 0 .../arm64-inline-asm-error-M.ll} | 0 .../arm64-inline-asm-error-N.ll} | 0 .../arm64-inline-asm-zero-reg-error.ll} | 0 .../arm64-inline-asm.ll} | 2 +- .../arm64-join-reserved.ll} | 0 .../arm64-jumptable.ll} | 0 .../arm64-large-frame.ll} | 0 .../{ARM64/ld1.ll => AArch64/arm64-ld1.ll} | 398 ++-- .../{ARM64/ldp.ll => AArch64/arm64-ldp.ll} | 2 +- .../{ARM64/ldur.ll => AArch64/arm64-ldur.ll} | 0 .../arm64-ldxr-stxr.ll} | 84 +- .../{ARM64/leaf.ll => AArch64/arm64-leaf.ll} | 0 .../arm64-long-shift.ll} | 0 .../arm64-memcpy-inline.ll} | 0 .../arm64-memset-inline.ll} | 0 .../arm64-memset-to-bzero.ll} | 0 .../arm64-misched-basic-A53.ll} | 4 +- .../arm64-misched-forwarding-A53.ll} | 0 .../{ARM64/movi.ll => AArch64/arm64-movi.ll} | 0 .../{ARM64/mul.ll => AArch64/arm64-mul.ll} | 0 .../arm64-named-reg-alloc.ll} | 0 .../arm64-named-reg-notareg.ll} | 0 .../{ARM64/neg.ll => AArch64/arm64-neg.ll} | 0 .../arm64-neon-2velem-high.ll} | 64 +- .../arm64-neon-2velem.ll} | 386 +-- .../arm64-neon-3vdiff.ll} | 268 +-- .../arm64-neon-aba-abd.ll} | 86 +- .../arm64-neon-across.ll} | 166 +- .../arm64-neon-add-pairwise.ll} | 44 +- .../arm64-neon-add-sub.ll} | 22 +- .../arm64-neon-compare-instructions.ll} | 0 .../arm64-neon-copy.ll} | 14 +- .../arm64-neon-copyPhysReg-tuple.ll} | 18 +- .../arm64-neon-mul-div.ll} | 52 +- .../arm64-neon-scalar-by-elem-mul.ll} | 18 +- .../arm64-neon-select_cc.ll} | 0 .../arm64-neon-simd-ldst-one.ll} | 0 .../arm64-neon-simd-shift.ll} | 124 +- .../arm64-neon-simd-vget.ll} | 0 .../arm64-neon-v1i1-setcc.ll} | 0 .../arm64-neon-vector-list-spill.ll} | 36 +- .../arm64-patchpoint.ll} | 0 .../arm64-pic-local-symbol.ll} | 0 .../arm64-platform-reg.ll} | 0 .../popcnt.ll => AArch64/arm64-popcnt.ll} | 2 +- .../prefetch.ll => AArch64/arm64-prefetch.ll} | 0 .../arm64-promote-const.ll} | 4 +- .../redzone.ll => AArch64/arm64-redzone.ll} | 2 +- .../arm64-reg-copy-noneon.ll} | 0 .../arm64-register-offset-addressing.ll} | 0 .../arm64-register-pairing.ll} | 0 .../arm64-regress-f128csel-flags.ll} | 0 .../arm64-regress-interphase-shift.ll} | 0 .../arm64-return-vector.ll} | 0 .../arm64-returnaddr.ll} | 0 .../{ARM64/rev.ll => AArch64/arm64-rev.ll} | 2 +- .../rounding.ll => AArch64/arm64-rounding.ll} | 0 .../arm64-scaled_iv.ll} | 0 .../{ARM64/scvt.ll => AArch64/arm64-scvt.ll} | 2 +- .../arm64-shifted-sext.ll} | 0 .../AArch64/arm64-simd-scalar-to-vector.ll | 22 + .../arm64-simplest-elf.ll} | 0 .../sincos.ll => AArch64/arm64-sincos.ll} | 0 .../arm64-sitofp-combine-chains.ll} | 0 .../arm64-sli-sri-opt.ll} | 2 +- .../smaxv.ll => AArch64/arm64-smaxv.ll} | 26 +- .../sminv.ll => AArch64/arm64-sminv.ll} | 26 +- .../spill-lr.ll => AArch64/arm64-spill-lr.ll} | 0 
.../spill.ll => AArch64/arm64-spill.ll} | 2 +- .../{ARM64/st1.ll => AArch64/arm64-st1.ll} | 290 +-- .../arm64-stack-no-frame.ll} | 0 .../stackmap.ll => AArch64/arm64-stackmap.ll} | 0 .../arm64-stackpointer.ll} | 0 .../arm64-stacksave.ll} | 0 .../{ARM64/stp.ll => AArch64/arm64-stp.ll} | 4 +- .../arm64-strict-align.ll} | 4 +- .../{ARM64/stur.ll => AArch64/arm64-stur.ll} | 2 +- .../arm64-subsections.ll} | 0 .../arm64-subvector-extend.ll} | 2 +- .../arm64-swizzle-tbl-i16-layout.ll} | 0 test/CodeGen/AArch64/arm64-tbl.ll | 132 ++ .../arm64-this-return.ll} | 0 .../arm64-tls-darwin.ll} | 0 .../arm64-tls-dynamic-together.ll} | 0 .../arm64-tls-dynamics.ll} | 0 .../arm64-tls-execs.ll} | 0 .../{ARM64/trap.ll => AArch64/arm64-trap.ll} | 0 .../{ARM64/trn.ll => AArch64/arm64-trn.ll} | 2 +- .../arm64-trunc-store.ll} | 0 .../umaxv.ll => AArch64/arm64-umaxv.ll} | 18 +- .../uminv.ll => AArch64/arm64-uminv.ll} | 18 +- .../{ARM64/umov.ll => AArch64/arm64-umov.ll} | 2 +- .../arm64-unaligned_ldst.ll} | 0 .../{ARM64/uzp.ll => AArch64/arm64-uzp.ll} | 2 +- .../vaargs.ll => AArch64/arm64-vaargs.ll} | 0 .../{ARM64/vabs.ll => AArch64/arm64-vabs.ll} | 226 +- .../{ARM64/vadd.ll => AArch64/arm64-vadd.ll} | 158 +- .../vaddlv.ll => AArch64/arm64-vaddlv.ll} | 10 +- .../vaddv.ll => AArch64/arm64-vaddv.ll} | 76 +- .../arm64-variadic-aapcs.ll} | 0 .../vbitwise.ll => AArch64/arm64-vbitwise.ll} | 10 +- .../{ARM64/vclz.ll => AArch64/arm64-vclz.ll} | 2 +- .../{ARM64/vcmp.ll => AArch64/arm64-vcmp.ll} | 42 +- test/CodeGen/AArch64/arm64-vcnt.ll | 56 + .../vcombine.ll => AArch64/arm64-vcombine.ll} | 2 +- .../{ARM64/vcvt.ll => AArch64/arm64-vcvt.ll} | 164 +- .../vcvt_f.ll => AArch64/arm64-vcvt_f.ll} | 18 +- .../arm64-vcvt_f32_su32.ll} | 14 +- test/CodeGen/AArch64/arm64-vcvt_n.ll | 49 + .../arm64-vcvt_su32_f32.ll} | 2 +- .../arm64-vcvtxd_f32_f64.ll} | 4 +- .../vecCmpBr.ll => AArch64/arm64-vecCmpBr.ll} | 26 +- .../vecFold.ll => AArch64/arm64-vecFold.ll} | 40 +- .../arm64-vector-ext.ll} | 2 +- .../arm64-vector-imm.ll} | 2 +- .../arm64-vector-insertion.ll} | 2 +- .../arm64-vector-ldst.ll} | 2 +- .../{ARM64/vext.ll => AArch64/arm64-vext.ll} | 2 +- .../arm64-vext_reverse.ll} | 0 .../arm64-vfloatintrinsics.ll} | 2 +- .../vhadd.ll => AArch64/arm64-vhadd.ll} | 98 +- .../vhsub.ll => AArch64/arm64-vhsub.ll} | 50 +- .../arm64-virtual_base.ll} | 0 .../{ARM64/vmax.ll => AArch64/arm64-vmax.ll} | 270 +-- test/CodeGen/AArch64/arm64-vminmaxnm.ll | 68 + .../vmovn.ll => AArch64/arm64-vmovn.ll} | 56 +- .../{ARM64/vmul.ll => AArch64/arm64-vmul.ll} | 370 +-- .../volatile.ll => AArch64/arm64-volatile.ll} | 0 .../vpopcnt.ll => AArch64/arm64-vpopcnt.ll} | 0 .../vqadd.ll => AArch64/arm64-vqadd.ll} | 140 +- test/CodeGen/AArch64/arm64-vqsub.ll | 147 ++ .../vselect.ll => AArch64/arm64-vselect.ll} | 2 +- .../arm64-vsetcc_fp.ll} | 2 +- .../vshift.ll => AArch64/arm64-vshift.ll} | 484 ++-- .../{ARM64/vshr.ll => AArch64/arm64-vshr.ll} | 2 +- .../vshuffle.ll => AArch64/arm64-vshuffle.ll} | 0 .../vsqrt.ll => AArch64/arm64-vsqrt.ll} | 98 +- .../{ARM64/vsra.ll => AArch64/arm64-vsra.ll} | 2 +- .../{ARM64/vsub.ll => AArch64/arm64-vsub.ll} | 50 +- .../arm64-weak-reference.ll} | 0 .../xaluo.ll => AArch64/arm64-xaluo.ll} | 0 .../arm64-zero-cycle-regmov.ll} | 0 .../arm64-zero-cycle-zeroing.ll} | 0 .../{ARM64/zext.ll => AArch64/arm64-zext.ll} | 0 .../arm64-zextload-unscaled.ll} | 0 .../{ARM64/zip.ll => AArch64/arm64-zip.ll} | 2 +- .../AArch64/atomic-ops-not-barriers.ll | 2 +- test/CodeGen/AArch64/atomic-ops.ll | 76 +- test/CodeGen/AArch64/basic-pic.ll | 2 +- 
test/CodeGen/AArch64/bitfield-insert-0.ll | 2 +- test/CodeGen/AArch64/bitfield-insert.ll | 10 +- test/CodeGen/AArch64/bitfield.ll | 8 +- test/CodeGen/AArch64/blockaddress.ll | 4 +- test/CodeGen/AArch64/bool-loads.ll | 2 +- test/CodeGen/AArch64/breg.ll | 2 +- test/CodeGen/AArch64/callee-save.ll | 28 +- test/CodeGen/AArch64/code-model-large-abs.ll | 2 +- test/CodeGen/AArch64/compare-branch.ll | 2 +- test/CodeGen/AArch64/complex-copy-noneon.ll | 2 +- test/CodeGen/AArch64/cond-sel.ll | 6 +- test/CodeGen/AArch64/directcond.ll | 12 +- test/CodeGen/AArch64/dp1.ll | 2 +- test/CodeGen/AArch64/eliminate-trunc.ll | 8 +- test/CodeGen/AArch64/extern-weak.ll | 18 +- test/CodeGen/AArch64/fastcc-reserved.ll | 33 +- test/CodeGen/AArch64/fastcc.ll | 136 +- test/CodeGen/AArch64/fcmp.ll | 2 +- test/CodeGen/AArch64/fcvt-fixed.ll | 2 +- test/CodeGen/AArch64/flags-multiuse.ll | 2 +- test/CodeGen/AArch64/floatdp_2source.ll | 2 +- test/CodeGen/AArch64/fp-cond-sel.ll | 4 +- test/CodeGen/AArch64/fp-dp3.ll | 2 +- test/CodeGen/AArch64/fp128-folding.ll | 2 +- test/CodeGen/AArch64/fpimm.ll | 2 +- test/CodeGen/AArch64/func-argpassing.ll | 18 +- test/CodeGen/AArch64/func-calls.ll | 36 +- test/CodeGen/AArch64/global-alignment.ll | 2 +- test/CodeGen/AArch64/got-abuse.ll | 4 +- test/CodeGen/AArch64/illegal-float-ops.ll | 2 +- test/CodeGen/AArch64/init-array.ll | 4 +- .../AArch64/inline-asm-constraints-badI.ll | 2 +- .../AArch64/inline-asm-constraints-badK2.ll | 2 +- test/CodeGen/AArch64/jump-table.ll | 6 +- test/CodeGen/AArch64/large-consts.ll | 10 +- test/CodeGen/AArch64/ldst-regoffset.ll | 4 +- test/CodeGen/AArch64/ldst-unscaledimm.ll | 4 +- test/CodeGen/AArch64/ldst-unsignedimm.ll | 4 +- test/CodeGen/{ARM64 => AArch64}/lit.local.cfg | 2 +- test/CodeGen/AArch64/literal_pools_float.ll | 8 +- test/CodeGen/AArch64/local_vars.ll | 4 +- test/CodeGen/AArch64/logical_shifted_reg.ll | 2 +- test/CodeGen/AArch64/mature-mc-support.ll | 8 +- test/CodeGen/AArch64/movw-consts.ll | 28 +- test/CodeGen/AArch64/movw-shift-encoding.ll | 10 +- test/CodeGen/AArch64/neon-bitcast.ll | 2 +- .../AArch64/neon-bitwise-instructions.ll | 2 +- .../AArch64/neon-compare-instructions.ll | 2 +- test/CodeGen/AArch64/neon-diagnostics.ll | 2 +- test/CodeGen/AArch64/neon-extract.ll | 2 +- test/CodeGen/AArch64/neon-fma.ll | 2 +- test/CodeGen/AArch64/neon-fpround_f128.ll | 2 +- test/CodeGen/AArch64/neon-idiv.ll | 2 +- test/CodeGen/AArch64/neon-mla-mls.ll | 2 +- test/CodeGen/AArch64/neon-mov.ll | 34 +- test/CodeGen/AArch64/neon-or-combine.ll | 2 +- test/CodeGen/AArch64/neon-perm.ll | 110 +- .../AArch64/neon-scalar-by-elem-fma.ll | 2 +- test/CodeGen/AArch64/neon-scalar-copy.ll | 10 +- test/CodeGen/AArch64/neon-shift-left-long.ll | 2 +- .../AArch64/neon-truncStore-extLoad.ll | 2 +- test/CodeGen/AArch64/pic-eh-stubs.ll | 2 +- .../CodeGen/AArch64/regress-f128csel-flags.ll | 2 +- test/CodeGen/AArch64/regress-fp128-livein.ll | 2 +- test/CodeGen/AArch64/regress-tblgen-chains.ll | 14 +- .../AArch64/regress-w29-reserved-with-fp.ll | 2 +- test/CodeGen/AArch64/setcc-takes-i32.ll | 2 +- test/CodeGen/AArch64/sibling-call.ll | 2 +- test/CodeGen/AArch64/sincos-expansion.ll | 2 +- .../AArch64/sincospow-vector-expansion.ll | 2 +- test/CodeGen/AArch64/tail-call.ll | 64 +- test/CodeGen/AArch64/zero-reg.ll | 2 +- .../ARM64/compact-unwind-unhandled-cfi.S | 17 - test/CodeGen/ARM64/fminv.ll | 101 - test/CodeGen/ARM64/simd-scalar-to-vector.ll | 22 - test/CodeGen/ARM64/tbl.ll | 132 -- test/CodeGen/ARM64/vcnt.ll | 56 - test/CodeGen/ARM64/vcvt_n.ll | 49 - test/CodeGen/ARM64/vminmaxnm.ll | 
68 - test/CodeGen/ARM64/vqsub.ll | 147 -- .../{ARM64 => AArch64}/struct_by_value.ll | 0 test/MC/AArch64/adrp-relocation.s | 2 +- test/MC/{ARM64/adr.s => AArch64/arm64-adr.s} | 8 +- .../advsimd.s => AArch64/arm64-advsimd.s} | 0 .../aliases.s => AArch64/arm64-aliases.s} | 0 .../arm64-arithmetic-encoding.s} | 0 .../arm64-arm64-fixup.s} | 4 +- .../arm64-basic-a64-instructions.s} | 0 .../arm64-be-datalayout.s} | 0 .../arm64-bitfield-encoding.s} | 0 .../arm64-branch-encoding.s} | 34 +- .../arm64-condbr-without-dots.s} | 0 .../crypto.s => AArch64/arm64-crypto.s} | 0 .../arm64-diagno-predicate.s} | 0 .../{ARM64/diags.s => AArch64/arm64-diags.s} | 2 +- .../arm64-directive_loh.s} | 0 .../arm64-elf-reloc-condbr.s} | 0 .../arm64-elf-relocs.s} | 0 .../arm64-fp-encoding.s} | 0 .../arm64-large-relocs.s} | 14 +- .../arm64-leaf-compact-unwind.s} | 0 .../arm64-logical-encoding.s} | 0 .../arm64-mapping-across-sections.s} | 0 .../arm64-mapping-within-section.s} | 0 .../memory.s => AArch64/arm64-memory.s} | 0 .../nv-cond.s => AArch64/arm64-nv-cond.s} | 0 .../arm64-optional-hash.s} | 0 .../separator.s => AArch64/arm64-separator.s} | 0 .../simd-ldst.s => AArch64/arm64-simd-ldst.s} | 0 .../arm64-small-data-fixups.s} | 0 .../arm64-spsel-sysreg.s} | 0 .../arm64-system-encoding.s} | 0 .../arm64-target-specific-sysreg.s} | 0 .../arm64-tls-modifiers-darwin.s} | 0 .../arm64-tls-relocs.s} | 106 +- .../arm64-v128_lo-diagnostics.s} | 0 .../arm64-variable-exprs.s} | 0 .../arm64-vector-lists.s} | 0 .../arm64-verbose-vector-case.s} | 0 test/MC/AArch64/basic-a64-diagnostics.s | 2 +- test/MC/AArch64/basic-a64-instructions.s | 418 ++-- test/MC/AArch64/basic-pic.s | 2 +- test/MC/AArch64/elf-extern.s | 2 +- test/MC/AArch64/elf-objdump.s | 2 +- test/MC/AArch64/elf-reloc-addsubimm.s | 2 +- test/MC/AArch64/elf-reloc-ldrlit.s | 2 +- test/MC/AArch64/elf-reloc-ldstunsimm.s | 2 +- test/MC/AArch64/elf-reloc-movw.s | 2 +- test/MC/AArch64/elf-reloc-pcreladdressing.s | 2 +- test/MC/AArch64/elf-reloc-tstb.s | 2 +- test/MC/AArch64/elf-reloc-uncondbrimm.s | 2 +- test/MC/AArch64/gicv3-regs-diagnostics.s | 2 +- test/MC/AArch64/gicv3-regs.s | 2 +- test/MC/AArch64/inline-asm-modifiers.s | 2 +- test/MC/AArch64/jump-table.s | 2 +- test/MC/AArch64/lit.local.cfg | 2 +- test/MC/AArch64/mapping-across-sections.s | 2 +- test/MC/AArch64/mapping-within-section.s | 2 +- test/MC/AArch64/neon-3vdiff.s | 2 +- test/MC/AArch64/neon-aba-abd.s | 2 +- test/MC/AArch64/neon-add-pairwise.s | 2 +- test/MC/AArch64/neon-add-sub-instructions.s | 2 +- test/MC/AArch64/neon-bitwise-instructions.s | 2 +- test/MC/AArch64/neon-compare-instructions.s | 2 +- test/MC/AArch64/neon-diagnostics.s | 453 ++-- test/MC/AArch64/neon-facge-facgt.s | 2 +- test/MC/AArch64/neon-frsqrt-frecp.s | 2 +- test/MC/AArch64/neon-halving-add-sub.s | 2 +- test/MC/AArch64/neon-max-min-pairwise.s | 2 +- test/MC/AArch64/neon-max-min.s | 2 +- test/MC/AArch64/neon-mla-mls-instructions.s | 2 +- test/MC/AArch64/neon-mov.s | 2 +- test/MC/AArch64/neon-mul-div-instructions.s | 2 +- test/MC/AArch64/neon-rounding-halving-add.s | 2 +- test/MC/AArch64/neon-rounding-shift.s | 2 +- test/MC/AArch64/neon-saturating-add-sub.s | 2 +- .../AArch64/neon-saturating-rounding-shift.s | 2 +- test/MC/AArch64/neon-saturating-shift.s | 2 +- test/MC/AArch64/neon-scalar-abs.s | 2 +- test/MC/AArch64/neon-scalar-add-sub.s | 2 +- test/MC/AArch64/neon-scalar-by-elem-mla.s | 2 +- test/MC/AArch64/neon-scalar-by-elem-mul.s | 2 +- .../neon-scalar-by-elem-saturating-mla.s | 2 +- .../neon-scalar-by-elem-saturating-mul.s | 2 +- 
test/MC/AArch64/neon-scalar-compare.s | 2 +- test/MC/AArch64/neon-scalar-cvt.s | 2 +- test/MC/AArch64/neon-scalar-dup.s | 2 +- test/MC/AArch64/neon-scalar-extract-narrow.s | 2 +- test/MC/AArch64/neon-scalar-fp-compare.s | 2 +- test/MC/AArch64/neon-scalar-mul.s | 2 +- test/MC/AArch64/neon-scalar-neg.s | 2 +- test/MC/AArch64/neon-scalar-recip.s | 2 +- test/MC/AArch64/neon-scalar-reduce-pairwise.s | 2 +- test/MC/AArch64/neon-scalar-rounding-shift.s | 2 +- .../AArch64/neon-scalar-saturating-add-sub.s | 2 +- .../neon-scalar-saturating-rounding-shift.s | 2 +- .../MC/AArch64/neon-scalar-saturating-shift.s | 2 +- test/MC/AArch64/neon-scalar-shift-imm.s | 2 +- test/MC/AArch64/neon-scalar-shift.s | 2 +- test/MC/AArch64/neon-shift-left-long.s | 2 +- test/MC/AArch64/neon-shift.s | 2 +- test/MC/AArch64/neon-simd-copy.s | 2 +- test/MC/AArch64/neon-simd-shift.s | 2 +- test/MC/AArch64/neon-sxtl.s | 2 +- test/MC/AArch64/neon-uxtl.s | 2 +- test/MC/AArch64/noneon-diagnostics.s | 37 +- test/MC/AArch64/optional-hash.s | 2 +- test/MC/AArch64/tls-relocs.s | 282 +-- test/MC/AArch64/trace-regs-diagnostics.s | 2 +- test/MC/AArch64/trace-regs.s | 2 +- test/MC/ARM64/lit.local.cfg | 6 - .../advsimd.txt => AArch64/arm64-advsimd.txt} | 0 .../arm64-arithmetic.txt} | 0 .../arm64-basic-a64-undefined.txt} | 0 .../arm64-bitfield.txt} | 0 .../branch.txt => AArch64/arm64-branch.txt} | 0 .../arm64-canonical-form.txt} | 0 .../crc32.txt => AArch64/arm64-crc32.txt} | 0 .../crypto.txt => AArch64/arm64-crypto.txt} | 0 .../arm64-invalid-logical.txt} | 0 .../logical.txt => AArch64/arm64-logical.txt} | 0 .../memory.txt => AArch64/arm64-memory.txt} | 0 .../arm64-non-apple-fmov.txt} | 0 .../arm64-scalar-fp.txt} | 0 .../system.txt => AArch64/arm64-system.txt} | 0 test/MC/Disassembler/AArch64/lit.local.cfg | 2 +- test/MC/Disassembler/ARM64/lit.local.cfg | 5 - .../darwin-ARM64-local-label-diff.s | 0 .../{ARM64 => AArch64}/darwin-ARM64-reloc.s | 0 .../ARM64 => MC/MachO/AArch64}/lit.local.cfg | 2 +- .../{ARM64 => AArch64}/const-addr.ll | 0 .../{ARM64 => AArch64}/large-immediate.ll | 0 .../{ARM64 => AArch64}/lit.local.cfg | 2 +- .../GlobalMerge/{ARM64 => AArch64}/arm64.ll | 0 .../GlobalMerge/AArch64}/lit.local.cfg | 2 +- .../InstCombine/2012-04-23-Neon-Intrinsics.ll | 20 +- .../{ARM64 => AArch64}/lit.local.cfg | 2 +- .../{ARM64 => AArch64}/lsr-memcpy.ll | 0 .../{ARM64 => AArch64}/lsr-memset.ll | 0 .../{ARM64 => AArch64}/req-regs.ll | 0 .../{ARM64 => AArch64}/arm64-unroll.ll | 0 .../{ARM64 => AArch64}/gather-cost.ll | 0 .../LoopVectorize/ARM64/lit.local.cfg | 6 - .../SLPVectorizer/AArch64}/lit.local.cfg | 3 +- .../mismatched-intrinsics.ll | 0 .../SLPVectorizer/ARM64/lit.local.cfg | 3 - 636 files changed, 14412 insertions(+), 14518 deletions(-) rename include/llvm/IR/{IntrinsicsARM64.td => IntrinsicsAArch64.td} (55%) create mode 100644 lib/Target/AArch64/AArch64.h rename lib/Target/{ARM64/ARM64.td => AArch64/AArch64.td} (90%) rename lib/Target/{ARM64/ARM64AddressTypePromotion.cpp => AArch64/AArch64AddressTypePromotion.cpp} (90%) rename lib/Target/{ARM64/ARM64AdvSIMDScalarPass.cpp => AArch64/AArch64AdvSIMDScalarPass.cpp} (85%) rename lib/Target/{ARM64/ARM64AsmPrinter.cpp => AArch64/AArch64AsmPrinter.cpp} (74%) rename lib/Target/{ARM64/ARM64BranchRelaxation.cpp => AArch64/AArch64BranchRelaxation.cpp} (78%) rename lib/Target/{ARM64/ARM64CallingConv.h => AArch64/AArch64CallingConv.h} (65%) rename lib/Target/{ARM64/ARM64CallingConvention.td => AArch64/AArch64CallingConvention.td} (92%) rename lib/Target/{ARM64/ARM64CleanupLocalDynamicTLSPass.cpp => 
AArch64/AArch64CleanupLocalDynamicTLSPass.cpp} (81%) rename lib/Target/{ARM64/ARM64CollectLOH.cpp => AArch64/AArch64CollectLOH.cpp} (91%) rename lib/Target/{ARM64/ARM64ConditionalCompares.cpp => AArch64/AArch64ConditionalCompares.cpp} (85%) rename lib/Target/{ARM64/ARM64DeadRegisterDefinitionsPass.cpp => AArch64/AArch64DeadRegisterDefinitionsPass.cpp} (79%) rename lib/Target/{ARM64/ARM64ExpandPseudoInsts.cpp => AArch64/AArch64ExpandPseudoInsts.cpp} (76%) rename lib/Target/{ARM64/ARM64FastISel.cpp => AArch64/AArch64FastISel.cpp} (78%) rename lib/Target/{ARM64/ARM64FrameLowering.cpp => AArch64/AArch64FrameLowering.cpp} (82%) rename lib/Target/{ARM64/ARM64FrameLowering.h => AArch64/AArch64FrameLowering.h} (86%) rename lib/Target/{ARM64/ARM64ISelDAGToDAG.cpp => AArch64/AArch64ISelDAGToDAG.cpp} (70%) rename lib/Target/{ARM64/ARM64ISelLowering.cpp => AArch64/AArch64ISelLowering.cpp} (80%) rename lib/Target/{ARM64/ARM64ISelLowering.h => AArch64/AArch64ISelLowering.h} (96%) rename lib/Target/{ARM64/ARM64InstrAtomics.td => AArch64/AArch64InstrAtomics.td} (92%) rename lib/Target/{ARM64/ARM64InstrFormats.td => AArch64/AArch64InstrFormats.td} (98%) rename lib/Target/{ARM64/ARM64InstrInfo.cpp => AArch64/AArch64InstrInfo.cpp} (55%) rename lib/Target/{ARM64/ARM64InstrInfo.h => AArch64/AArch64InstrInfo.h} (81%) rename lib/Target/{ARM64/ARM64InstrInfo.td => AArch64/AArch64InstrInfo.td} (83%) rename lib/Target/{ARM64/ARM64LoadStoreOptimizer.cpp => AArch64/AArch64LoadStoreOptimizer.cpp} (80%) rename lib/Target/{ARM64/ARM64MCInstLower.cpp => AArch64/AArch64MCInstLower.cpp} (58%) rename lib/Target/{ARM64/ARM64MCInstLower.h => AArch64/AArch64MCInstLower.h} (78%) rename lib/Target/{ARM64/ARM64MachineFunctionInfo.h => AArch64/AArch64MachineFunctionInfo.h} (90%) rename lib/Target/{ARM64/ARM64PerfectShuffle.h => AArch64/AArch64PerfectShuffle.h} (99%) rename lib/Target/{ARM64/ARM64PromoteConstant.cpp => AArch64/AArch64PromoteConstant.cpp} (92%) rename lib/Target/{ARM64/ARM64RegisterInfo.cpp => AArch64/AArch64RegisterInfo.cpp} (66%) rename lib/Target/{ARM64/ARM64RegisterInfo.h => AArch64/AArch64RegisterInfo.h} (85%) create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.td rename lib/Target/{ARM64/ARM64SchedA53.td => AArch64/AArch64SchedA53.td} (99%) rename lib/Target/{ARM64/ARM64SchedCyclone.td => AArch64/AArch64SchedCyclone.td} (98%) rename lib/Target/{ARM64/ARM64Schedule.td => AArch64/AArch64Schedule.td} (95%) rename lib/Target/{ARM64/ARM64SelectionDAGInfo.cpp => AArch64/AArch64SelectionDAGInfo.cpp} (74%) rename lib/Target/{ARM64/ARM64SelectionDAGInfo.h => AArch64/AArch64SelectionDAGInfo.h} (69%) rename lib/Target/{ARM64/ARM64StorePairSuppress.cpp => AArch64/AArch64StorePairSuppress.cpp} (82%) rename lib/Target/{ARM64/ARM64Subtarget.cpp => AArch64/AArch64Subtarget.cpp} (71%) rename lib/Target/{ARM64/ARM64Subtarget.h => AArch64/AArch64Subtarget.h} (88%) rename lib/Target/{ARM64/ARM64TargetMachine.cpp => AArch64/AArch64TargetMachine.cpp} (51%) create mode 100644 lib/Target/AArch64/AArch64TargetMachine.h rename lib/Target/{ARM64/ARM64TargetObjectFile.cpp => AArch64/AArch64TargetObjectFile.cpp} (80%) rename lib/Target/{ARM64/ARM64TargetObjectFile.h => AArch64/AArch64TargetObjectFile.h} (73%) rename lib/Target/{ARM64/ARM64TargetTransformInfo.cpp => AArch64/AArch64TargetTransformInfo.cpp} (87%) rename lib/Target/{ARM64/AsmParser/ARM64AsmParser.cpp => AArch64/AsmParser/AArch64AsmParser.cpp} (78%) rename lib/Target/{ARM64 => AArch64}/AsmParser/CMakeLists.txt (59%) rename lib/Target/{ARM64 => 
AArch64}/AsmParser/LLVMBuild.txt (70%) rename lib/Target/{ARM64 => AArch64}/AsmParser/Makefile (82%) create mode 100644 lib/Target/AArch64/CMakeLists.txt rename lib/Target/{ARM64/Disassembler/ARM64Disassembler.cpp => AArch64/Disassembler/AArch64Disassembler.cpp} (69%) rename lib/Target/{ARM64/Disassembler/ARM64Disassembler.h => AArch64/Disassembler/AArch64Disassembler.h} (75%) rename lib/Target/{ARM64/Disassembler/ARM64ExternalSymbolizer.cpp => AArch64/Disassembler/AArch64ExternalSymbolizer.cpp} (86%) rename lib/Target/{ARM64/Disassembler/ARM64ExternalSymbolizer.h => AArch64/Disassembler/AArch64ExternalSymbolizer.h} (50%) rename lib/Target/{ARM64 => AArch64}/Disassembler/CMakeLists.txt (66%) rename lib/Target/{ARM64 => AArch64}/Disassembler/LLVMBuild.txt (71%) rename lib/Target/{ARM64 => AArch64}/Disassembler/Makefile (81%) create mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp rename lib/Target/{ARM64/InstPrinter/ARM64InstPrinter.h => AArch64/InstPrinter/AArch64InstPrinter.h} (88%) create mode 100644 lib/Target/AArch64/InstPrinter/CMakeLists.txt rename lib/Target/{ARM64 => AArch64}/InstPrinter/LLVMBuild.txt (73%) rename lib/Target/{ARM64 => AArch64}/InstPrinter/Makefile (82%) rename lib/Target/{ARM64 => AArch64}/LLVMBuild.txt (70%) rename lib/Target/{ARM64/MCTargetDesc/ARM64AddressingModes.h => AArch64/MCTargetDesc/AArch64AddressingModes.h} (89%) rename lib/Target/{ARM64/MCTargetDesc/ARM64AsmBackend.cpp => AArch64/MCTargetDesc/AArch64AsmBackend.cpp} (66%) rename lib/Target/{ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp => AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp} (56%) rename lib/Target/{ARM64/MCTargetDesc/ARM64ELFStreamer.cpp => AArch64/MCTargetDesc/AArch64ELFStreamer.cpp} (86%) rename lib/Target/{ARM64/MCTargetDesc/ARM64ELFStreamer.h => AArch64/MCTargetDesc/AArch64ELFStreamer.h} (55%) create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h rename lib/Target/{ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp => AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp} (82%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MCAsmInfo.h => AArch64/MCTargetDesc/AArch64MCAsmInfo.h} (61%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp => AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp} (73%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MCExpr.cpp => AArch64/MCTargetDesc/AArch64MCExpr.cpp} (89%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MCExpr.h => AArch64/MCTargetDesc/AArch64MCExpr.h} (92%) create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp rename lib/Target/{ARM64/MCTargetDesc/ARM64MCTargetDesc.h => AArch64/MCTargetDesc/AArch64MCTargetDesc.h} (51%) rename lib/Target/{ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp => AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp} (88%) create mode 100644 lib/Target/AArch64/MCTargetDesc/CMakeLists.txt rename lib/Target/{ARM64 => AArch64}/MCTargetDesc/LLVMBuild.txt (72%) rename lib/Target/{ARM64 => AArch64}/MCTargetDesc/Makefile (82%) create mode 100644 lib/Target/AArch64/Makefile rename lib/Target/{ARM64/TargetInfo/ARM64TargetInfo.cpp => AArch64/TargetInfo/AArch64TargetInfo.cpp} (64%) create mode 100644 lib/Target/AArch64/TargetInfo/CMakeLists.txt rename lib/Target/{ARM64/Utils => AArch64/TargetInfo}/LLVMBuild.txt (79%) rename lib/Target/{ARM64 => AArch64}/TargetInfo/Makefile (82%) rename lib/Target/{ARM64/Utils/ARM64BaseInfo.cpp => AArch64/Utils/AArch64BaseInfo.cpp} (89%) rename lib/Target/{ARM64/Utils/ARM64BaseInfo.h => AArch64/Utils/AArch64BaseInfo.h} (84%) create mode 100644 lib/Target/AArch64/Utils/CMakeLists.txt 
rename lib/Target/{ARM64/TargetInfo => AArch64/Utils}/LLVMBuild.txt (79%) rename lib/Target/{ARM64 => AArch64}/Utils/Makefile (64%) delete mode 100644 lib/Target/ARM64/ARM64.h delete mode 100644 lib/Target/ARM64/ARM64RegisterInfo.td delete mode 100644 lib/Target/ARM64/ARM64TargetMachine.h delete mode 100644 lib/Target/ARM64/CMakeLists.txt delete mode 100644 lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp delete mode 100644 lib/Target/ARM64/InstPrinter/CMakeLists.txt delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/ARM64/Makefile delete mode 100644 lib/Target/ARM64/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/ARM64/Utils/CMakeLists.txt rename test/Analysis/CostModel/{ARM64 => AArch64}/lit.local.cfg (73%) rename test/Analysis/CostModel/{ARM64 => AArch64}/select.ll (100%) rename test/Analysis/CostModel/{ARM64 => AArch64}/store.ll (100%) rename test/CodeGen/{ARM64 => AArch64}/aarch64-neon-v1i1-setcc.ll (100%) rename test/CodeGen/{ARM64/2011-03-09-CPSRSpill.ll => AArch64/arm64-2011-03-09-CPSRSpill.ll} (100%) rename test/CodeGen/{ARM64/2011-03-17-AsmPrinterCrash.ll => AArch64/arm64-2011-03-17-AsmPrinterCrash.ll} (100%) rename test/CodeGen/{ARM64/2011-03-21-Unaligned-Frame-Index.ll => AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll} (100%) rename test/CodeGen/{ARM64/2011-04-21-CPSRBug.ll => AArch64/arm64-2011-04-21-CPSRBug.ll} (100%) rename test/CodeGen/{ARM64/2011-10-18-LdStOptBug.ll => AArch64/arm64-2011-10-18-LdStOptBug.ll} (100%) rename test/CodeGen/{ARM64/2012-01-11-ComparisonDAGCrash.ll => AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll} (100%) rename test/CodeGen/{ARM64/2012-05-07-DAGCombineVectorExtract.ll => AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll} (100%) rename test/CodeGen/{ARM64/2012-05-07-MemcpyAlignBug.ll => AArch64/arm64-2012-05-07-MemcpyAlignBug.ll} (100%) rename test/CodeGen/{ARM64/2012-05-09-LOADgot-bug.ll => AArch64/arm64-2012-05-09-LOADgot-bug.ll} (100%) rename test/CodeGen/{ARM64/2012-05-22-LdStOptBug.ll => AArch64/arm64-2012-05-22-LdStOptBug.ll} (100%) rename test/CodeGen/{ARM64/2012-06-06-FPToUI.ll => AArch64/arm64-2012-06-06-FPToUI.ll} (100%) rename test/CodeGen/{ARM64/2012-07-11-InstrEmitterBug.ll => AArch64/arm64-2012-07-11-InstrEmitterBug.ll} (100%) rename test/CodeGen/{ARM64/2013-01-13-ffast-fcmp.ll => AArch64/arm64-2013-01-13-ffast-fcmp.ll} (72%) rename test/CodeGen/{ARM64/2013-01-23-frem-crash.ll => AArch64/arm64-2013-01-23-frem-crash.ll} (100%) rename test/CodeGen/{ARM64/2013-01-23-sext-crash.ll => AArch64/arm64-2013-01-23-sext-crash.ll} (100%) rename test/CodeGen/{ARM64/2013-02-12-shufv8i8.ll => AArch64/arm64-2013-02-12-shufv8i8.ll} (83%) rename test/CodeGen/{ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll => AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll} (100%) rename test/CodeGen/{ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll => AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll} (63%) rename test/CodeGen/{ARM64/2014-04-29-EXT-undef-mask.ll => AArch64/arm64-2014-04-29-EXT-undef-mask.ll} (94%) rename test/CodeGen/{ARM64/AdvSIMD-Scalar.ll => AArch64/arm64-AdvSIMD-Scalar.ll} (89%) rename test/CodeGen/{ARM64/aapcs.ll => AArch64/arm64-aapcs.ll} (100%) rename test/CodeGen/{ARM64/abi-varargs.ll => AArch64/arm64-abi-varargs.ll} (100%) rename test/CodeGen/{ARM64/abi.ll => AArch64/arm64-abi.ll} (100%) rename test/CodeGen/{ARM64/abi_align.ll => 
AArch64/arm64-abi_align.ll} (100%) rename test/CodeGen/{ARM64/addp.ll => AArch64/arm64-addp.ll} (90%) rename test/CodeGen/{ARM64/addr-mode-folding.ll => AArch64/arm64-addr-mode-folding.ll} (100%) rename test/CodeGen/{ARM64/addr-type-promotion.ll => AArch64/arm64-addr-type-promotion.ll} (100%) rename test/CodeGen/{ARM64/addrmode.ll => AArch64/arm64-addrmode.ll} (100%) rename test/CodeGen/{ARM64/alloc-no-stack-realign.ll => AArch64/arm64-alloc-no-stack-realign.ll} (100%) rename test/CodeGen/{ARM64/alloca-frame-pointer-offset.ll => AArch64/arm64-alloca-frame-pointer-offset.ll} (100%) rename test/CodeGen/{ARM64/andCmpBrToTBZ.ll => AArch64/arm64-andCmpBrToTBZ.ll} (100%) rename test/CodeGen/{ARM64/ands-bad-peephole.ll => AArch64/arm64-ands-bad-peephole.ll} (100%) rename test/CodeGen/{ARM64/anyregcc-crash.ll => AArch64/arm64-anyregcc-crash.ll} (100%) rename test/CodeGen/{ARM64/anyregcc.ll => AArch64/arm64-anyregcc.ll} (100%) rename test/CodeGen/{ARM64/arith-saturating.ll => AArch64/arm64-arith-saturating.ll} (58%) rename test/CodeGen/{ARM64/arith.ll => AArch64/arm64-arith.ll} (90%) rename test/CodeGen/{ARM64/arm64-dead-def-elimination-flag.ll => AArch64/arm64-arm64-dead-def-elimination-flag.ll} (80%) rename test/CodeGen/{ARM64/atomic-128.ll => AArch64/arm64-atomic-128.ll} (100%) rename test/CodeGen/{ARM64/atomic.ll => AArch64/arm64-atomic.ll} (100%) rename test/CodeGen/{ARM64/basic-pic.ll => AArch64/arm64-basic-pic.ll} (100%) rename test/CodeGen/{ARM64/big-endian-bitconverts.ll => AArch64/arm64-big-endian-bitconverts.ll} (99%) rename test/CodeGen/{ARM64/big-endian-eh.ll => AArch64/arm64-big-endian-eh.ll} (100%) rename test/CodeGen/{ARM64/big-endian-varargs.ll => AArch64/arm64-big-endian-varargs.ll} (100%) rename test/CodeGen/{ARM64/big-endian-vector-callee.ll => AArch64/arm64-big-endian-vector-callee.ll} (99%) rename test/CodeGen/{ARM64/big-endian-vector-caller.ll => AArch64/arm64-big-endian-vector-caller.ll} (99%) rename test/CodeGen/{ARM64/big-imm-offsets.ll => AArch64/arm64-big-imm-offsets.ll} (100%) rename test/CodeGen/{ARM64/big-stack.ll => AArch64/arm64-big-stack.ll} (100%) rename test/CodeGen/{ARM64/bitfield-extract.ll => AArch64/arm64-bitfield-extract.ll} (100%) rename test/CodeGen/{ARM64/blockaddress.ll => AArch64/arm64-blockaddress.ll} (100%) rename test/CodeGen/{ARM64/build-vector.ll => AArch64/arm64-build-vector.ll} (94%) rename test/CodeGen/{ARM64/call-tailcalls.ll => AArch64/arm64-call-tailcalls.ll} (100%) rename test/CodeGen/{ARM64/cast-opt.ll => AArch64/arm64-cast-opt.ll} (100%) rename test/CodeGen/{ARM64/ccmp-heuristics.ll => AArch64/arm64-ccmp-heuristics.ll} (99%) rename test/CodeGen/{ARM64/ccmp.ll => AArch64/arm64-ccmp.ll} (98%) rename test/CodeGen/{ARM64/clrsb.ll => AArch64/arm64-clrsb.ll} (100%) rename test/CodeGen/{ARM64/coalesce-ext.ll => AArch64/arm64-coalesce-ext.ll} (100%) rename test/CodeGen/{ARM64/code-model-large-abs.ll => AArch64/arm64-code-model-large-abs.ll} (100%) rename test/CodeGen/{ARM64/collect-loh-garbage-crash.ll => AArch64/arm64-collect-loh-garbage-crash.ll} (91%) rename test/CodeGen/{ARM64/collect-loh-str.ll => AArch64/arm64-collect-loh-str.ll} (86%) rename test/CodeGen/{ARM64/collect-loh.ll => AArch64/arm64-collect-loh.ll} (85%) rename test/CodeGen/{ARM64/complex-copy-noneon.ll => AArch64/arm64-complex-copy-noneon.ll} (100%) rename test/CodeGen/{ARM64/complex-ret.ll => AArch64/arm64-complex-ret.ll} (100%) rename test/CodeGen/{ARM64/const-addr.ll => AArch64/arm64-const-addr.ll} (100%) rename test/CodeGen/{ARM64/convert-v2f64-v2i32.ll => 
AArch64/arm64-convert-v2f64-v2i32.ll} (86%) rename test/CodeGen/{ARM64/convert-v2i32-v2f64.ll => AArch64/arm64-convert-v2i32-v2f64.ll} (90%) rename test/CodeGen/{ARM64/copy-tuple.ll => AArch64/arm64-copy-tuple.ll} (69%) rename test/CodeGen/{ARM64/crc32.ll => AArch64/arm64-crc32.ll} (58%) rename test/CodeGen/{ARM64/crypto.ll => AArch64/arm64-crypto.ll} (50%) rename test/CodeGen/{ARM64/cse.ll => AArch64/arm64-cse.ll} (100%) rename test/CodeGen/{ARM64/csel.ll => AArch64/arm64-csel.ll} (100%) rename test/CodeGen/{ARM64/cvt.ll => AArch64/arm64-cvt.ll} (52%) rename test/CodeGen/{ARM64/dagcombiner-convergence.ll => AArch64/arm64-dagcombiner-convergence.ll} (100%) rename test/CodeGen/{ARM64/dagcombiner-dead-indexed-load.ll => AArch64/arm64-dagcombiner-dead-indexed-load.ll} (100%) rename test/CodeGen/{ARM64/dagcombiner-indexed-load.ll => AArch64/arm64-dagcombiner-indexed-load.ll} (100%) rename test/CodeGen/{ARM64/dagcombiner-load-slicing.ll => AArch64/arm64-dagcombiner-load-slicing.ll} (100%) rename test/CodeGen/{ARM64/dead-def-frame-index.ll => AArch64/arm64-dead-def-frame-index.ll} (100%) rename test/CodeGen/{ARM64/dead-register-def-bug.ll => AArch64/arm64-dead-register-def-bug.ll} (100%) rename test/CodeGen/{ARM64/dup.ll => AArch64/arm64-dup.ll} (99%) rename test/CodeGen/{ARM64/early-ifcvt.ll => AArch64/arm64-early-ifcvt.ll} (100%) rename test/CodeGen/{ARM64/elf-calls.ll => AArch64/arm64-elf-calls.ll} (100%) rename test/CodeGen/{ARM64/elf-constpool.ll => AArch64/arm64-elf-constpool.ll} (100%) rename test/CodeGen/{ARM64/elf-globals.ll => AArch64/arm64-elf-globals.ll} (100%) rename test/CodeGen/{ARM64/ext.ll => AArch64/arm64-ext.ll} (98%) rename test/CodeGen/{ARM64/extend-int-to-fp.ll => AArch64/arm64-extend-int-to-fp.ll} (86%) rename test/CodeGen/{ARM64/extend.ll => AArch64/arm64-extend.ll} (100%) rename test/CodeGen/{ARM64/extern-weak.ll => AArch64/arm64-extern-weak.ll} (100%) rename test/CodeGen/{ARM64/extload-knownzero.ll => AArch64/arm64-extload-knownzero.ll} (100%) rename test/CodeGen/{ARM64/extract.ll => AArch64/arm64-extract.ll} (95%) rename test/CodeGen/{ARM64/extract_subvector.ll => AArch64/arm64-extract_subvector.ll} (95%) rename test/CodeGen/{ARM64/fast-isel-addr-offset.ll => AArch64/arm64-fast-isel-addr-offset.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-alloca.ll => AArch64/arm64-fast-isel-alloca.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-br.ll => AArch64/arm64-fast-isel-br.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-call.ll => AArch64/arm64-fast-isel-call.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-conversion.ll => AArch64/arm64-fast-isel-conversion.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-fcmp.ll => AArch64/arm64-fast-isel-fcmp.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-gv.ll => AArch64/arm64-fast-isel-gv.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-icmp.ll => AArch64/arm64-fast-isel-icmp.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-indirectbr.ll => AArch64/arm64-fast-isel-indirectbr.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-intrinsic.ll => AArch64/arm64-fast-isel-intrinsic.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-materialize.ll => AArch64/arm64-fast-isel-materialize.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-noconvert.ll => AArch64/arm64-fast-isel-noconvert.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-rem.ll => AArch64/arm64-fast-isel-rem.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-ret.ll => AArch64/arm64-fast-isel-ret.ll} (100%) rename test/CodeGen/{ARM64/fast-isel-select.ll => AArch64/arm64-fast-isel-select.ll} (100%) 
rename test/CodeGen/{ARM64/fast-isel.ll => AArch64/arm64-fast-isel.ll} (100%) rename test/CodeGen/{ARM64/fastcc-tailcall.ll => AArch64/arm64-fastcc-tailcall.ll} (100%) rename test/CodeGen/{ARM64/fastisel-gep-promote-before-add.ll => AArch64/arm64-fastisel-gep-promote-before-add.ll} (100%) rename test/CodeGen/{ARM64/fcmp-opt.ll => AArch64/arm64-fcmp-opt.ll} (98%) rename test/CodeGen/{ARM64/fcopysign.ll => AArch64/arm64-fcopysign.ll} (100%) rename test/CodeGen/{ARM64/fixed-point-scalar-cvt-dagcombine.ll => AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll} (62%) rename test/CodeGen/{ARM64/fmadd.ll => AArch64/arm64-fmadd.ll} (100%) rename test/CodeGen/{ARM64/fmax.ll => AArch64/arm64-fmax.ll} (100%) create mode 100644 test/CodeGen/AArch64/arm64-fminv.ll rename test/CodeGen/{ARM64/fmuladd.ll => AArch64/arm64-fmuladd.ll} (97%) rename test/CodeGen/{ARM64/fold-address.ll => AArch64/arm64-fold-address.ll} (100%) rename test/CodeGen/{ARM64/fold-lsl.ll => AArch64/arm64-fold-lsl.ll} (97%) rename test/CodeGen/{ARM64/fp-contract-zero.ll => AArch64/arm64-fp-contract-zero.ll} (100%) rename test/CodeGen/{ARM64/fp-imm.ll => AArch64/arm64-fp-imm.ll} (100%) rename test/CodeGen/{ARM64/fp.ll => AArch64/arm64-fp.ll} (100%) rename test/CodeGen/{ARM64/fp128-folding.ll => AArch64/arm64-fp128-folding.ll} (100%) rename test/CodeGen/{ARM64/fp128.ll => AArch64/arm64-fp128.ll} (100%) rename test/CodeGen/{ARM64/frame-index.ll => AArch64/arm64-frame-index.ll} (100%) rename test/CodeGen/{ARM64/frameaddr.ll => AArch64/arm64-frameaddr.ll} (100%) rename test/CodeGen/{ARM64/global-address.ll => AArch64/arm64-global-address.ll} (100%) rename test/CodeGen/{ARM64/hello.ll => AArch64/arm64-hello.ll} (100%) rename test/CodeGen/{ARM64/i16-subreg-extract.ll => AArch64/arm64-i16-subreg-extract.ll} (80%) rename test/CodeGen/{ARM64/icmp-opt.ll => AArch64/arm64-icmp-opt.ll} (100%) rename test/CodeGen/{ARM64/illegal-float-ops.ll => AArch64/arm64-illegal-float-ops.ll} (100%) rename test/CodeGen/{ARM64/indexed-memory.ll => AArch64/arm64-indexed-memory.ll} (99%) rename test/CodeGen/{ARM64/indexed-vector-ldst-2.ll => AArch64/arm64-indexed-vector-ldst-2.ll} (100%) rename test/CodeGen/{ARM64/indexed-vector-ldst.ll => AArch64/arm64-indexed-vector-ldst.ll} (75%) rename test/CodeGen/{ARM64/inline-asm-error-I.ll => AArch64/arm64-inline-asm-error-I.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-J.ll => AArch64/arm64-inline-asm-error-J.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-K.ll => AArch64/arm64-inline-asm-error-K.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-L.ll => AArch64/arm64-inline-asm-error-L.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-M.ll => AArch64/arm64-inline-asm-error-M.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-error-N.ll => AArch64/arm64-inline-asm-error-N.ll} (100%) rename test/CodeGen/{ARM64/inline-asm-zero-reg-error.ll => AArch64/arm64-inline-asm-zero-reg-error.ll} (100%) rename test/CodeGen/{ARM64/inline-asm.ll => AArch64/arm64-inline-asm.ll} (98%) rename test/CodeGen/{ARM64/join-reserved.ll => AArch64/arm64-join-reserved.ll} (100%) rename test/CodeGen/{ARM64/jumptable.ll => AArch64/arm64-jumptable.ll} (100%) rename test/CodeGen/{ARM64/aarch64-large-frame.ll => AArch64/arm64-large-frame.ll} (100%) rename test/CodeGen/{ARM64/ld1.ll => AArch64/arm64-ld1.ll} (67%) rename test/CodeGen/{ARM64/ldp.ll => AArch64/arm64-ldp.ll} (98%) rename test/CodeGen/{ARM64/ldur.ll => AArch64/arm64-ldur.ll} (100%) rename test/CodeGen/{ARM64/ldxr-stxr.ll => AArch64/arm64-ldxr-stxr.ll} (69%) rename 
test/CodeGen/{ARM64/leaf.ll => AArch64/arm64-leaf.ll} (100%) rename test/CodeGen/{ARM64/long-shift.ll => AArch64/arm64-long-shift.ll} (100%) rename test/CodeGen/{ARM64/memcpy-inline.ll => AArch64/arm64-memcpy-inline.ll} (100%) rename test/CodeGen/{ARM64/memset-inline.ll => AArch64/arm64-memset-inline.ll} (100%) rename test/CodeGen/{ARM64/memset-to-bzero.ll => AArch64/arm64-memset-to-bzero.ll} (100%) rename test/CodeGen/{ARM64/misched-basic-A53.ll => AArch64/arm64-misched-basic-A53.ll} (96%) rename test/CodeGen/{ARM64/misched-forwarding-A53.ll => AArch64/arm64-misched-forwarding-A53.ll} (100%) rename test/CodeGen/{ARM64/movi.ll => AArch64/arm64-movi.ll} (100%) rename test/CodeGen/{ARM64/mul.ll => AArch64/arm64-mul.ll} (100%) rename test/CodeGen/{ARM64/named-reg-alloc.ll => AArch64/arm64-named-reg-alloc.ll} (100%) rename test/CodeGen/{ARM64/named-reg-notareg.ll => AArch64/arm64-named-reg-notareg.ll} (100%) rename test/CodeGen/{ARM64/neg.ll => AArch64/arm64-neg.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-2velem-high.ll => AArch64/arm64-neon-2velem-high.ll} (82%) rename test/CodeGen/{ARM64/aarch64-neon-2velem.ll => AArch64/arm64-neon-2velem.ll} (84%) rename test/CodeGen/{ARM64/aarch64-neon-3vdiff.ll => AArch64/arm64-neon-3vdiff.ll} (85%) rename test/CodeGen/{ARM64/aarch64-neon-aba-abd.ll => AArch64/arm64-neon-aba-abd.ll} (58%) rename test/CodeGen/{ARM64/aarch64-neon-across.ll => AArch64/arm64-neon-across.ll} (56%) rename test/CodeGen/{ARM64/aarch64-neon-add-pairwise.ll => AArch64/arm64-neon-add-pairwise.ll} (51%) rename test/CodeGen/{ARM64/aarch64-neon-add-sub.ll => AArch64/arm64-neon-add-sub.ll} (89%) rename test/CodeGen/{ARM64/neon-compare-instructions.ll => AArch64/arm64-neon-compare-instructions.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-copy.ll => AArch64/arm64-neon-copy.ll} (99%) rename test/CodeGen/{ARM64/aarch64-neon-copyPhysReg-tuple.ll => AArch64/arm64-neon-copyPhysReg-tuple.ll} (56%) rename test/CodeGen/{ARM64/aarch64-neon-mul-div.ll => AArch64/arm64-neon-mul-div.ll} (92%) rename test/CodeGen/{ARM64/aarch64-neon-scalar-by-elem-mul.ll => AArch64/arm64-neon-scalar-by-elem-mul.ll} (85%) rename test/CodeGen/{ARM64/aarch64-neon-select_cc.ll => AArch64/arm64-neon-select_cc.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-simd-ldst-one.ll => AArch64/arm64-neon-simd-ldst-one.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-simd-shift.ll => AArch64/arm64-neon-simd-shift.ll} (81%) rename test/CodeGen/{ARM64/aarch64-neon-simd-vget.ll => AArch64/arm64-neon-simd-vget.ll} (100%) rename test/CodeGen/{ARM64/neon-v1i1-setcc.ll => AArch64/arm64-neon-v1i1-setcc.ll} (100%) rename test/CodeGen/{ARM64/aarch64-neon-vector-list-spill.ll => AArch64/arm64-neon-vector-list-spill.ll} (77%) rename test/CodeGen/{ARM64/patchpoint.ll => AArch64/arm64-patchpoint.ll} (100%) rename test/CodeGen/{ARM64/pic-local-symbol.ll => AArch64/arm64-pic-local-symbol.ll} (100%) rename test/CodeGen/{ARM64/platform-reg.ll => AArch64/arm64-platform-reg.ll} (100%) rename test/CodeGen/{ARM64/popcnt.ll => AArch64/arm64-popcnt.ll} (93%) rename test/CodeGen/{ARM64/prefetch.ll => AArch64/arm64-prefetch.ll} (100%) rename test/CodeGen/{ARM64/promote-const.ll => AArch64/arm64-promote-const.ll} (98%) rename test/CodeGen/{ARM64/redzone.ll => AArch64/arm64-redzone.ll} (88%) rename test/CodeGen/{ARM64/reg-copy-noneon.ll => AArch64/arm64-reg-copy-noneon.ll} (100%) rename test/CodeGen/{ARM64/register-offset-addressing.ll => AArch64/arm64-register-offset-addressing.ll} (100%) rename test/CodeGen/{ARM64/register-pairing.ll => 
AArch64/arm64-register-pairing.ll} (100%) rename test/CodeGen/{ARM64/regress-f128csel-flags.ll => AArch64/arm64-regress-f128csel-flags.ll} (100%) rename test/CodeGen/{ARM64/regress-interphase-shift.ll => AArch64/arm64-regress-interphase-shift.ll} (100%) rename test/CodeGen/{ARM64/return-vector.ll => AArch64/arm64-return-vector.ll} (100%) rename test/CodeGen/{ARM64/returnaddr.ll => AArch64/arm64-returnaddr.ll} (100%) rename test/CodeGen/{ARM64/rev.ll => AArch64/arm64-rev.ll} (99%) rename test/CodeGen/{ARM64/rounding.ll => AArch64/arm64-rounding.ll} (100%) rename test/CodeGen/{ARM64/scaled_iv.ll => AArch64/arm64-scaled_iv.ll} (100%) rename test/CodeGen/{ARM64/scvt.ll => AArch64/arm64-scvt.ll} (99%) rename test/CodeGen/{ARM64/shifted-sext.ll => AArch64/arm64-shifted-sext.ll} (100%) create mode 100644 test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll rename test/CodeGen/{ARM64/simplest-elf.ll => AArch64/arm64-simplest-elf.ll} (100%) rename test/CodeGen/{ARM64/sincos.ll => AArch64/arm64-sincos.ll} (100%) rename test/CodeGen/{ARM64/sitofp-combine-chains.ll => AArch64/arm64-sitofp-combine-chains.ll} (100%) rename test/CodeGen/{ARM64/sli-sri-opt.ll => AArch64/arm64-sli-sri-opt.ll} (95%) rename test/CodeGen/{ARM64/smaxv.ll => AArch64/arm64-smaxv.ll} (61%) rename test/CodeGen/{ARM64/sminv.ll => AArch64/arm64-sminv.ll} (61%) rename test/CodeGen/{ARM64/spill-lr.ll => AArch64/arm64-spill-lr.ll} (100%) rename test/CodeGen/{ARM64/spill.ll => AArch64/arm64-spill.ll} (88%) rename test/CodeGen/{ARM64/st1.ll => AArch64/arm64-st1.ll} (50%) rename test/CodeGen/{ARM64/stack-no-frame.ll => AArch64/arm64-stack-no-frame.ll} (100%) rename test/CodeGen/{ARM64/stackmap.ll => AArch64/arm64-stackmap.ll} (100%) rename test/CodeGen/{ARM64/stackpointer.ll => AArch64/arm64-stackpointer.ll} (100%) rename test/CodeGen/{ARM64/stacksave.ll => AArch64/arm64-stacksave.ll} (100%) rename test/CodeGen/{ARM64/stp.ll => AArch64/arm64-stp.ll} (94%) rename test/CodeGen/{ARM64/strict-align.ll => AArch64/arm64-strict-align.ll} (75%) rename test/CodeGen/{ARM64/stur.ll => AArch64/arm64-stur.ll} (96%) rename test/CodeGen/{ARM64/subsections.ll => AArch64/arm64-subsections.ll} (100%) rename test/CodeGen/{ARM64/subvector-extend.ll => AArch64/arm64-subvector-extend.ll} (97%) rename test/CodeGen/{ARM64/swizzle-tbl-i16-layout.ll => AArch64/arm64-swizzle-tbl-i16-layout.ll} (100%) create mode 100644 test/CodeGen/AArch64/arm64-tbl.ll rename test/CodeGen/{ARM64/this-return.ll => AArch64/arm64-this-return.ll} (100%) rename test/CodeGen/{ARM64/tls-darwin.ll => AArch64/arm64-tls-darwin.ll} (100%) rename test/CodeGen/{ARM64/tls-dynamic-together.ll => AArch64/arm64-tls-dynamic-together.ll} (100%) rename test/CodeGen/{ARM64/tls-dynamics.ll => AArch64/arm64-tls-dynamics.ll} (100%) rename test/CodeGen/{ARM64/tls-execs.ll => AArch64/arm64-tls-execs.ll} (100%) rename test/CodeGen/{ARM64/trap.ll => AArch64/arm64-trap.ll} (100%) rename test/CodeGen/{ARM64/trn.ll => AArch64/arm64-trn.ll} (98%) rename test/CodeGen/{ARM64/trunc-store.ll => AArch64/arm64-trunc-store.ll} (100%) rename test/CodeGen/{ARM64/umaxv.ll => AArch64/arm64-umaxv.ll} (75%) rename test/CodeGen/{ARM64/uminv.ll => AArch64/arm64-uminv.ll} (75%) rename test/CodeGen/{ARM64/umov.ll => AArch64/arm64-umov.ll} (90%) rename test/CodeGen/{ARM64/unaligned_ldst.ll => AArch64/arm64-unaligned_ldst.ll} (100%) rename test/CodeGen/{ARM64/uzp.ll => AArch64/arm64-uzp.ll} (98%) rename test/CodeGen/{ARM64/vaargs.ll => AArch64/arm64-vaargs.ll} (100%) rename test/CodeGen/{ARM64/vabs.ll => AArch64/arm64-vabs.ll} (68%) 
rename test/CodeGen/{ARM64/vadd.ll => AArch64/arm64-vadd.ll} (80%) rename test/CodeGen/{ARM64/vaddlv.ll => AArch64/arm64-vaddlv.ll} (55%) rename test/CodeGen/{ARM64/vaddv.ll => AArch64/arm64-vaddv.ll} (63%) rename test/CodeGen/{ARM64/variadic-aapcs.ll => AArch64/arm64-variadic-aapcs.ll} (100%) rename test/CodeGen/{ARM64/vbitwise.ll => AArch64/arm64-vbitwise.ll} (86%) rename test/CodeGen/{ARM64/vclz.ll => AArch64/arm64-vclz.ll} (98%) rename test/CodeGen/{ARM64/vcmp.ll => AArch64/arm64-vcmp.ll} (76%) create mode 100644 test/CodeGen/AArch64/arm64-vcnt.ll rename test/CodeGen/{ARM64/vcombine.ll => AArch64/arm64-vcombine.ll} (90%) rename test/CodeGen/{ARM64/vcvt.ll => AArch64/arm64-vcvt.ll} (67%) rename test/CodeGen/{ARM64/vcvt_f.ll => AArch64/arm64-vcvt_f.ll} (73%) rename test/CodeGen/{ARM64/vcvt_f32_su32.ll => AArch64/arm64-vcvt_f32_su32.ll} (75%) create mode 100644 test/CodeGen/AArch64/arm64-vcvt_n.ll rename test/CodeGen/{ARM64/vcvt_su32_f32.ll => AArch64/arm64-vcvt_su32_f32.ll} (91%) rename test/CodeGen/{ARM64/vcvtxd_f32_f64.ll => AArch64/arm64-vcvtxd_f32_f64.ll} (54%) rename test/CodeGen/{ARM64/vecCmpBr.ll => AArch64/arm64-vecCmpBr.ll} (87%) rename test/CodeGen/{ARM64/vecFold.ll => AArch64/arm64-vecFold.ll} (74%) rename test/CodeGen/{ARM64/vector-ext.ll => AArch64/arm64-vector-ext.ll} (81%) rename test/CodeGen/{ARM64/vector-imm.ll => AArch64/arm64-vector-imm.ll} (98%) rename test/CodeGen/{ARM64/vector-insertion.ll => AArch64/arm64-vector-insertion.ll} (91%) rename test/CodeGen/{ARM64/vector-ldst.ll => AArch64/arm64-vector-ldst.ll} (99%) rename test/CodeGen/{ARM64/vext.ll => AArch64/arm64-vext.ll} (99%) rename test/CodeGen/{ARM64/vext_reverse.ll => AArch64/arm64-vext_reverse.ll} (100%) rename test/CodeGen/{ARM64/vfloatintrinsics.ll => AArch64/arm64-vfloatintrinsics.ll} (99%) rename test/CodeGen/{ARM64/vhadd.ll => AArch64/arm64-vhadd.ll} (51%) rename test/CodeGen/{ARM64/vhsub.ll => AArch64/arm64-vhsub.ll} (50%) rename test/CodeGen/{ARM64/virtual_base.ll => AArch64/arm64-virtual_base.ll} (100%) rename test/CodeGen/{ARM64/vmax.ll => AArch64/arm64-vmax.ll} (52%) create mode 100644 test/CodeGen/AArch64/arm64-vminmaxnm.ll rename test/CodeGen/{ARM64/vmovn.ll => AArch64/arm64-vmovn.ll} (74%) rename test/CodeGen/{ARM64/vmul.ll => AArch64/arm64-vmul.ll} (80%) rename test/CodeGen/{ARM64/volatile.ll => AArch64/arm64-volatile.ll} (100%) rename test/CodeGen/{ARM64/vpopcnt.ll => AArch64/arm64-vpopcnt.ll} (100%) rename test/CodeGen/{ARM64/vqadd.ll => AArch64/arm64-vqadd.ll} (50%) create mode 100644 test/CodeGen/AArch64/arm64-vqsub.ll rename test/CodeGen/{ARM64/vselect.ll => AArch64/arm64-vselect.ll} (89%) rename test/CodeGen/{ARM64/vsetcc_fp.ll => AArch64/arm64-vsetcc_fp.ll} (80%) rename test/CodeGen/{ARM64/vshift.ll => AArch64/arm64-vshift.ll} (68%) rename test/CodeGen/{ARM64/vshr.ll => AArch64/arm64-vshr.ll} (95%) rename test/CodeGen/{ARM64/vshuffle.ll => AArch64/arm64-vshuffle.ll} (100%) rename test/CodeGen/{ARM64/vsqrt.ll => AArch64/arm64-vsqrt.ll} (51%) rename test/CodeGen/{ARM64/vsra.ll => AArch64/arm64-vsra.ll} (98%) rename test/CodeGen/{ARM64/vsub.ll => AArch64/arm64-vsub.ll} (85%) rename test/CodeGen/{ARM64/weak-reference.ll => AArch64/arm64-weak-reference.ll} (100%) rename test/CodeGen/{ARM64/xaluo.ll => AArch64/arm64-xaluo.ll} (100%) rename test/CodeGen/{ARM64/zero-cycle-regmov.ll => AArch64/arm64-zero-cycle-regmov.ll} (100%) rename test/CodeGen/{ARM64/zero-cycle-zeroing.ll => AArch64/arm64-zero-cycle-zeroing.ll} (100%) rename test/CodeGen/{ARM64/zext.ll => AArch64/arm64-zext.ll} (100%) rename 
test/CodeGen/{ARM64/zextload-unscaled.ll => AArch64/arm64-zextload-unscaled.ll} (100%) rename test/CodeGen/{ARM64/zip.ll => AArch64/arm64-zip.ll} (98%) rename test/CodeGen/{ARM64 => AArch64}/lit.local.cfg (89%) delete mode 100644 test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S delete mode 100644 test/CodeGen/ARM64/fminv.ll delete mode 100644 test/CodeGen/ARM64/simd-scalar-to-vector.ll delete mode 100644 test/CodeGen/ARM64/tbl.ll delete mode 100644 test/CodeGen/ARM64/vcnt.ll delete mode 100644 test/CodeGen/ARM64/vcvt_n.ll delete mode 100644 test/CodeGen/ARM64/vminmaxnm.ll delete mode 100644 test/CodeGen/ARM64/vqsub.ll rename test/DebugInfo/{ARM64 => AArch64}/struct_by_value.ll (100%) rename test/MC/{ARM64/adr.s => AArch64/arm64-adr.s} (88%) rename test/MC/{ARM64/advsimd.s => AArch64/arm64-advsimd.s} (100%) rename test/MC/{ARM64/aliases.s => AArch64/arm64-aliases.s} (100%) rename test/MC/{ARM64/arithmetic-encoding.s => AArch64/arm64-arithmetic-encoding.s} (100%) rename test/MC/{ARM64/arm64-fixup.s => AArch64/arm64-arm64-fixup.s} (76%) rename test/MC/{ARM64/basic-a64-instructions.s => AArch64/arm64-basic-a64-instructions.s} (100%) rename test/MC/{ARM64/be-datalayout.s => AArch64/arm64-be-datalayout.s} (100%) rename test/MC/{ARM64/bitfield-encoding.s => AArch64/arm64-bitfield-encoding.s} (100%) rename test/MC/{ARM64/branch-encoding.s => AArch64/arm64-branch-encoding.s} (77%) rename test/MC/{ARM64/condbr-without-dots.s => AArch64/arm64-condbr-without-dots.s} (100%) rename test/MC/{ARM64/crypto.s => AArch64/arm64-crypto.s} (100%) rename test/MC/{ARM64/diagno-predicate.s => AArch64/arm64-diagno-predicate.s} (100%) rename test/MC/{ARM64/diags.s => AArch64/arm64-diags.s} (99%) rename test/MC/{ARM64/directive_loh.s => AArch64/arm64-directive_loh.s} (100%) rename test/MC/{ARM64/elf-reloc-condbr.s => AArch64/arm64-elf-reloc-condbr.s} (100%) rename test/MC/{ARM64/elf-relocs.s => AArch64/arm64-elf-relocs.s} (100%) rename test/MC/{ARM64/fp-encoding.s => AArch64/arm64-fp-encoding.s} (100%) rename test/MC/{ARM64/large-relocs.s => AArch64/arm64-large-relocs.s} (83%) rename test/MC/{ARM64/leaf-compact-unwind.s => AArch64/arm64-leaf-compact-unwind.s} (100%) rename test/MC/{ARM64/logical-encoding.s => AArch64/arm64-logical-encoding.s} (100%) rename test/MC/{ARM64/mapping-across-sections.s => AArch64/arm64-mapping-across-sections.s} (100%) rename test/MC/{ARM64/mapping-within-section.s => AArch64/arm64-mapping-within-section.s} (100%) rename test/MC/{ARM64/memory.s => AArch64/arm64-memory.s} (100%) rename test/MC/{ARM64/nv-cond.s => AArch64/arm64-nv-cond.s} (100%) rename test/MC/{ARM64/optional-hash.s => AArch64/arm64-optional-hash.s} (100%) rename test/MC/{ARM64/separator.s => AArch64/arm64-separator.s} (100%) rename test/MC/{ARM64/simd-ldst.s => AArch64/arm64-simd-ldst.s} (100%) rename test/MC/{ARM64/small-data-fixups.s => AArch64/arm64-small-data-fixups.s} (100%) rename test/MC/{ARM64/spsel-sysreg.s => AArch64/arm64-spsel-sysreg.s} (100%) rename test/MC/{ARM64/system-encoding.s => AArch64/arm64-system-encoding.s} (100%) rename test/MC/{ARM64/target-specific-sysreg.s => AArch64/arm64-target-specific-sysreg.s} (100%) rename test/MC/{ARM64/tls-modifiers-darwin.s => AArch64/arm64-tls-modifiers-darwin.s} (100%) rename test/MC/{ARM64/tls-relocs.s => AArch64/arm64-tls-relocs.s} (82%) rename test/MC/{ARM64/v128_lo-diagnostics.s => AArch64/arm64-v128_lo-diagnostics.s} (100%) rename test/MC/{ARM64/variable-exprs.s => AArch64/arm64-variable-exprs.s} (100%) rename test/MC/{ARM64/vector-lists.s => 
AArch64/arm64-vector-lists.s} (100%) rename test/MC/{ARM64/verbose-vector-case.s => AArch64/arm64-verbose-vector-case.s} (100%) delete mode 100644 test/MC/ARM64/lit.local.cfg rename test/MC/Disassembler/{ARM64/advsimd.txt => AArch64/arm64-advsimd.txt} (100%) rename test/MC/Disassembler/{ARM64/arithmetic.txt => AArch64/arm64-arithmetic.txt} (100%) rename test/MC/Disassembler/{ARM64/basic-a64-undefined.txt => AArch64/arm64-basic-a64-undefined.txt} (100%) rename test/MC/Disassembler/{ARM64/bitfield.txt => AArch64/arm64-bitfield.txt} (100%) rename test/MC/Disassembler/{ARM64/branch.txt => AArch64/arm64-branch.txt} (100%) rename test/MC/Disassembler/{ARM64/canonical-form.txt => AArch64/arm64-canonical-form.txt} (100%) rename test/MC/Disassembler/{ARM64/crc32.txt => AArch64/arm64-crc32.txt} (100%) rename test/MC/Disassembler/{ARM64/crypto.txt => AArch64/arm64-crypto.txt} (100%) rename test/MC/Disassembler/{ARM64/invalid-logical.txt => AArch64/arm64-invalid-logical.txt} (100%) rename test/MC/Disassembler/{ARM64/logical.txt => AArch64/arm64-logical.txt} (100%) rename test/MC/Disassembler/{ARM64/memory.txt => AArch64/arm64-memory.txt} (100%) rename test/MC/Disassembler/{ARM64/non-apple-fmov.txt => AArch64/arm64-non-apple-fmov.txt} (100%) rename test/MC/Disassembler/{ARM64/scalar-fp.txt => AArch64/arm64-scalar-fp.txt} (100%) rename test/MC/Disassembler/{ARM64/system.txt => AArch64/arm64-system.txt} (100%) delete mode 100644 test/MC/Disassembler/ARM64/lit.local.cfg rename test/MC/MachO/{ARM64 => AArch64}/darwin-ARM64-local-label-diff.s (100%) rename test/MC/MachO/{ARM64 => AArch64}/darwin-ARM64-reloc.s (100%) rename test/{Transforms/GlobalMerge/ARM64 => MC/MachO/AArch64}/lit.local.cfg (74%) rename test/Transforms/ConstantHoisting/{ARM64 => AArch64}/const-addr.ll (100%) rename test/Transforms/ConstantHoisting/{ARM64 => AArch64}/large-immediate.ll (100%) rename test/Transforms/ConstantHoisting/{ARM64 => AArch64}/lit.local.cfg (73%) rename test/Transforms/GlobalMerge/{ARM64 => AArch64}/arm64.ll (100%) rename test/{DebugInfo/ARM64 => Transforms/GlobalMerge/AArch64}/lit.local.cfg (74%) rename test/Transforms/LoopStrengthReduce/{ARM64 => AArch64}/lit.local.cfg (78%) rename test/Transforms/LoopStrengthReduce/{ARM64 => AArch64}/lsr-memcpy.ll (100%) rename test/Transforms/LoopStrengthReduce/{ARM64 => AArch64}/lsr-memset.ll (100%) rename test/Transforms/LoopStrengthReduce/{ARM64 => AArch64}/req-regs.ll (100%) rename test/Transforms/LoopVectorize/{ARM64 => AArch64}/arm64-unroll.ll (100%) rename test/Transforms/LoopVectorize/{ARM64 => AArch64}/gather-cost.ll (100%) delete mode 100644 test/Transforms/LoopVectorize/ARM64/lit.local.cfg rename test/{MC/MachO/ARM64 => Transforms/SLPVectorizer/AArch64}/lit.local.cfg (73%) rename test/Transforms/SLPVectorizer/{ARM64 => AArch64}/mismatched-intrinsics.ll (100%) delete mode 100644 test/Transforms/SLPVectorizer/ARM64/lit.local.cfg diff --git a/CMakeLists.txt b/CMakeLists.txt index b19ab0271ab..0d6eead42f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,7 +127,7 @@ set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) set(LLVM_ALL_TARGETS - ARM64 + AArch64 ARM CppBackend Hexagon @@ -143,7 +143,7 @@ set(LLVM_ALL_TARGETS ) # List of targets with JIT support: -set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM64 ARM Mips SystemZ) +set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ) set(LLVM_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of targets to build, 
or \"all\".") diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 344e66af65d..08f756c9214 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -419,9 +419,9 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;; sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; - arm64*-*) llvm_cv_target_arch="ARM64" ;; + arm64*-*) llvm_cv_target_arch="AArch64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; - aarch64*-*) llvm_cv_target_arch="ARM64" ;; + aarch64*-*) llvm_cv_target_arch="AArch64" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -455,9 +455,9 @@ case $host in amd64-* | x86_64-*) host_arch="x86_64" ;; sparc*-*) host_arch="Sparc" ;; powerpc*-*) host_arch="PowerPC" ;; - arm64*-*) host_arch="ARM64" ;; + arm64*-*) host_arch="AArch64" ;; arm*-*) host_arch="ARM" ;; - aarch64*-*) host_arch="ARM64" ;; + aarch64*-*) host_arch="AArch64" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -796,7 +796,7 @@ else esac fi -TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86" +TARGETS_WITH_JIT="ARM AArch64 Mips PowerPC SystemZ X86" AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT) dnl Allow enablement of building and installing docs @@ -949,7 +949,7 @@ if test "$llvm_cv_enable_crash_overrides" = "yes" ; then fi dnl List all possible targets -ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" +ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" AC_SUBST(ALL_TARGETS,$ALL_TARGETS) dnl Allow specific targets to be specified for building (or not) @@ -970,8 +970,8 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; - arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; + aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; + arm64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; @@ -989,7 +989,7 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; + AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index ca4af73d92c..1325e790c80 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -372,7 +372,7 @@ elseif (LLVM_NATIVE_ARCH MATCHES "powerpc") elseif (LLVM_NATIVE_ARCH MATCHES "aarch64") set(LLVM_NATIVE_ARCH AArch64) elseif (LLVM_NATIVE_ARCH MATCHES "arm64") - set(LLVM_NATIVE_ARCH ARM64) + set(LLVM_NATIVE_ARCH AArch64) elseif (LLVM_NATIVE_ARCH MATCHES "arm") set(LLVM_NATIVE_ARCH ARM) elseif (LLVM_NATIVE_ARCH MATCHES "mips") diff --git a/configure b/configure index a5babe9c230..e1959dfee6c 100755 --- a/configure +++ b/configure @@ -4151,9 +4151,9 @@ else amd64-* | x86_64-*) 
llvm_cv_target_arch="x86_64" ;; sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; - arm64*-*) llvm_cv_target_arch="ARM64" ;; + arm64*-*) llvm_cv_target_arch="AArch64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; - aarch64*-*) llvm_cv_target_arch="ARM64" ;; + aarch64*-*) llvm_cv_target_arch="AArch64" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -4188,9 +4188,9 @@ case $host in amd64-* | x86_64-*) host_arch="x86_64" ;; sparc*-*) host_arch="Sparc" ;; powerpc*-*) host_arch="PowerPC" ;; - arm64*-*) host_arch="ARM64" ;; + arm64*-*) host_arch="AArch64" ;; arm*-*) host_arch="ARM" ;; - aarch64*-*) host_arch="ARM64" ;; + aarch64*-*) host_arch="AArch64" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -5120,7 +5120,7 @@ else esac fi -TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86" +TARGETS_WITH_JIT="ARM AArch64 Mips PowerPC SystemZ X86" TARGETS_WITH_JIT=$TARGETS_WITH_JIT @@ -5357,7 +5357,7 @@ _ACEOF fi -ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" +ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600" ALL_TARGETS=$ALL_TARGETS @@ -5380,8 +5380,8 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; - arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; + aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; + arm64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; @@ -5399,7 +5399,7 @@ case "$enableval" in x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;; + AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;; ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; diff --git a/docs/LangRef.rst b/docs/LangRef.rst index fa8d3c0b75f..9b72eca7de5 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -6877,7 +6877,7 @@ register in surrounding code, including inline assembly. Because of that, allocatable registers are not supported. Warning: So far it only works with the stack pointer on selected -architectures (ARM, ARM64, AArch64, PowerPC and x86_64). Significant amount of +architectures (ARM, AArch64, PowerPC and x86_64). Significant amount of work is needed to support other registers and even more so, allocatable registers. 
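The user-visible side of the Intrinsics.td rename that follows is the IR spelling of the target intrinsics: every "llvm.arm64.*" name becomes "llvm.aarch64.*", with unchanged signatures. A minimal sketch of what a caller sees, based on the CRC32 definitions in this patch (the wrapper function @crc_byte and its value names are illustrative only, not part of the patch):

  declare i32 @llvm.aarch64.crc32b(i32, i32)

  define i32 @crc_byte(i32 %crc, i32 %byte) {
    ; previously: call i32 @llvm.arm64.crc32b(i32 %crc, i32 %byte)
    %r = call i32 @llvm.aarch64.crc32b(i32 %crc, i32 %byte)
    ret i32 %r
  }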
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index b133b4e4096..edd1621ef25 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -533,7 +533,7 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], include "llvm/IR/IntrinsicsPowerPC.td" include "llvm/IR/IntrinsicsX86.td" include "llvm/IR/IntrinsicsARM.td" -include "llvm/IR/IntrinsicsARM64.td" +include "llvm/IR/IntrinsicsAArch64.td" include "llvm/IR/IntrinsicsXCore.td" include "llvm/IR/IntrinsicsHexagon.td" include "llvm/IR/IntrinsicsNVVM.td" diff --git a/include/llvm/IR/IntrinsicsARM64.td b/include/llvm/IR/IntrinsicsAArch64.td similarity index 55% rename from include/llvm/IR/IntrinsicsARM64.td rename to include/llvm/IR/IntrinsicsAArch64.td index 146ea5d970c..23757aaef5c 100644 --- a/include/llvm/IR/IntrinsicsARM64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -1,4 +1,4 @@ -//===- IntrinsicsARM64.td - Defines ARM64 intrinsics -------*- tablegen -*-===// +//===- IntrinsicsAARCH64.td - Defines AARCH64 intrinsics ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,36 +7,36 @@ // //===----------------------------------------------------------------------===// // -// This file defines all of the ARM64-specific intrinsics. +// This file defines all of the AARCH64-specific intrinsics. // //===----------------------------------------------------------------------===// -let TargetPrefix = "arm64" in { +let TargetPrefix = "aarch64" in { -def int_arm64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; -def int_arm64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; -def int_arm64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; -def int_arm64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; +def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; +def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; +def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; +def int_aarch64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; -def int_arm64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; -def int_arm64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; -def int_arm64_stxp : Intrinsic<[llvm_i32_ty], +def int_aarch64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; +def int_aarch64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; +def int_aarch64_stxp : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>; -def int_arm64_stlxp : Intrinsic<[llvm_i32_ty], +def int_aarch64_stlxp : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>; -def int_arm64_clrex : Intrinsic<[]>; +def int_aarch64_clrex : Intrinsic<[]>; -def int_arm64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, +def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; -def int_arm64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, +def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // Advanced SIMD (NEON) -let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_2Scalar_Float_Intrinsic : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; @@ -139,269 +139,269 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". let Properties = [IntrNoMem] in { // Vector Add Across Lanes - def int_arm64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def int_aarch64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; // Vector Long Add Across Lanes - def int_arm64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; // Vector Halving Add - def int_arm64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic; // Vector Rounding Halving Add - def int_arm64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic; // Vector Saturating Add - def int_arm64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic; // Vector Add High-Half // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that // header is no longer supported. - def int_arm64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; // Vector Rounding Add High-Half - def int_arm64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; // Vector Saturating Doubling Multiply High - def int_arm64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic; // Vector Saturating Rounding Doubling Multiply High - def int_arm64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic; // Vector Polynominal Multiply - def int_arm64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic; // Vector Long Multiply - def int_arm64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic; - def int_arm64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic; - def int_arm64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic; + def int_aarch64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic; + def int_aarch64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic; + def int_aarch64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic; // 64-bit polynomial multiply really returns an i128, which is not legal. Fake // it with a v16i8. 
- def int_arm64_neon_pmull64 : + def int_aarch64_neon_pmull64 : Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; // Vector Extending Multiply - def int_arm64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic { + def int_aarch64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic { let Properties = [IntrNoMem, Commutative]; } // Vector Saturating Doubling Long Multiply - def int_arm64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic; - def int_arm64_neon_sqdmulls_scalar + def int_aarch64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic; + def int_aarch64_neon_sqdmulls_scalar : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Vector Halving Subtract - def int_arm64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic; // Vector Saturating Subtract - def int_arm64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic; // Vector Subtract High-Half // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that // header is no longer supported. - def int_arm64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; // Vector Rounding Subtract High-Half - def int_arm64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic; // Vector Compare Absolute Greater-than-or-equal - def int_arm64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic; + def int_aarch64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic; // Vector Compare Absolute Greater-than - def int_arm64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic; + def int_aarch64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic; // Vector Absolute Difference - def int_arm64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic; // Scalar Absolute Difference - def int_arm64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic; + def int_aarch64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic; // Vector Max - def int_arm64_neon_smax : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_umax : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_smax : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_umax : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic; // Vector Max Across Lanes - def int_arm64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; - def int_arm64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def int_aarch64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def 
int_aarch64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; // Vector Min - def int_arm64_neon_smin : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_umin : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_smin : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_umin : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic; // Vector Min/Max Number - def int_arm64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic; - def int_arm64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic; + def int_aarch64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic; + def int_aarch64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic; // Vector Min Across Lanes - def int_arm64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; - def int_arm64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; - def int_arm64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def int_aarch64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic; + def int_aarch64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; + def int_aarch64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic; // Pairwise Add - def int_arm64_neon_addp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic; // Long Pairwise Add // FIXME: In theory, we shouldn't need intrinsics for saddlp or // uaddlp, but tblgen's type inference currently can't handle the // pattern fragments this ends up generating. - def int_arm64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic; - def int_arm64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic; + def int_aarch64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic; + def int_aarch64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic; // Folding Maximum - def int_arm64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic; // Folding Minimum - def int_arm64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic; - def int_arm64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic; // Reciprocal Estimate/Step - def int_arm64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic; - def int_arm64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic; + def int_aarch64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic; + def int_aarch64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic; // Reciprocal Exponent - def int_arm64_neon_frecpx : AdvSIMD_1FloatArg_Intrinsic; + def int_aarch64_neon_frecpx : AdvSIMD_1FloatArg_Intrinsic; // Vector Saturating Shift Left - def int_arm64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic; // Vector Rounding Shift Left - def int_arm64_neon_srshl : AdvSIMD_2IntArg_Intrinsic; 
- def int_arm64_neon_urshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_srshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_urshl : AdvSIMD_2IntArg_Intrinsic; // Vector Saturating Rounding Shift Left - def int_arm64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic; // Vector Signed->Unsigned Shift Left by Constant - def int_arm64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic; // Vector Signed->Unsigned Narrowing Saturating Shift Right by Constant - def int_arm64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Signed->Unsigned Rounding Narrowing Saturating Shift Right by Const - def int_arm64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Narrowing Shift Right by Constant - def int_arm64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; - def int_arm64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Rounding Narrowing Shift Right by Constant - def int_arm64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Rounding Narrowing Saturating Shift Right by Constant - def int_arm64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; - def int_arm64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; + def int_aarch64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic; // Vector Shift Left - def int_arm64_neon_sshl : AdvSIMD_2IntArg_Intrinsic; - def int_arm64_neon_ushl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_sshl : AdvSIMD_2IntArg_Intrinsic; + def int_aarch64_neon_ushl : AdvSIMD_2IntArg_Intrinsic; // Vector Widening Shift Left by Constant - def int_arm64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic; - def int_arm64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic; - def int_arm64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic; + def int_aarch64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic; + def int_aarch64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic; + def int_aarch64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic; // Vector Shift Right by Constant and Insert - def int_arm64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic; + def int_aarch64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic; // Vector Shift Left by Constant and Insert - def int_arm64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic; + def int_aarch64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic; // Vector Saturating Narrow - def int_arm64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic; - def int_arm64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic; - def int_arm64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic; - def int_arm64_neon_uqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic; + def int_aarch64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic; + def int_aarch64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_uqxtn : 
AdvSIMD_1VectorArg_Narrow_Intrinsic; // Vector Saturating Extract and Unsigned Narrow - def int_arm64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic; - def int_arm64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic; + def int_aarch64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic; + def int_aarch64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic; // Vector Absolute Value - def int_arm64_neon_abs : AdvSIMD_1IntArg_Intrinsic; + def int_aarch64_neon_abs : AdvSIMD_1IntArg_Intrinsic; // Vector Saturating Absolute Value - def int_arm64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic; + def int_aarch64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic; // Vector Saturating Negation - def int_arm64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic; + def int_aarch64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic; // Vector Count Leading Sign Bits - def int_arm64_neon_cls : AdvSIMD_1VectorArg_Intrinsic; + def int_aarch64_neon_cls : AdvSIMD_1VectorArg_Intrinsic; // Vector Reciprocal Estimate - def int_arm64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic; - def int_arm64_neon_frecpe : AdvSIMD_1FloatArg_Intrinsic; + def int_aarch64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic; + def int_aarch64_neon_frecpe : AdvSIMD_1FloatArg_Intrinsic; // Vector Square Root Estimate - def int_arm64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic; - def int_arm64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic; + def int_aarch64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic; + def int_aarch64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic; // Vector Bitwise Reverse - def int_arm64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic; + def int_aarch64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic; // Vector Conversions Between Half-Precision and Single-Precision. - def int_arm64_neon_vcvtfp2hf + def int_aarch64_neon_vcvtfp2hf : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_arm64_neon_vcvthf2fp + def int_aarch64_neon_vcvthf2fp : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>; // Vector Conversions Between Floating-point and Fixed-point. 
- def int_arm64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic; - def int_arm64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic; - def int_arm64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic; - def int_arm64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic; + def int_aarch64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic; + def int_aarch64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic; + def int_aarch64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic; + def int_aarch64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic; // Vector FP->Int Conversions - def int_arm64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic; - def int_arm64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic; + def int_aarch64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic; // Vector FP Rounding: only ties to even is unrepresented by a normal // intrinsic. - def int_arm64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic; + def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic; // Scalar FP->Int conversions // Vector FP Inexact Narrowing - def int_arm64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic; + def int_aarch64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic; // Scalar FP Inexact Narrowing - def int_arm64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty], + def int_aarch64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; } -let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_2Vector2Index_Intrinsic : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty], @@ -409,9 +409,9 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". } // Vector element to element moves -def int_arm64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic; +def int_aarch64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic; -let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_1Vec_Load_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadArgMem]>; @@ -482,35 +482,35 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". 
// Memory ops -def int_arm64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic; -def int_arm64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic; -def int_arm64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic; +def int_aarch64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic; +def int_aarch64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic; +def int_aarch64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic; -def int_arm64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic; -def int_arm64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic; -def int_arm64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic; +def int_aarch64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic; +def int_aarch64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic; +def int_aarch64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic; -def int_arm64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic; -def int_arm64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic; -def int_arm64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic; +def int_aarch64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic; +def int_aarch64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic; +def int_aarch64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic; -def int_arm64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic; -def int_arm64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic; -def int_arm64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic; +def int_aarch64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic; +def int_aarch64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic; +def int_aarch64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic; -def int_arm64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic; -def int_arm64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic; -def int_arm64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic; +def int_aarch64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic; +def int_aarch64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic; +def int_aarch64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic; -def int_arm64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic; -def int_arm64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic; -def int_arm64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic; +def int_aarch64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic; +def int_aarch64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic; +def int_aarch64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic; -def int_arm64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic; -def int_arm64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic; -def int_arm64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic; +def int_aarch64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic; +def int_aarch64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic; +def int_aarch64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic; -let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_Tbl1_Intrinsic : Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; @@ -548,17 +548,17 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.". 
llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; } -def int_arm64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic; -def int_arm64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic; -def int_arm64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic; -def int_arm64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic; +def int_aarch64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic; +def int_aarch64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic; +def int_aarch64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic; +def int_aarch64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic; -def int_arm64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic; -def int_arm64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic; -def int_arm64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic; -def int_arm64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic; +def int_aarch64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic; +def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic; +def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic; +def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic; -let TargetPrefix = "arm64" in { +let TargetPrefix = "aarch64" in { class Crypto_AES_DataKey_Intrinsic : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -592,45 +592,45 @@ let TargetPrefix = "arm64" in { } // AES -def int_arm64_crypto_aese : Crypto_AES_DataKey_Intrinsic; -def int_arm64_crypto_aesd : Crypto_AES_DataKey_Intrinsic; -def int_arm64_crypto_aesmc : Crypto_AES_Data_Intrinsic; -def int_arm64_crypto_aesimc : Crypto_AES_Data_Intrinsic; +def int_aarch64_crypto_aese : Crypto_AES_DataKey_Intrinsic; +def int_aarch64_crypto_aesd : Crypto_AES_DataKey_Intrinsic; +def int_aarch64_crypto_aesmc : Crypto_AES_Data_Intrinsic; +def int_aarch64_crypto_aesimc : Crypto_AES_Data_Intrinsic; // SHA1 -def int_arm64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic; +def int_aarch64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic; -def int_arm64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic; -def int_arm64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic; +def int_aarch64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic; +def int_aarch64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic; // SHA256 -def int_arm64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic; -def int_arm64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic; -def int_arm64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic; +def int_aarch64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic; +def int_aarch64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic; +def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic; //===----------------------------------------------------------------------===// // CRC32 -let TargetPrefix = "arm64" in { +let TargetPrefix = "aarch64" in { -def int_arm64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def 
int_aarch64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_arm64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], +def int_aarch64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; -def int_arm64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], +def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index a70b03d95cf..2b425fbdd33 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -168,8 +168,9 @@ void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE, case Triple::thumb: resolveARMRelocation(RE, Value); break; + case Triple::aarch64: case Triple::arm64: - resolveARM64Relocation(RE, Value); + resolveAArch64Relocation(RE, Value); break; } } @@ -289,8 +290,8 @@ bool RuntimeDyldMachO::resolveARMRelocation(const RelocationEntry &RE, return false; } -bool RuntimeDyldMachO::resolveARM64Relocation(const RelocationEntry &RE, - uint64_t Value) { +bool RuntimeDyldMachO::resolveAArch64Relocation(const RelocationEntry &RE, + uint64_t Value) { const SectionEntry &Section = Sections[RE.SectionID]; uint8_t* LocalAddress = Section.Address + RE.Offset; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 08573eed5c8..060eb8c29a2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -41,7 +41,7 @@ private: bool resolveI386Relocation(const RelocationEntry &RE, uint64_t Value); bool resolveX86_64Relocation(const RelocationEntry &RE, uint64_t Value); bool resolveARMRelocation(const RelocationEntry &RE, uint64_t Value); - bool resolveARM64Relocation(const RelocationEntry &RE, uint64_t Value); + bool resolveAArch64Relocation(const RelocationEntry &RE, uint64_t Value); // Populate stubs in __jump_table section. 
void populateJumpTable(MachOObjectFile &Obj, const SectionRef &JTSection, diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 028c1912717..99236bd24ea 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -312,7 +312,8 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) { MCpu = "core2"; else if (Triple.getArch() == llvm::Triple::x86) MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::arm64) + else if (Triple.getArch() == llvm::Triple::arm64 || + Triple.getArch() == llvm::Triple::aarch64) MCpu = "cyclone"; } diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 255951a7070..d1175142651 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -168,7 +168,8 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; - else if (Triple.getArch() == llvm::Triple::arm64) + else if (Triple.getArch() == llvm::Triple::arm64 || + Triple.getArch() == llvm::Triple::aarch64) CPU = "cyclone"; } diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index bb132799504..9d413afe5db 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -23,7 +23,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; - if (T.isOSDarwin() && T.getArch() == Triple::arm64) + if (T.isOSDarwin() && + (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64)) SupportsCompactUnwindWithoutEHFrame = true; PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel @@ -151,7 +152,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { COFFDebugSymbolsSection = nullptr; if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) || - (T.isOSDarwin() && T.getArch() == Triple::arm64)) { + (T.isOSDarwin() && + (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))) { CompactUnwindSection = Ctx->getMachOSection("__LD", "__compact_unwind", MachO::S_ATTR_DEBUG, @@ -159,7 +161,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; - else if (T.getArch() == Triple::arm64) + else if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64) CompactUnwindDwarfEHFrameOnly = 0x03000000; } @@ -785,7 +787,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, // cellspu-apple-darwin. Perhaps we should fix in Triple? if ((Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || Arch == Triple::thumb || - Arch == Triple::arm64 || + Arch == Triple::arm64 || Arch == Triple::aarch64 || Arch == Triple::ppc || Arch == Triple::ppc64 || Arch == Triple::UnknownArch) && (T.isOSDarwin() || T.isOSBinFormatMachO())) { diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h new file mode 100644 index 00000000000..1c022aaf86b --- /dev/null +++ b/lib/Target/AArch64/AArch64.h @@ -0,0 +1,49 @@ +//==-- AArch64.h - Top-level interface for AArch64 --------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AArch64 back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_AArch64_H +#define TARGET_AArch64_H + +#include "Utils/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class AArch64TargetMachine; +class FunctionPass; +class MachineFunctionPass; + +FunctionPass *createAArch64DeadRegisterDefinitions(); +FunctionPass *createAArch64ConditionalCompares(); +FunctionPass *createAArch64AdvSIMDScalar(); +FunctionPass *createAArch64BranchRelaxation(); +FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel); +FunctionPass *createAArch64StorePairSuppressPass(); +FunctionPass *createAArch64ExpandPseudoPass(); +FunctionPass *createAArch64LoadStoreOptimizationPass(); +ModulePass *createAArch64PromoteConstantPass(); +FunctionPass *createAArch64AddressTypePromotionPass(); +/// \brief Creates an ARM-specific Target Transformation Info pass. +ImmutablePass * +createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM); + +FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); + +FunctionPass *createAArch64CollectLOHPass(); +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM64/ARM64.td b/lib/Target/AArch64/AArch64.td similarity index 90% rename from lib/Target/ARM64/ARM64.td rename to lib/Target/AArch64/AArch64.td index c473205f17c..1ad5ac8c6f3 100644 --- a/lib/Target/ARM64/ARM64.td +++ b/lib/Target/AArch64/AArch64.td @@ -1,4 +1,4 @@ -//===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===// +//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -17,7 +17,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// ARM64 Subtarget features. +// AArch64 Subtarget features. // def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", @@ -44,23 +44,23 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", // Register File Description //===----------------------------------------------------------------------===// -include "ARM64RegisterInfo.td" -include "ARM64CallingConvention.td" +include "AArch64RegisterInfo.td" +include "AArch64CallingConvention.td" //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// -include "ARM64Schedule.td" -include "ARM64InstrInfo.td" +include "AArch64Schedule.td" +include "AArch64InstrInfo.td" -def ARM64InstrInfo : InstrInfo; +def AArch64InstrInfo : InstrInfo; //===----------------------------------------------------------------------===// -// ARM64 Processors supported. +// AArch64 Processors supported. 
// -include "ARM64SchedA53.td" -include "ARM64SchedCyclone.td" +include "AArch64SchedA53.td" +include "AArch64SchedCyclone.td" def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", "Cortex-A53 ARM processors", @@ -109,7 +109,7 @@ def AppleAsmParserVariant : AsmParserVariant { //===----------------------------------------------------------------------===// // Assembly printer //===----------------------------------------------------------------------===// -// ARM64 Uses the MC printer for asm output, so make sure the TableGen +// AArch64 Uses the MC printer for asm output, so make sure the TableGen // AsmWriter bits get associated with the correct class. def GenericAsmWriter : AsmWriter { string AsmWriterClassName = "InstPrinter"; @@ -127,8 +127,8 @@ def AppleAsmWriter : AsmWriter { // Target Declaration //===----------------------------------------------------------------------===// -def ARM64 : Target { - let InstructionSet = ARM64InstrInfo; +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; } diff --git a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp similarity index 90% rename from lib/Target/ARM64/ARM64AddressTypePromotion.cpp rename to lib/Target/AArch64/AArch64AddressTypePromotion.cpp index be2b5eed2ad..04906f6078f 100644 --- a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp +++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp @@ -1,5 +1,4 @@ - -//===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===// +//===-- AArch64AddressTypePromotion.cpp --- Promote type for addr accesses -==// // // The LLVM Compiler Infrastructure // @@ -29,7 +28,7 @@ // FIXME: This pass may be useful for other targets too. 
// ===---------------------------------------------------------------------===// -#include "ARM64.h" +#include "AArch64.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -45,38 +44,38 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-type-promotion" +#define DEBUG_TYPE "aarch64-type-promotion" static cl::opt<bool> -EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden, +EnableAddressTypePromotion("aarch64-type-promotion", cl::Hidden, cl::desc("Enable the type promotion pass"), cl::init(true)); static cl::opt<bool> -EnableMerge("arm64-type-promotion-merge", cl::Hidden, +EnableMerge("aarch64-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true)); //===----------------------------------------------------------------------===// -// ARM64AddressTypePromotion +// AArch64AddressTypePromotion //===----------------------------------------------------------------------===// namespace llvm { -void initializeARM64AddressTypePromotionPass(PassRegistry &); +void initializeAArch64AddressTypePromotionPass(PassRegistry &); } namespace { -class ARM64AddressTypePromotion : public FunctionPass { +class AArch64AddressTypePromotion : public FunctionPass { public: static char ID; - ARM64AddressTypePromotion() + AArch64AddressTypePromotion() : FunctionPass(ID), Func(nullptr), ConsideredSExtType(nullptr) { - initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry()); + initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry()); } const char *getPassName() const override { - return "ARM64 Address Type Promotion"; + return "AArch64 Address Type Promotion"; } /// Iterate over the functions and promote the computation of interesting @@ -140,19 +139,19 @@ private: }; } // end anonymous namespace. -char ARM64AddressTypePromotion::ID = 0; +char AArch64AddressTypePromotion::ID = 0; -INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) +INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion", + "AArch64 Type Promotion Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) +INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion", + "AArch64 Type Promotion Pass", false, false) -FunctionPass *llvm::createARM64AddressTypePromotionPass() { - return new ARM64AddressTypePromotion(); +FunctionPass *llvm::createAArch64AddressTypePromotionPass() { + return new AArch64AddressTypePromotion(); } -bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) { +bool AArch64AddressTypePromotion::canGetThrough(const Instruction *Inst) { if (isa<SExtInst>(Inst)) return true; @@ -175,7 +174,7 @@ bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) { return false; } -bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) { +bool AArch64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) { // If the type of the sext is the same as the considered one, this sext // will become useless. 
// Otherwise, we will have to do something to preserve the original value, @@ -211,7 +210,7 @@ static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { } bool -ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { +AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { if (SExt->getType() != ConsideredSExtType) return false; @@ -249,7 +248,7 @@ ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { // = a // Iterate on 'c'. bool -ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { +AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { DEBUG(dbgs() << "*** Propagate Sign Extension ***\n"); bool LocalChange = false; @@ -375,8 +374,8 @@ ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { return LocalChange; } -void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove) { +void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, + SetOfInstructions &ToRemove) { DominatorTree &DT = getAnalysis().getDomTree(); for (auto &Entry : ValToSExtendedUses) { @@ -414,7 +413,7 @@ void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, } } -void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { +void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n"); DenseMap SeenChains; @@ -479,7 +478,7 @@ void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { } } -bool ARM64AddressTypePromotion::runOnFunction(Function &F) { +bool AArch64AddressTypePromotion::runOnFunction(Function &F) { if (!EnableAddressTypePromotion || F.isDeclaration()) return false; Func = &F; diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp similarity index 85% rename from lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp rename to lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index 5950a8f18e1..734fb215e6e 100644 --- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp +++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -1,4 +1,4 @@ -//===-- ARM64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===// +//===-- AArch64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===// // // The LLVM Compiler Infrastructure // @@ -33,9 +33,9 @@ // solution. //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64RegisterInfo.h" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64RegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" @@ -47,12 +47,12 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "arm64-simd-scalar" +#define DEBUG_TYPE "aarch64-simd-scalar" // Allow forcing all i64 operations with equivalent SIMD instructions to use // them. For stress-testing the transformation function. 
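Both this pass and the scalar-SIMD pass below declare their switches with LLVM's cl::opt; the element type of these options (presumably bool, since they are plain on/off flags) is not shown above, so the sketch here assumes it. A minimal self-contained tool using the same idiom:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Hidden on/off switch with a default, mirroring the options declared in these passes.
    static cl::opt<bool>
    EnableAddressTypePromotion("aarch64-type-promotion", cl::Hidden,
                               cl::desc("Enable the type promotion pass"),
                               cl::init(true));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv, "cl::opt sketch\n");
      return EnableAddressTypePromotion ? 0 : 1;
    }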
static cl::opt -TransformAll("arm64-simd-scalar-force-all", +TransformAll("aarch64-simd-scalar-force-all", cl::desc("Force use of AdvSIMD scalar instructions everywhere"), cl::init(false), cl::Hidden); @@ -61,9 +61,9 @@ STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted"); STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted"); namespace { -class ARM64AdvSIMDScalar : public MachineFunctionPass { +class AArch64AdvSIMDScalar : public MachineFunctionPass { MachineRegisterInfo *MRI; - const ARM64InstrInfo *TII; + const AArch64InstrInfo *TII; private: // isProfitableToTransform - Predicate function to determine whether an @@ -81,7 +81,7 @@ private: public: static char ID; // Pass identification, replacement for typeid. - explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {} + explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &F) override; @@ -94,7 +94,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -char ARM64AdvSIMDScalar::ID = 0; +char AArch64AdvSIMDScalar::ID = 0; } // end anonymous namespace static bool isGPR64(unsigned Reg, unsigned SubReg, @@ -102,20 +102,20 @@ static bool isGPR64(unsigned Reg, unsigned SubReg, if (SubReg) return false; if (TargetRegisterInfo::isVirtualRegister(Reg)) - return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass); - return ARM64::GPR64RegClass.contains(Reg); + return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass); + return AArch64::GPR64RegClass.contains(Reg); } static bool isFPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { if (TargetRegisterInfo::isVirtualRegister(Reg)) - return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) && + return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) && SubReg == 0) || - (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) && - SubReg == ARM64::dsub); + (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) && + SubReg == AArch64::dsub); // Physical register references just check the register class directly. - return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) || - (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub); + return (AArch64::FPR64RegClass.contains(Reg) && SubReg == 0) || + (AArch64::FPR128RegClass.contains(Reg) && SubReg == AArch64::dsub); } // getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64 @@ -125,17 +125,18 @@ static unsigned getSrcFromCopy(const MachineInstr *MI, unsigned &SubReg) { SubReg = 0; // The "FMOV Xd, Dn" instruction is the typical form. - if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr) + if (MI->getOpcode() == AArch64::FMOVDXr || + MI->getOpcode() == AArch64::FMOVXDr) return MI->getOperand(1).getReg(); // A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see // these at this stage, but it's easy to check for. - if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) { - SubReg = ARM64::dsub; + if (MI->getOpcode() == AArch64::UMOVvi64 && MI->getOperand(2).getImm() == 0) { + SubReg = AArch64::dsub; return MI->getOperand(1).getReg(); } // Or just a plain COPY instruction. This can be directly to/from FPR64, // or it can be a dsub subreg reference to an FPR128. 
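The cross-register-file copies that getSrcFromCopy looks through are what a scalar bit-for-bit move between a 64-bit integer and a double compiles to on AArch64, typically a single FMOV Xd, Dn or FMOV Dd, Xn. A small portable illustration that produces exactly that pattern when built for an aarch64 target:

    #include <cstdint>
    #include <cstring>

    // Move a value between the general-purpose and FP/SIMD register files
    // without changing its bits; on AArch64 each memcpy lowers to one FMOV.
    uint64_t fpr_to_gpr(double d) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof(bits));   // FMOV Xd, Dn
      return bits;
    }

    double gpr_to_fpr(uint64_t bits) {
      double d;
      std::memcpy(&d, &bits, sizeof(d));      // FMOV Dd, Xn
      return d;
    }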
- if (MI->getOpcode() == ARM64::COPY) { + if (MI->getOpcode() == AArch64::COPY) { if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), MRI) && isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI)) @@ -161,10 +162,10 @@ static int getTransformOpcode(unsigned Opc) { default: break; // FIXME: Lots more possibilities. - case ARM64::ADDXrr: - return ARM64::ADDv1i64; - case ARM64::SUBXrr: - return ARM64::SUBv1i64; + case AArch64::ADDXrr: + return AArch64::ADDv1i64; + case AArch64::SUBXrr: + return AArch64::SUBv1i64; } // No AdvSIMD equivalent, so just return the original opcode. return Opc; @@ -178,7 +179,8 @@ static bool isTransformable(const MachineInstr *MI) { // isProfitableToTransform - Predicate function to determine whether an // instruction should be transformed to its equivalent AdvSIMD scalar // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. -bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { +bool +AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { // If this instruction isn't eligible to be transformed (no SIMD equivalent), // early exit since that's the common case. if (!isTransformable(MI)) @@ -238,8 +240,8 @@ bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { // preferable to have it use the FPR64 in most cases, as if the source // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely. // Ditto for a lane insert. - else if (Use->getOpcode() == ARM64::INSERT_SUBREG || - Use->getOpcode() == ARM64::INSvi64gpr) + else if (Use->getOpcode() == AArch64::INSERT_SUBREG || + Use->getOpcode() == AArch64::INSvi64gpr) ; else AllUsesAreCopies = false; @@ -259,10 +261,10 @@ bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { return TransformAll; } -static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI, +static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI, unsigned Dst, unsigned Src, bool IsKill) { MachineInstrBuilder MIB = - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY), + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY), Dst) .addReg(Src, getKillRegState(IsKill)); DEBUG(dbgs() << " adding copy: " << *MIB); @@ -273,7 +275,7 @@ static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI, // transformInstruction - Perform the transformation of an instruction // to its equivalant AdvSIMD scalar instruction. Update inputs and outputs // to be the correct register class, minimizing cross-class copies. -void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { +void AArch64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { DEBUG(dbgs() << "Scalar transform: " << *MI); MachineBasicBlock *MBB = MI->getParent(); @@ -316,19 +318,19 @@ void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { // copy. if (!Src0) { SubReg0 = 0; - Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); + Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src0, OrigSrc0, true); } if (!Src1) { SubReg1 = 0; - Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); + Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); insertCopy(TII, MI, Src1, OrigSrc1, true); } // Create a vreg for the destination. // FIXME: No need to do this if the ultimate user expects an FPR64. // Check for that and avoid the copy if possible. 
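isProfitableToTransform boils down to copy counting: rewriting an integer op into its AdvSIMD scalar twin pays off only if the cross-class copies it makes dead at least match the ones it must insert. A deliberately simplified standalone model of that trade-off, not the pass's exact heuristic:

    #include <cstdio>

    struct Operand { bool AlreadyInFPR; };  // value arrives via an FPR->GPR copy

    // True when switching to the FPR form removes at least as many
    // GPR<->FPR copies as it adds.
    static bool worthTransforming(const Operand *Ops, int NumOps, bool ResultUsedInFPR) {
      int Removed = 0, Inserted = 0;
      for (int i = 0; i < NumOps; ++i) {
        if (Ops[i].AlreadyInFPR) ++Removed;  // the FMOV feeding this operand dies
        else ++Inserted;                     // a GPR input now needs a copy into FPR
      }
      if (ResultUsedInFPR) ++Removed;        // result no longer copied back to FPR
      else ++Inserted;                       // result now needs a copy back to GPR
      return Removed >= Inserted;
    }

    int main() {
      Operand BothFromFPR[2] = {{true}, {true}};
      Operand Mixed[2] = {{true}, {false}};
      std::printf("%d %d\n", worthTransforming(BothFromFPR, 2, true),   // 1: profitable
                             worthTransforming(Mixed, 2, false));       // 0: adds copies
    }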
- unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass); + unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); // For now, all of the new instructions have the same simple three-register // form, so no need to special case based on what instruction we're @@ -349,7 +351,7 @@ void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { } // processMachineBasicBlock - Main optimzation loop. -bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { +bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { MachineInstr *MI = I; @@ -363,13 +365,13 @@ bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { } // runOnMachineFunction - Pass entry point from PassManager. -bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { +bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { bool Changed = false; - DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n"); + DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n"); const TargetMachine &TM = mf.getTarget(); MRI = &mf.getRegInfo(); - TII = static_cast(TM.getInstrInfo()); + TII = static_cast(TM.getInstrInfo()); // Just check things on a one-block-at-a-time basis. for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) @@ -378,8 +380,8 @@ bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { return Changed; } -// createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine +// createAArch64AdvSIMDScalar - Factory function used by AArch64TargetMachine // to add the pass to the PassManager. -FunctionPass *llvm::createARM64AdvSIMDScalar() { - return new ARM64AdvSIMDScalar(); +FunctionPass *llvm::createAArch64AdvSIMDScalar() { + return new AArch64AdvSIMDScalar(); } diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp similarity index 74% rename from lib/Target/ARM64/ARM64AsmPrinter.cpp rename to lib/Target/AArch64/AArch64AsmPrinter.cpp index 7e17985bf4a..8553a591fee 100644 --- a/lib/Target/ARM64/ARM64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===// +//===-- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer --------------===// // // The LLVM Compiler Infrastructure // @@ -8,16 +8,16 @@ //===----------------------------------------------------------------------===// // // This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the ARM64 assembly language. +// of machine-dependent LLVM code to the AArch64 assembly language. 
// //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64MCInstLower.h" -#include "ARM64RegisterInfo.h" -#include "ARM64Subtarget.h" -#include "InstPrinter/ARM64InstPrinter.h" +#include "AArch64.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64MCInstLower.h" +#include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" +#include "InstPrinter/AArch64InstPrinter.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -42,21 +42,24 @@ using namespace llvm; namespace { -class ARM64AsmPrinter : public AsmPrinter { - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can +class AArch64AsmPrinter : public AsmPrinter { + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when printing asm code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; - ARM64MCInstLower MCInstLowering; + AArch64MCInstLower MCInstLowering; StackMaps SM; public: - ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), Subtarget(&TM.getSubtarget()), - MCInstLowering(OutContext, *Mang, *this), SM(*this), ARM64FI(nullptr), + AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + Subtarget(&TM.getSubtarget()), + MCInstLowering(OutContext, *Mang, *this), SM(*this), AArch64FI(nullptr), LOHLabelCounter(0) {} - const char *getPassName() const override { return "ARM64 Assembly Printer"; } + const char *getPassName() const override { + return "AArch64 Assembly Printer"; + } /// \brief Wrapper for MCInstLowering.lowerOperand() for the /// tblgen'erated pseudo lowering. @@ -81,7 +84,7 @@ public: } bool runOnMachineFunction(MachineFunction &F) override { - ARM64FI = F.getInfo(); + AArch64FI = F.getInfo(); return AsmPrinter::runOnMachineFunction(F); } @@ -106,9 +109,9 @@ private: MCSymbol *GetCPISymbol(unsigned CPID) const override; void EmitEndOfAsmFile(Module &M) override; - ARM64FunctionInfo *ARM64FI; + AArch64FunctionInfo *AArch64FI; - /// \brief Emit the LOHs contained in ARM64FI. + /// \brief Emit the LOHs contained in AArch64FI. void EmitLOHs(); typedef std::map MInstToMCSymbol; @@ -120,7 +123,7 @@ private: //===----------------------------------------------------------------------===// -void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) { +void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetMachO()) { // Funny Darwin hack: This flag tells the linker that no global symbols // contain code that falls through to other global symbols (e.g. the obvious @@ -156,7 +159,7 @@ void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) { } MachineLocation -ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { +AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { MachineLocation Location; assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); // Frame address. Currently handles register +- offset only. 
@@ -168,10 +171,10 @@ ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { return Location; } -void ARM64AsmPrinter::EmitLOHs() { +void AArch64AsmPrinter::EmitLOHs() { SmallVector MCArgs; - for (const auto &D : ARM64FI->getLOHContainer()) { + for (const auto &D : AArch64FI->getLOHContainer()) { for (const MachineInstr *MI : D.getArgs()) { MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI); assert(LabelIt != LOHInstToLabel.end() && @@ -183,13 +186,13 @@ void ARM64AsmPrinter::EmitLOHs() { } } -void ARM64AsmPrinter::EmitFunctionBodyEnd() { - if (!ARM64FI->getLOHRelated().empty()) +void AArch64AsmPrinter::EmitFunctionBodyEnd() { + if (!AArch64FI->getLOHRelated().empty()) EmitLOHs(); } /// GetCPISymbol - Return the symbol for the specified constant pool entry. -MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const { +MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const { // Darwin uses a linker-private symbol name for constant-pools (to // avoid addends on the relocation?), ELF has no such concept and // uses a normal private symbol. @@ -203,8 +206,8 @@ MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const { Twine(getFunctionNumber()) + "_" + Twine(CPID)); } -void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, - raw_ostream &O) { +void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, + raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { default: @@ -213,7 +216,7 @@ void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); - O << ARM64InstPrinter::getRegisterName(Reg); + O << AArch64InstPrinter::getRegisterName(Reg); break; } case MachineOperand::MO_Immediate: { @@ -224,8 +227,8 @@ void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, } } -bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, - raw_ostream &O) { +bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, + raw_ostream &O) { unsigned Reg = MO.getReg(); switch (Mode) { default: @@ -238,30 +241,30 @@ bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, break; } - O << ARM64InstPrinter::getRegisterName(Reg); + O << AArch64InstPrinter::getRegisterName(Reg); return false; } // Prints the register in MO using class RC using the offset in the // new register class. This should not be used for cross class // printing. -bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, - bool isVector, raw_ostream &O) { +bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, + const TargetRegisterClass *RC, + bool isVector, raw_ostream &O) { assert(MO.isReg() && "Should only get here with a register!"); - const ARM64RegisterInfo *RI = - static_cast(TM.getRegisterInfo()); + const AArch64RegisterInfo *RI = + static_cast(TM.getRegisterInfo()); unsigned Reg = MO.getReg(); unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); assert(RI->regsOverlap(RegToPrint, Reg)); - O << ARM64InstPrinter::getRegisterName( - RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName); + O << AArch64InstPrinter::getRegisterName( + RegToPrint, isVector ? 
AArch64::vreg : AArch64::NoRegAltName); return false; } -bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { +bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNum); // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { @@ -276,8 +279,8 @@ bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (MO.isReg()) return printAsmMRegister(MO, ExtraCode[0], O); if (MO.isImm() && MO.getImm() == 0) { - unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR; - O << ARM64InstPrinter::getRegisterName(Reg); + unsigned Reg = ExtraCode[0] == 'w' ? AArch64::WZR : AArch64::XZR; + O << AArch64InstPrinter::getRegisterName(Reg); return false; } printOperand(MI, OpNum, O); @@ -291,19 +294,19 @@ bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const TargetRegisterClass *RC; switch (ExtraCode[0]) { case 'b': - RC = &ARM64::FPR8RegClass; + RC = &AArch64::FPR8RegClass; break; case 'h': - RC = &ARM64::FPR16RegClass; + RC = &AArch64::FPR16RegClass; break; case 's': - RC = &ARM64::FPR32RegClass; + RC = &AArch64::FPR32RegClass; break; case 'd': - RC = &ARM64::FPR64RegClass; + RC = &AArch64::FPR64RegClass; break; case 'q': - RC = &ARM64::FPR128RegClass; + RC = &AArch64::FPR128RegClass; break; default: return true; @@ -321,33 +324,35 @@ bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned Reg = MO.getReg(); // If this is a w or x register, print an x register. - if (ARM64::GPR32allRegClass.contains(Reg) || - ARM64::GPR64allRegClass.contains(Reg)) + if (AArch64::GPR32allRegClass.contains(Reg) || + AArch64::GPR64allRegClass.contains(Reg)) return printAsmMRegister(MO, 'x', O); // If this is a b, h, s, d, or q register, print it as a v register. - return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O); + return printAsmRegInClass(MO, &AArch64::FPR128RegClass, true /* vector */, + O); } printOperand(MI, OpNum, O); return false; } -bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNum, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { +bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. 
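PrintAsmOperand above implements the inline-asm operand modifiers: 'w' and 'x' select the 32- or 64-bit GPR name, and 'b'/'h'/'s'/'d'/'q' select a view of the FP/SIMD register. These are the same modifiers user inline asm spells as %w0 and friends; an AArch64-only example (compile with an aarch64 target):

    #include <cstdint>

    // The 'w' modifier prints the W (32-bit) name of the operand's register,
    // matching the 'w' case handled by the asm printer above.
    uint32_t add_low_halves(uint64_t a, uint64_t b) {
      uint32_t result;
      asm("add %w0, %w1, %w2" : "=r"(result) : "r"(a), "r"(b));
      return result;
    }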
const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isReg() && "unexpected inline asm memory operand"); - O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]"; + O << "[" << AArch64InstPrinter::getRegisterName(MO.getReg()) << "]"; return false; } -void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { +void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { unsigned NOps = MI->getNumOperands(); assert(NOps == 4); OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; @@ -366,21 +371,21 @@ void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, printOperand(MI, NOps - 2, OS); } -void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { +void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { unsigned NumNOPBytes = MI.getOperand(1).getImm(); SM.recordStackMap(MI); // Emit padding. assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); for (unsigned i = 0; i < NumNOPBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); } // Lower a patchpoint of the form: // [], , , , -void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { +void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { SM.recordPatchPoint(MI); PatchPointOpers Opers(&MI); @@ -393,21 +398,21 @@ void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 16; // Materialize the jump address: - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZWi) .addReg(ScratchReg) .addImm((CallTarget >> 32) & 0xFFFF) .addImm(32)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi) .addReg(ScratchReg) .addReg(ScratchReg) .addImm((CallTarget >> 16) & 0xFFFF) .addImm(16)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(CallTarget & 0xFFFF) .addImm(0)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::BLR).addReg(ScratchReg)); } // Emit padding. unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); @@ -416,19 +421,19 @@ void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert((NumBytes - EncodedBytes) % 4 == 0 && "Invalid number of NOP bytes requested!"); for (unsigned i = EncodedBytes; i < NumBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); } // Simple pseudo-instructions have their lowering (with expansion to real // instructions) auto-generated. -#include "ARM64GenMCPseudoLowering.inc" +#include "AArch64GenMCPseudoLowering.inc" -void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { +void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Do any auto-generated pseudo lowerings. 
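LowerPATCHPOINT above materializes the call target with one MOVZ plus two MOVKs, each carrying a 16-bit chunk at shifts 32, 16 and 0, which is enough for the 48-bit addresses it expects. A standalone host-side check of that slicing (the sample address is arbitrary):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t CallTarget = 0x0000123456789ABCULL;       // any address below 2^48
      uint16_t Hi  = (CallTarget >> 32) & 0xFFFF;        // MOVZ reg, #Hi,  lsl #32
      uint16_t Mid = (CallTarget >> 16) & 0xFFFF;        // MOVK reg, #Mid, lsl #16
      uint16_t Lo  =  CallTarget        & 0xFFFF;        // MOVK reg, #Lo
      uint64_t Rebuilt = ((uint64_t)Hi << 32) | ((uint64_t)Mid << 16) | Lo;
      assert(Rebuilt == CallTarget);                     // three 16-bit moves suffice
      std::printf("%llx\n", (unsigned long long)Rebuilt);
    }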
if (emitPseudoExpansionLowering(OutStreamer, MI)) return; - if (ARM64FI->getLOHRelated().count(MI)) { + if (AArch64FI->getLOHRelated().count(MI)) { // Generate a label for LOH related instruction MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++); // Associate the instruction with the label @@ -440,7 +445,7 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; - case ARM64::DBG_VALUE: { + case AArch64::DBG_VALUE: { if (isVerbose() && OutStreamer.hasRawTextSupport()) { SmallString<128> TmpStr; raw_svector_ostream OS(TmpStr); @@ -453,23 +458,23 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Tail calls use pseudo instructions so they have the proper code-gen // attributes (isCall, isReturn, etc.). We lower them to the real // instruction here. - case ARM64::TCRETURNri: { + case AArch64::TCRETURNri: { MCInst TmpInst; - TmpInst.setOpcode(ARM64::BR); + TmpInst.setOpcode(AArch64::BR); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); EmitToStreamer(OutStreamer, TmpInst); return; } - case ARM64::TCRETURNdi: { + case AArch64::TCRETURNdi: { MCOperand Dest; MCInstLowering.lowerOperand(MI->getOperand(0), Dest); MCInst TmpInst; - TmpInst.setOpcode(ARM64::B); + TmpInst.setOpcode(AArch64::B); TmpInst.addOperand(Dest); EmitToStreamer(OutStreamer, TmpInst); return; } - case ARM64::TLSDESC_BLR: { + case AArch64::TLSDESC_BLR: { MCOperand Callee, Sym; MCInstLowering.lowerOperand(MI->getOperand(0), Callee); MCInstLowering.lowerOperand(MI->getOperand(1), Sym); @@ -477,14 +482,14 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { // First emit a relocation-annotation. This expands to no code, but requests // the following instruction gets an R_AARCH64_TLSDESC_CALL. MCInst TLSDescCall; - TLSDescCall.setOpcode(ARM64::TLSDESCCALL); + TLSDescCall.setOpcode(AArch64::TLSDESCCALL); TLSDescCall.addOperand(Sym); EmitToStreamer(OutStreamer, TLSDescCall); // Other than that it's just a normal indirect call to the function loaded // from the descriptor. MCInst BLR; - BLR.setOpcode(ARM64::BLR); + BLR.setOpcode(AArch64::BLR); BLR.addOperand(Callee); EmitToStreamer(OutStreamer, BLR); @@ -505,10 +510,10 @@ void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { } // Force static initialization. 
-extern "C" void LLVMInitializeARM64AsmPrinter() { - RegisterAsmPrinter X(TheARM64leTarget); - RegisterAsmPrinter Y(TheARM64beTarget); +extern "C" void LLVMInitializeAArch64AsmPrinter() { + RegisterAsmPrinter X(TheAArch64leTarget); + RegisterAsmPrinter Y(TheAArch64beTarget); - RegisterAsmPrinter Z(TheAArch64leTarget); - RegisterAsmPrinter W(TheAArch64beTarget); + RegisterAsmPrinter Z(TheARM64leTarget); + RegisterAsmPrinter W(TheARM64beTarget); } diff --git a/lib/Target/ARM64/ARM64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp similarity index 78% rename from lib/Target/ARM64/ARM64BranchRelaxation.cpp rename to lib/Target/AArch64/AArch64BranchRelaxation.cpp index 73be3504790..52094526727 100644 --- a/lib/Target/ARM64/ARM64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -1,4 +1,4 @@ -//===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===// +//===-- AArch64BranchRelaxation.cpp - AArch64 branch relaxation -----------===// // // The LLVM Compiler Infrastructure // @@ -9,9 +9,9 @@ // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -23,29 +23,29 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -#define DEBUG_TYPE "arm64-branch-relax" +#define DEBUG_TYPE "aarch64-branch-relax" static cl::opt -BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true), +BranchRelaxation("aarch64-branch-relax", cl::Hidden, cl::init(true), cl::desc("Relax out of range conditional branches")); static cl::opt -TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14), +TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); static cl::opt -CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19), +CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); static cl::opt -BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19), +BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)")); STATISTIC(NumSplit, "Number of basic blocks split"); STATISTIC(NumRelaxed, "Number of conditional branches relaxed"); namespace { -class ARM64BranchRelaxation : public MachineFunctionPass { +class AArch64BranchRelaxation : public MachineFunctionPass { /// BasicBlockInfo - Information about the offset and size of a single /// basic block. 
struct BasicBlockInfo { @@ -77,7 +77,7 @@ class ARM64BranchRelaxation : public MachineFunctionPass { SmallVector BlockInfo; MachineFunction *MF; - const ARM64InstrInfo *TII; + const AArch64InstrInfo *TII; bool relaxBranchInstructions(); void scanFunction(); @@ -92,19 +92,19 @@ class ARM64BranchRelaxation : public MachineFunctionPass { public: static char ID; - ARM64BranchRelaxation() : MachineFunctionPass(ID) {} + AArch64BranchRelaxation() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "ARM64 branch relaxation pass"; + return "AArch64 branch relaxation pass"; } }; -char ARM64BranchRelaxation::ID = 0; +char AArch64BranchRelaxation::ID = 0; } /// verify - check BBOffsets, BBSizes, alignment of islands -void ARM64BranchRelaxation::verify() { +void AArch64BranchRelaxation::verify() { #ifndef NDEBUG unsigned PrevNum = MF->begin()->getNumber(); for (MachineBasicBlock &MBB : *MF) { @@ -118,7 +118,7 @@ void ARM64BranchRelaxation::verify() { } /// print block size and offset information - debugging -void ARM64BranchRelaxation::dumpBBs() { +void AArch64BranchRelaxation::dumpBBs() { for (auto &MBB : *MF) { const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) @@ -145,7 +145,7 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) { /// scanFunction - Do the initial scan of the function, building up /// information about each block. -void ARM64BranchRelaxation::scanFunction() { +void AArch64BranchRelaxation::scanFunction() { BlockInfo.clear(); BlockInfo.resize(MF->getNumBlockIDs()); @@ -162,7 +162,7 @@ void ARM64BranchRelaxation::scanFunction() { /// computeBlockSize - Compute the size for MBB. /// This function updates BlockInfo directly. -void ARM64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) { +void AArch64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) { unsigned Size = 0; for (const MachineInstr &MI : MBB) Size += TII->GetInstSizeInBytes(&MI); @@ -172,7 +172,7 @@ void ARM64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) { /// getInstrOffset - Return the current offset of the specified machine /// instruction from the start of the function. This offset changes as stuff is /// moved around inside the function. -unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { +unsigned AArch64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); // The offset is composed of two things: the sum of the sizes of all MBB's @@ -188,7 +188,7 @@ unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { return Offset; } -void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { +void AArch64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { unsigned PrevNum = Start.getNumber(); for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) { unsigned Num = MBB.getNumber(); @@ -209,7 +209,7 @@ void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { /// and must be updated by the caller! Other transforms follow using this /// utility function, so no point updating now rather than waiting. MachineBasicBlock * -ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { +AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); // Create a new MBB for the code after the OrigBB. 
@@ -226,7 +226,7 @@ ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { // Note the new unconditional branch is not being recorded. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB); + BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::B)).addMBB(NewBB); // Insert an entry into BlockInfo to align it properly with the block numbers. BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); @@ -252,9 +252,9 @@ ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { /// isBlockInRange - Returns true if the distance between specific MI and /// specific BB can fit in MI's displacement field. -bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI, - MachineBasicBlock *DestBB, - unsigned Bits) { +bool AArch64BranchRelaxation::isBlockInRange(MachineInstr *MI, + MachineBasicBlock *DestBB, + unsigned Bits) { unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2; unsigned BrOffset = getInstrOffset(MI); unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset; @@ -275,15 +275,15 @@ static bool isConditionalBranch(unsigned Opc) { switch (Opc) { default: return false; - case ARM64::TBZW: - case ARM64::TBNZW: - case ARM64::TBZX: - case ARM64::TBNZX: - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: + case AArch64::TBZW: + case AArch64::TBNZW: + case AArch64::TBZX: + case AArch64::TBNZX: + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::CBZX: + case AArch64::CBNZX: + case AArch64::Bcc: return true; } } @@ -292,16 +292,16 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) { switch (MI->getOpcode()) { default: assert(0 && "unexpected opcode!"); - case ARM64::TBZW: - case ARM64::TBNZW: - case ARM64::TBZX: - case ARM64::TBNZX: + case AArch64::TBZW: + case AArch64::TBNZW: + case AArch64::TBZX: + case AArch64::TBNZX: return MI->getOperand(2).getMBB(); - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::CBZX: + case AArch64::CBNZX: + case AArch64::Bcc: return MI->getOperand(1).getMBB(); } } @@ -310,15 +310,15 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) { switch (Opc) { default: assert(0 && "unexpected opcode!"); - case ARM64::TBNZW: return ARM64::TBZW; - case ARM64::TBNZX: return ARM64::TBZX; - case ARM64::TBZW: return ARM64::TBNZW; - case ARM64::TBZX: return ARM64::TBNZX; - case ARM64::CBNZW: return ARM64::CBZW; - case ARM64::CBNZX: return ARM64::CBZX; - case ARM64::CBZW: return ARM64::CBNZW; - case ARM64::CBZX: return ARM64::CBNZX; - case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc. + case AArch64::TBNZW: return AArch64::TBZW; + case AArch64::TBNZX: return AArch64::TBZX; + case AArch64::TBZW: return AArch64::TBNZW; + case AArch64::TBZX: return AArch64::TBNZX; + case AArch64::CBNZW: return AArch64::CBZW; + case AArch64::CBNZX: return AArch64::CBZX; + case AArch64::CBZW: return AArch64::CBNZW; + case AArch64::CBZX: return AArch64::CBNZX; + case AArch64::Bcc: return AArch64::Bcc; // Condition is an operand for Bcc. 
} } @@ -326,32 +326,32 @@ static unsigned getBranchDisplacementBits(unsigned Opc) { switch (Opc) { default: assert(0 && "unexpected opcode!"); - case ARM64::TBNZW: - case ARM64::TBZW: - case ARM64::TBNZX: - case ARM64::TBZX: + case AArch64::TBNZW: + case AArch64::TBZW: + case AArch64::TBNZX: + case AArch64::TBZX: return TBZDisplacementBits; - case ARM64::CBNZW: - case ARM64::CBZW: - case ARM64::CBNZX: - case ARM64::CBZX: + case AArch64::CBNZW: + case AArch64::CBZW: + case AArch64::CBNZX: + case AArch64::CBZX: return CBZDisplacementBits; - case ARM64::Bcc: + case AArch64::Bcc: return BCCDisplacementBits; } } static inline void invertBccCondition(MachineInstr *MI) { - assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!"); - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm(); - CC = ARM64CC::getInvertedCondCode(CC); + assert(MI->getOpcode() == AArch64::Bcc && "Unexpected opcode!"); + AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(0).getImm(); + CC = AArch64CC::getInvertedCondCode(CC); MI->getOperand(0).setImm((int64_t)CC); } /// fixupConditionalBranch - Fix up a conditional branch whose destination is /// too far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. -bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { +bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { MachineBasicBlock *DestBB = getDestBlock(MI); // Add an unconditional branch to the destination and invert the branch @@ -372,7 +372,7 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { if (BMI != MI) { if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->getLastNonDebugInstr()) && - BMI->getOpcode() == ARM64::B) { + BMI->getOpcode() == AArch64::B) { // Last MI in the BB is an unconditional branch. Can we simply invert the // condition and swap destinations: // beq L1 @@ -386,14 +386,15 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { DEBUG(dbgs() << " Invert condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); - unsigned OpNum = - (MI->getOpcode() == ARM64::TBZW || MI->getOpcode() == ARM64::TBNZW || - MI->getOpcode() == ARM64::TBZX || MI->getOpcode() == ARM64::TBNZX) - ? 2 - : 1; + unsigned OpNum = (MI->getOpcode() == AArch64::TBZW || + MI->getOpcode() == AArch64::TBNZW || + MI->getOpcode() == AArch64::TBZX || + MI->getOpcode() == AArch64::TBNZX) + ? 
2 + : 1; MI->getOperand(OpNum).setMBB(NewDest); MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode()))); - if (MI->getOpcode() == ARM64::Bcc) + if (MI->getOpcode() == AArch64::Bcc) invertBccCondition(MI); return true; } @@ -429,14 +430,14 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { MachineInstrBuilder MIB = BuildMI( MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode()))) .addOperand(MI->getOperand(0)); - if (MI->getOpcode() == ARM64::TBZW || MI->getOpcode() == ARM64::TBNZW || - MI->getOpcode() == ARM64::TBZX || MI->getOpcode() == ARM64::TBNZX) + if (MI->getOpcode() == AArch64::TBZW || MI->getOpcode() == AArch64::TBNZW || + MI->getOpcode() == AArch64::TBZX || MI->getOpcode() == AArch64::TBNZX) MIB.addOperand(MI->getOperand(1)); - if (MI->getOpcode() == ARM64::Bcc) + if (MI->getOpcode() == AArch64::Bcc) invertBccCondition(MIB); MIB.addMBB(NextBB); BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); - BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB); + BuildMI(MBB, DebugLoc(), TII->get(AArch64::B)).addMBB(DestBB); BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); // Remove the old conditional branch. It may or may not still be in MBB. @@ -448,7 +449,7 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { return true; } -bool ARM64BranchRelaxation::relaxBranchInstructions() { +bool AArch64BranchRelaxation::relaxBranchInstructions() { bool Changed = false; // Relaxing branches involves creating new basic blocks, so re-eval // end() for termination. @@ -465,16 +466,16 @@ bool ARM64BranchRelaxation::relaxBranchInstructions() { return Changed; } -bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { +bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { MF = &mf; // If the pass is disabled, just bail early. if (!BranchRelaxation) return false; - DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n"); + DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n"); - TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo(); + TII = (const AArch64InstrInfo *)MF->getTarget().getInstrInfo(); // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. @@ -502,8 +503,8 @@ bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { return MadeChange; } -/// createARM64BranchRelaxation - returns an instance of the constpool +/// createAArch64BranchRelaxation - returns an instance of the constpool /// island pass. 
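The relaxation decision above rests on one formula: a conditional branch with an N-bit signed displacement field, scaled by 4 bytes, reaches at most ((1 << (N - 1)) - 1) << 2 bytes. A standalone check mirroring isBlockInRange with the pass's default widths (14 bits for TB[N]Z, 19 for CB[N]Z and Bcc); like the pass, it applies the same bound in both directions:

    #include <cstdio>

    // True when a branch at BrOffset can encode a jump to DestOffset using a
    // signed, 4-byte-scaled displacement of the given bit width.
    static bool inRange(unsigned Bits, unsigned BrOffset, unsigned DestOffset) {
      unsigned MaxOffs = ((1u << (Bits - 1)) - 1) << 2;
      if (DestOffset >= BrOffset)
        return DestOffset - BrOffset <= MaxOffs;
      return BrOffset - DestOffset <= MaxOffs;
    }

    int main() {
      std::printf("TB[N]Z reach: %u bytes\n", ((1u << 13) - 1) << 2);  // 32764, ~32 KiB
      std::printf("Bcc    reach: %u bytes\n", ((1u << 18) - 1) << 2);  // 1048572, ~1 MiB
      std::printf("fits: %d\n", inRange(19, 0x1000, 0x1000 + 1048572)); // 1
      std::printf("fits: %d\n", inRange(14, 0x1000, 0x1000 + 40000));   // 0 -> relax it
    }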
-FunctionPass *llvm::createARM64BranchRelaxation() { - return new ARM64BranchRelaxation(); +FunctionPass *llvm::createAArch64BranchRelaxation() { + return new AArch64BranchRelaxation(); } diff --git a/lib/Target/ARM64/ARM64CallingConv.h b/lib/Target/AArch64/AArch64CallingConv.h similarity index 65% rename from lib/Target/ARM64/ARM64CallingConv.h rename to lib/Target/AArch64/AArch64CallingConv.h index f24ba59dfb9..1fe426ed686 100644 --- a/lib/Target/ARM64/ARM64CallingConv.h +++ b/lib/Target/AArch64/AArch64CallingConv.h @@ -1,4 +1,4 @@ -//=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===// +//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,38 +7,38 @@ // //===----------------------------------------------------------------------===// // -// This file contains the custom routines for the ARM64 Calling Convention that +// This file contains the custom routines for the AArch64 Calling Convention that // aren't done by tablegen. // //===----------------------------------------------------------------------===// -#ifndef ARM64CALLINGCONV_H -#define ARM64CALLINGCONV_H +#ifndef AArch64CALLINGCONV_H +#define AArch64CALLINGCONV_H -#include "ARM64InstrInfo.h" +#include "AArch64InstrInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { -/// CC_ARM64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via +/// CC_AArch64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via /// register. Here, ValVT can be i1/i8/i16 or i32 depending on whether the /// argument is already promoted and LocVT is i1/i8/i16. We only promote the /// argument to i32 if we are sure this argument will be passed in register. -static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, +static bool CC_AArch64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsWebKitJS = false) { - static const MCPhysReg RegList1[] = { ARM64::W0, ARM64::W1, ARM64::W2, - ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7 }; - static const MCPhysReg RegList2[] = { ARM64::X0, ARM64::X1, ARM64::X2, - ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7 }; - static const MCPhysReg WebKitRegList1[] = { ARM64::W0 }; - static const MCPhysReg WebKitRegList2[] = { ARM64::X0 }; + static const MCPhysReg RegList1[] = { AArch64::W0, AArch64::W1, AArch64::W2, + AArch64::W3, AArch64::W4, AArch64::W5, + AArch64::W6, AArch64::W7 }; + static const MCPhysReg RegList2[] = { AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7 }; + static const MCPhysReg WebKitRegList1[] = { AArch64::W0 }; + static const MCPhysReg WebKitRegList2[] = { AArch64::X0 }; const MCPhysReg *List1 = IsWebKitJS ? WebKitRegList1 : RegList1; const MCPhysReg *List2 = IsWebKitJS ? WebKitRegList2 : RegList2; @@ -63,22 +63,22 @@ static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, return false; } -/// CC_ARM64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16 -/// via register. This behaves the same as CC_ARM64_Custom_i1i8i16_Reg, but only +/// CC_AArch64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16 +/// via register. This behaves the same as CC_AArch64_Custom_i1i8i16_Reg, but only /// uses the first register. 
-static bool CC_ARM64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, +static bool CC_AArch64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - return CC_ARM64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + return CC_AArch64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, true); } -/// CC_ARM64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on +/// CC_AArch64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on /// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument /// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted, /// it will be truncated back to i1/i8/i16. -static bool CC_ARM64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT, +static bool CC_AArch64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { diff --git a/lib/Target/ARM64/ARM64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td similarity index 92% rename from lib/Target/ARM64/ARM64CallingConvention.td rename to lib/Target/AArch64/AArch64CallingConvention.td index 0ef5601718d..c263d14dcc3 100644 --- a/lib/Target/ARM64/ARM64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -1,4 +1,4 @@ -//===- ARM64CallingConv.td - Calling Conventions for ARM64 -*- tablegen -*-===// +//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This describes the calling conventions for ARM64 architecture. +// This describes the calling conventions for AArch64 architecture. // //===----------------------------------------------------------------------===// @@ -22,7 +22,7 @@ class CCIfBigEndian : // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// -def CC_ARM64_AAPCS : CallingConv<[ +def CC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, @@ -42,7 +42,7 @@ def CC_ARM64_AAPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Reg">>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -73,7 +73,7 @@ def CC_ARM64_AAPCS : CallingConv<[ CCAssignToStack<16, 16>> ]>; -def RetCC_ARM64_AAPCS : CallingConv<[ +def RetCC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, @@ -104,7 +104,7 @@ def RetCC_ARM64_AAPCS : CallingConv<[ // from the standard one at this level: // + i128s (i.e. split i64s) don't need even registers. // + Stack slots are sized as needed rather than being at least 64-bit. -def CC_ARM64_DarwinPCS : CallingConv<[ +def CC_AArch64_DarwinPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, @@ -117,7 +117,7 @@ def CC_ARM64_DarwinPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. 
- CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Reg">>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -140,14 +140,14 @@ def CC_ARM64_DarwinPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Stack">>, + CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_Custom_i1i8i16_Stack">>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], CCAssignToStack<8, 8>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> ]>; -def CC_ARM64_DarwinPCS_VarArg : CallingConv<[ +def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, @@ -166,9 +166,9 @@ def CC_ARM64_DarwinPCS_VarArg : CallingConv<[ // in register and the remaining arguments on stack. We allow 32bit stack slots, // so that WebKit can write partial values in the stack and define the other // 32bit quantity as undef. -def CC_ARM64_WebKit_JS : CallingConv<[ +def CC_AArch64_WebKit_JS : CallingConv<[ // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_WebKit_JS_i1i8i16_Reg">>, + CCIfType<[i1, i8, i16], CCCustom<"CC_AArch64_WebKit_JS_i1i8i16_Reg">>, CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, @@ -178,7 +178,7 @@ def CC_ARM64_WebKit_JS : CallingConv<[ CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; -def RetCC_ARM64_WebKit_JS : CallingConv<[ +def RetCC_AArch64_WebKit_JS : CallingConv<[ CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], @@ -197,7 +197,7 @@ def RetCC_ARM64_WebKit_JS : CallingConv<[ // It would be better to model its preservation semantics properly (create a // vreg on entry, use it in RET & tail call generation; make that vreg def if we // end up saving LR as part of a call frame). Watch this space... -def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, D8, D9, D10, D11, D12, D13, D14, D15)>; @@ -210,24 +210,24 @@ def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, // (For generic ARM 64-bit ABI code, clang will not generate constructors or // destructors with 'this' returns, so this RegMask will not be used in that // case) -def CSR_ARM64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_ARM64_AAPCS, X0)>; +def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the // fast path for calculation, but other registers except X0 (argument/return) // and LR (it is a call, after all) are preserved. -def CSR_ARM64_TLS_Darwin +def CSR_AArch64_TLS_Darwin : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), FP, (sequence "Q%u", 0, 31))>; // The ELF stub used for TLS-descriptor access saves every feasible // register. Only X0 and LR are clobbered. 
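The CCCustom handlers named in these conventions implement one rule: an i1/i8/i16 argument is promoted to i32 and takes the next free register from W0-W7 (just W0 for the WebKit JS convention); once the eight argument registers are used it falls through to the stack rules. A toy standalone model of that allocation order, with none of LLVM's CCState machinery and the stack slot size simplified to 4 bytes (the real AAPCS and Darwin rules differ here):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct ToyCCState {
      unsigned NextGPR = 0;                  // how many of W0..W7 are consumed
      unsigned StackOffset = 0;              // next free stack slot, in bytes
      std::vector<std::string> Locs;

      void assignSmallInt(const std::string &Arg) {
        if (NextGPR < 8) {                   // promoted to i32, passed in a W register
          Locs.push_back(Arg + " -> W" + std::to_string(NextGPR++));
        } else {                             // out of registers: assign a stack slot
          Locs.push_back(Arg + " -> [sp, #" + std::to_string(StackOffset) + "]");
          StackOffset += 4;
        }
      }
    };

    int main() {
      ToyCCState State;
      for (int i = 0; i < 10; ++i)
        State.assignSmallInt("arg" + std::to_string(i));
      for (const auto &L : State.Locs)
        std::puts(L.c_str());                // arg0..arg7 in W0..W7, arg8/arg9 on the stack
    }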
-def CSR_ARM64_TLS_ELF +def CSR_AArch64_TLS_ELF : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, (sequence "Q%u", 0, 31))>; -def CSR_ARM64_AllRegs +def CSR_AArch64_AllRegs : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, (sequence "X%u", 0, 28), FP, LR, SP, (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), diff --git a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp similarity index 81% rename from lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp rename to lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index dce1301b92e..4d23dc59d7a 100644 --- a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -1,4 +1,4 @@ -//===-- ARM64CleanupLocalDynamicTLSPass.cpp -----------------------*- C++ -*-=// +//===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -22,10 +22,10 @@ // pass looks through a function and performs such combinations. // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64TargetMachine.h" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -39,7 +39,7 @@ struct LDTLSCleanup : public MachineFunctionPass { LDTLSCleanup() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override { - ARM64FunctionInfo *AFI = MF.getInfo(); + AArch64FunctionInfo *AFI = MF.getInfo(); if (AFI->getNumLocalDynamicTLSAccesses() < 2) { // No point folding accesses if there isn't at least two. return false; @@ -62,7 +62,7 @@ struct LDTLSCleanup : public MachineFunctionPass { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { switch (I->getOpcode()) { - case ARM64::TLSDESC_BLR: + case AArch64::TLSDESC_BLR: // Make sure it's a local dynamic access. if (!I->getOperand(1).isSymbol() || strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) @@ -92,15 +92,15 @@ struct LDTLSCleanup : public MachineFunctionPass { MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I, unsigned TLSBaseAddrReg) { MachineFunction *MF = I->getParent()->getParent(); - const ARM64TargetMachine *TM = - static_cast(&MF->getTarget()); - const ARM64InstrInfo *TII = TM->getInstrInfo(); + const AArch64TargetMachine *TM = + static_cast(&MF->getTarget()); + const AArch64InstrInfo *TII = TM->getInstrInfo(); // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the // code sequence assumes the address will be. - MachineInstr *Copy = - BuildMI(*I->getParent(), I, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), ARM64::X0).addReg(TLSBaseAddrReg); + MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + AArch64::X0).addReg(TLSBaseAddrReg); // Erase the TLS_base_addr instruction. I->eraseFromParent(); @@ -112,19 +112,19 @@ struct LDTLSCleanup : public MachineFunctionPass { // inserting a copy instruction after I. Returns the new instruction. 
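The local-dynamic TLS cleanup above is a compute-once, copy-afterwards rewrite: only the first _TLS_MODULE_BASE_ access keeps its TLSDESC call, later ones become a plain copy of the cached base register. A toy standalone analogue of that caching; the names here are illustrative and not part of the pass:

    #include <cstdio>

    static int ExpensiveCalls = 0;

    // Stand-in for the TLSDESC call sequence that produces the module's TLS base.
    static char *computeTLSBase() {
      static char Storage[64];
      ++ExpensiveCalls;
      return Storage;
    }

    int main() {
      char *Base = nullptr;                     // stand-in for the cached virtual register
      for (int Access = 0; Access < 3; ++Access) {
        if (!Base)
          Base = computeTLSBase();              // only the first access pays for the call
        char *Var = Base + 8 * Access;          // later accesses just reuse the base
        std::printf("access %d at %p\n", Access, (void *)Var);
      }
      std::printf("expensive calls: %d\n", ExpensiveCalls);   // prints 1
    }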
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { MachineFunction *MF = I->getParent()->getParent(); - const ARM64TargetMachine *TM = - static_cast(&MF->getTarget()); - const ARM64InstrInfo *TII = TM->getInstrInfo(); + const AArch64TargetMachine *TM = + static_cast(&MF->getTarget()); + const AArch64InstrInfo *TII = TM->getInstrInfo(); // Create a virtual register for the TLS base address. MachineRegisterInfo &RegInfo = MF->getRegInfo(); - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); // Insert a copy from X0 to TLSBaseAddrReg for later. MachineInstr *Next = I->getNextNode(); MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg).addReg(ARM64::X0); + *TLSBaseAddrReg).addReg(AArch64::X0); return Copy; } @@ -142,6 +142,6 @@ struct LDTLSCleanup : public MachineFunctionPass { } char LDTLSCleanup::ID = 0; -FunctionPass *llvm::createARM64CleanupLocalDynamicTLSPass() { +FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/lib/Target/ARM64/ARM64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp similarity index 91% rename from lib/Target/ARM64/ARM64CollectLOH.cpp rename to lib/Target/AArch64/AArch64CollectLOH.cpp index 8b48f3ae9b2..6b1f09678e9 100644 --- a/lib/Target/ARM64/ARM64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -1,4 +1,4 @@ -//===-------------- ARM64CollectLOH.cpp - ARM64 collect LOH pass --*- C++ -*-=// +//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -85,8 +85,8 @@ // This LOH aims at getting rid of redundant ADRP instructions. // // The overall design for emitting the LOHs is: -// 1. ARM64CollectLOH (this pass) records the LOHs in the ARM64FunctionInfo. -// 2. ARM64AsmPrinter reads the LOHs from ARM64FunctionInfo and it: +// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo. +// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it: // 1. Associates them a label. // 2. Emits them in a MCStreamer (EmitLOHDirective). // - The MCMachOStreamer records them into the MCAssembler. @@ -98,10 +98,10 @@ // - Other ObjectWriters ignore them. 
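The AdrpAdrp hint described above exists because ADRP only materializes the 4 KiB page of its target; when two globals share a page, the second ADRP is redundant and the linker can drop it. A tiny standalone computation of that page equivalence (the sample addresses are made up):

    #include <cstdint>
    #include <cstdio>

    // ADRP writes the target's address with the low 12 bits cleared.
    static uint64_t adrpPage(uint64_t Addr) { return Addr & ~UINT64_C(0xFFF); }

    int main() {
      uint64_t GlobalA = 0x100002F10, GlobalB = 0x100002F80, GlobalC = 0x100004010;
      std::printf("A and B share an ADRP page: %d\n", adrpPage(GlobalA) == adrpPage(GlobalB)); // 1
      std::printf("A and C share an ADRP page: %d\n", adrpPage(GlobalA) == adrpPage(GlobalC)); // 0
    }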
//===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" @@ -122,16 +122,16 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -#define DEBUG_TYPE "arm64-collect-loh" +#define DEBUG_TYPE "aarch64-collect-loh" static cl::opt -PreCollectRegister("arm64-collect-loh-pre-collect-register", cl::Hidden, +PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden, cl::desc("Restrict analysis to registers invovled" " in LOHs"), cl::init(true)); static cl::opt -BasicBlockScopeOnly("arm64-collect-loh-bb-only", cl::Hidden, +BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden, cl::desc("Restrict analysis at basic block scope"), cl::init(true)); @@ -164,20 +164,20 @@ STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); namespace llvm { -void initializeARM64CollectLOHPass(PassRegistry &); +void initializeAArch64CollectLOHPass(PassRegistry &); } namespace { -struct ARM64CollectLOH : public MachineFunctionPass { +struct AArch64CollectLOH : public MachineFunctionPass { static char ID; - ARM64CollectLOH() : MachineFunctionPass(ID) { - initializeARM64CollectLOHPass(*PassRegistry::getPassRegistry()); + AArch64CollectLOH() : MachineFunctionPass(ID) { + initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "ARM64 Collect Linker Optimization Hint (LOH)"; + return "AArch64 Collect Linker Optimization Hint (LOH)"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -214,14 +214,14 @@ typedef DenseMap MapRegToId; typedef SmallVector MapIdToReg; } // end anonymous namespace. -char ARM64CollectLOH::ID = 0; +char AArch64CollectLOH::ID = 0; -INITIALIZE_PASS_BEGIN(ARM64CollectLOH, "arm64-collect-loh", - "ARM64 Collect Linker Optimization Hint (LOH)", false, +INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh", + "AArch64 Collect Linker Optimization Hint (LOH)", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh", - "ARM64 Collect Linker Optimization Hint (LOH)", false, +INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", + "AArch64 Collect Linker Optimization Hint (LOH)", false, false) /// Given a couple (MBB, reg) get the corresponding set of instruction from @@ -295,7 +295,7 @@ static void initReachingDef(MachineFunction &MF, BitVector &BBKillSet = Kill[&MBB]; BBKillSet.resize(NbReg); for (const MachineInstr &MI : MBB) { - bool IsADRP = MI.getOpcode() == ARM64::ADRP; + bool IsADRP = MI.getOpcode() == AArch64::ADRP; // Process uses first. if (IsADRP || !ADRPMode) @@ -509,9 +509,9 @@ static bool canDefBePartOfLOH(const MachineInstr *Def) { switch (Opc) { default: return false; - case ARM64::ADRP: + case AArch64::ADRP: return true; - case ARM64::ADDXri: + case AArch64::ADDXri: // Check immediate to see if the immediate is an address. 
switch (Def->getOperand(2).getType()) { default: @@ -522,7 +522,7 @@ static bool canDefBePartOfLOH(const MachineInstr *Def) { case MachineOperand::MO_BlockAddress: return true; } - case ARM64::LDRXui: + case AArch64::LDRXui: // Check immediate to see if the immediate is an address. switch (Def->getOperand(2).getType()) { default: @@ -541,13 +541,13 @@ static bool isCandidateStore(const MachineInstr *Instr) { switch (Instr->getOpcode()) { default: return false; - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: + case AArch64::STRBui: + case AArch64::STRHui: + case AArch64::STRWui: + case AArch64::STRXui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: // In case we have str xA, [xA, #imm], this is two different uses // of xA and we cannot fold, otherwise the xA stored may be wrong, // even if #imm == 0. @@ -582,7 +582,7 @@ static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, MapRegToId::const_iterator It; // if all the reaching defs are not adrp, this use will not be // simplifiable. - if ((ADRPMode && Def->getOpcode() != ARM64::ADRP) || + if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) || (!ADRPMode && !canDefBePartOfLOH(Def)) || (!ADRPMode && isCandidateStore(MI) && // store are LOH candidate iff the end of the chain is used as @@ -615,7 +615,7 @@ static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, /// Based on the use to defs information (in ADRPMode), compute the /// opportunities of LOH ADRP-related. static void computeADRP(const InstrToInstrs &UseToDefs, - ARM64FunctionInfo &ARM64FI, + AArch64FunctionInfo &AArch64FI, const MachineDominatorTree *MDT) { DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); for (const auto &Entry : UseToDefs) { @@ -634,7 +634,7 @@ static void computeADRP(const InstrToInstrs &UseToDefs, SmallVector Args; Args.push_back(L2); Args.push_back(L1); - ARM64FI.addLOHDirective(MCLOH_AdrpAdrp, Args); + AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args); ++NumADRPSimpleCandidate; } #ifdef DEBUG @@ -656,19 +656,19 @@ static bool isCandidateLoad(const MachineInstr *Instr) { switch (Instr->getOpcode()) { default: return false; - case ARM64::LDRSBWui: - case ARM64::LDRSBXui: - case ARM64::LDRSHWui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - if (Instr->getOperand(2).getTargetFlags() & ARM64II::MO_GOT) + case AArch64::LDRSBWui: + case AArch64::LDRSBXui: + case AArch64::LDRSHWui: + case AArch64::LDRSHXui: + case AArch64::LDRSWui: + case AArch64::LDRBui: + case AArch64::LDRHui: + case AArch64::LDRWui: + case AArch64::LDRXui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT) return false; return true; } @@ -681,12 +681,12 @@ static bool supportLoadFromLiteral(const MachineInstr *Instr) { switch (Instr->getOpcode()) { default: return false; - case ARM64::LDRSWui: - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: + case AArch64::LDRSWui: + case AArch64::LDRWui: + case AArch64::LDRXui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: return true; } // Unreachable. 
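// For reference, a sketch of the instruction patterns this pass records and
// the directives it attaches (assembly and relocation spellings are
// illustrative only; the real operands carry MO_PAGE/MO_PAGEOFF/MO_GOT
// target flags):
//
//   adrp x8, _sym@PAGE             ; two ADRPs of the same page, one of
//   ...                            ; which is redundant
//   adrp x9, _sym@PAGE             ; -> MCLOH_AdrpAdrp on the pair
//
//   adrp x8, _sym@PAGE             ; ADRP dominating its single ADD user
//   add  x8, x8, _sym@PAGEOFF      ; -> MCLOH_AdrpAdd (candidate for ADR)
//
//   adrp x8, _sym@GOTPAGE          ; ADRP feeding a GOT load
//   ldr  x8, [x8, _sym@GOTPAGEOFF] ; -> MCLOH_AdrpLdrGot
//
// Longer ADRP -> ADD/LDR -> LDR/STR chains are handled in computeOthers()
// below.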
@@ -705,7 +705,7 @@ static bool isCandidate(const MachineInstr *Instr, return false; const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); - if (Def->getOpcode() != ARM64::ADRP) { + if (Def->getOpcode() != AArch64::ADRP) { // At this point, Def is ADDXri or LDRXui of the right type of // symbol, because we filtered out the uses that were not defined // by these kind of instructions (+ ADRP). @@ -728,7 +728,7 @@ static bool isCandidate(const MachineInstr *Instr, // - top is ADRP. // - check the simple chain property: each intermediate node must // dominates the next one. - if (Def->getOpcode() == ARM64::ADRP) + if (Def->getOpcode() == AArch64::ADRP) return MDT->dominates(Def, Instr); return false; } @@ -736,22 +736,22 @@ static bool isCandidate(const MachineInstr *Instr, static bool registerADRCandidate(const MachineInstr &Use, const InstrToInstrs &UseToDefs, const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, + AArch64FunctionInfo &AArch64FI, SetOfMachineInstr *InvolvedInLOHs, const MapRegToId &RegToId) { // Look for opportunities to turn ADRP -> ADD or // ADRP -> LDR GOTPAGEOFF into ADR. // If ADRP has more than one use. Give up. - if (Use.getOpcode() != ARM64::ADDXri && - (Use.getOpcode() != ARM64::LDRXui || - !(Use.getOperand(2).getTargetFlags() & ARM64II::MO_GOT))) + if (Use.getOpcode() != AArch64::ADDXri && + (Use.getOpcode() != AArch64::LDRXui || + !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT))) return false; InstrToInstrs::const_iterator It = UseToDefs.find(&Use); // The map may contain garbage that we need to ignore. if (It == UseToDefs.end() || It->second.empty()) return false; const MachineInstr &Def = **It->second.begin(); - if (Def.getOpcode() != ARM64::ADRP) + if (Def.getOpcode() != AArch64::ADRP) return false; // Check the number of users of ADRP. const SetOfMachineInstr *Users = @@ -772,7 +772,7 @@ static bool registerADRCandidate(const MachineInstr &Use, Args.push_back(&Def); Args.push_back(&Use); - ARM64FI.addLOHDirective(Use.getOpcode() == ARM64::ADDXri ? MCLOH_AdrpAdd + AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot, Args); return true; @@ -782,7 +782,7 @@ static bool registerADRCandidate(const MachineInstr &Use, /// opportunities of LOH non-ADRP-related static void computeOthers(const InstrToInstrs &UseToDefs, const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, const MapRegToId &RegToId, + AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId, const MachineDominatorTree *MDT) { SetOfMachineInstr *InvolvedInLOHs = nullptr; #ifdef DEBUG @@ -839,7 +839,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs, const MachineInstr *L1 = Def; const MachineInstr *L2 = nullptr; unsigned ImmediateDefOpc = Def->getOpcode(); - if (Def->getOpcode() != ARM64::ADRP) { + if (Def->getOpcode() != AArch64::ADRP) { // Check the number of users of this node. const SetOfMachineInstr *Users = getUses(DefsPerColorToUses, @@ -899,10 +899,10 @@ static void computeOthers(const InstrToInstrs &UseToDefs, continue; } - bool IsL2Add = (ImmediateDefOpc == ARM64::ADDXri); + bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri); // If the chain is three instructions long and ldr is the second element, // then this ldr must load form GOT, otherwise this is not a correct chain. 
- if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != ARM64II::MO_GOT) + if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT) continue; SmallVector Args; MCLOHType Kind; @@ -944,18 +944,18 @@ static void computeOthers(const InstrToInstrs &UseToDefs, #ifdef DEBUG // get the immediate of the load if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) + if (ImmediateDefOpc == AArch64::ADDXri) ++NumADDToLDR; else ++NumLDRToLDR; - else if (ImmediateDefOpc == ARM64::ADDXri) + else if (ImmediateDefOpc == AArch64::ADDXri) ++NumADDToLDRWithImm; else ++NumLDRToLDRWithImm; #endif // DEBUG } } else { - if (ImmediateDefOpc == ARM64::ADRP) + if (ImmediateDefOpc == AArch64::ADRP) continue; else { @@ -978,23 +978,23 @@ static void computeOthers(const InstrToInstrs &UseToDefs, #ifdef DEBUG // get the immediate of the store if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) + if (ImmediateDefOpc == AArch64::ADDXri) ++NumADDToSTR; else ++NumLDRToSTR; - else if (ImmediateDefOpc == ARM64::ADDXri) + else if (ImmediateDefOpc == AArch64::ADDXri) ++NumADDToSTRWithImm; else ++NumLDRToSTRWithImm; #endif // DEBUG } } - ARM64FI.addLOHDirective(Kind, Args); + AArch64FI.addLOHDirective(Kind, Args); } // Now, we grabbed all the big patterns, check ADR opportunities. for (const MachineInstr *Candidate : PotentialADROpportunities) - registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, ARM64FI, + registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI, InvolvedInLOHs, RegToId); } @@ -1041,15 +1041,15 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId, } } -bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { +bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); const MachineDominatorTree *MDT = &getAnalysis(); MapRegToId RegToId; MapIdToReg IdToReg; - ARM64FunctionInfo *ARM64FI = MF.getInfo(); - assert(ARM64FI && "No MachineFunctionInfo for this function!"); + AArch64FunctionInfo *AArch64FI = MF.getInfo(); + assert(AArch64FI && "No MachineFunctionInfo for this function!"); DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n'); @@ -1059,11 +1059,11 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { MachineInstr *DummyOp = nullptr; if (BasicBlockScopeOnly) { - const ARM64InstrInfo *TII = - static_cast(TM.getInstrInfo()); + const AArch64InstrInfo *TII = + static_cast(TM.getInstrInfo()); // For local analysis, create a dummy operation to record uses that are not // local. - DummyOp = MF.CreateMachineInstr(TII->get(ARM64::COPY), DebugLoc()); + DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc()); } unsigned NbReg = RegToId.size(); @@ -1084,7 +1084,7 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); // Compute LOH for ADRP. - computeADRP(ADRPToReachingDefs, *ARM64FI, MDT); + computeADRP(ADRPToReachingDefs, *AArch64FI, MDT); delete[] ColorOpToReachedUses; // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. @@ -1100,7 +1100,7 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); // Compute other than AdrpAdrp LOH. 
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *ARM64FI, RegToId, + computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId, MDT); delete[] ColorOpToReachedUses; @@ -1110,8 +1110,8 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &MF) { return Modified; } -/// createARM64CollectLOHPass - returns an instance of the Statistic for +/// createAArch64CollectLOHPass - returns an instance of the Statistic for /// linker optimization pass. -FunctionPass *llvm::createARM64CollectLOHPass() { - return new ARM64CollectLOH(); +FunctionPass *llvm::createAArch64CollectLOHPass() { + return new AArch64CollectLOH(); } diff --git a/lib/Target/ARM64/ARM64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp similarity index 85% rename from lib/Target/ARM64/ARM64ConditionalCompares.cpp rename to lib/Target/AArch64/AArch64ConditionalCompares.cpp index 2243cce51a1..452cdecf8a0 100644 --- a/lib/Target/ARM64/ARM64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ConditionalCompares.cpp --- CCMP formation for ARM64 ---------===// +//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64ConditionalCompares pass which reduces +// This file implements the AArch64ConditionalCompares pass which reduces // branching and code size by using the conditional compare instructions CCMP, // CCMN, and FCMP. // @@ -17,7 +17,7 @@ // //===----------------------------------------------------------------------===// -#include "ARM64.h" +#include "AArch64.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetVector.h" @@ -42,16 +42,16 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-ccmp" +#define DEBUG_TYPE "aarch64-ccmp" // Absolute maximum number of instructions allowed per speculated block. // This bypasses all other heuristics, so it should be set fairly high. static cl::opt BlockInstrLimit( - "arm64-ccmp-limit", cl::init(30), cl::Hidden, + "aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block.")); // Stress testing mode - disable heuristics. -static cl::opt Stress("arm64-stress-ccmp", cl::Hidden, +static cl::opt Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11")); STATISTIC(NumConsidered, "Number of ccmps considered"); @@ -98,7 +98,7 @@ STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted"); // // The cmp-conversion turns the compare instruction in CmpBB into a conditional // compare, and merges CmpBB into Head, speculatively executing its -// instructions. The ARM64 conditional compare instructions have an immediate +// instructions. The AArch64 conditional compare instructions have an immediate // operand that specifies the NZCV flag values when the condition is false and // the compare isn't executed. This makes it possible to chain compares with // different condition codes. @@ -162,13 +162,13 @@ private: SmallVector HeadCond; /// The condition code that makes Head branch to CmpBB. - ARM64CC::CondCode HeadCmpBBCC; + AArch64CC::CondCode HeadCmpBBCC; /// The branch condition in CmpBB. SmallVector CmpBBCond; /// The condition code that makes CmpBB branch to Tail. 
- ARM64CC::CondCode CmpBBTailCC; + AArch64CC::CondCode CmpBBTailCC; /// Check if the Tail PHIs are trivially convertible. bool trivialTailPHIs(); @@ -253,11 +253,11 @@ void SSACCmpConv::updateTailPHIs() { } } -// This pass runs before the ARM64DeadRegisterDefinitions pass, so compares are -// still writing virtual registers without any uses. +// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares +// are still writing virtual registers without any uses. bool SSACCmpConv::isDeadDef(unsigned DstReg) { // Writes to the zero register are dead. - if (DstReg == ARM64::WZR || DstReg == ARM64::XZR) + if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) return true; if (!TargetRegisterInfo::isVirtualRegister(DstReg)) return false; @@ -269,11 +269,11 @@ bool SSACCmpConv::isDeadDef(unsigned DstReg) { // Parse a condition code returned by AnalyzeBranch, and compute the CondCode // corresponding to TBB. // Return -static bool parseCond(ArrayRef Cond, ARM64CC::CondCode &CC) { +static bool parseCond(ArrayRef Cond, AArch64CC::CondCode &CC) { // A normal br.cond simply has the condition code. if (Cond[0].getImm() != -1) { assert(Cond.size() == 1 && "Unknown Cond array format"); - CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); + CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); return true; } // For tbz and cbz instruction, the opcode is next. @@ -282,15 +282,15 @@ static bool parseCond(ArrayRef Cond, ARM64CC::CondCode &CC) { // This includes tbz / tbnz branches which can't be converted to // ccmp + br.cond. return false; - case ARM64::CBZW: - case ARM64::CBZX: + case AArch64::CBZW: + case AArch64::CBZX: assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; return true; - case ARM64::CBNZW: - case ARM64::CBNZX: + case AArch64::CBNZW: + case AArch64::CBNZX: assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::NE; + CC = AArch64CC::NE; return true; } } @@ -300,12 +300,12 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { if (I == MBB->end()) return nullptr; // The terminator must be controlled by the flags. - if (!I->readsRegister(ARM64::NZCV)) { + if (!I->readsRegister(AArch64::NZCV)) { switch (I->getOpcode()) { - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: // These can be converted into a ccmp against #0. return I; } @@ -320,11 +320,11 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { assert(!I->isTerminator() && "Spurious terminator"); switch (I->getOpcode()) { // cmp is an alias for subs with a dead destination register. - case ARM64::SUBSWri: - case ARM64::SUBSXri: + case AArch64::SUBSWri: + case AArch64::SUBSXri: // cmn is an alias for adds with a dead destination register. - case ARM64::ADDSWri: - case ARM64::ADDSXri: + case AArch64::ADDSWri: + case AArch64::ADDSXri: // Check that the immediate operand is within range, ccmp wants a uimm5. // Rd = SUBSri Rn, imm, shift if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) { @@ -333,25 +333,25 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { return nullptr; } // Fall through. 
- case ARM64::SUBSWrr: - case ARM64::SUBSXrr: - case ARM64::ADDSWrr: - case ARM64::ADDSXrr: + case AArch64::SUBSWrr: + case AArch64::SUBSXrr: + case AArch64::ADDSWrr: + case AArch64::ADDSXrr: if (isDeadDef(I->getOperand(0).getReg())) return I; DEBUG(dbgs() << "Can't convert compare with live destination: " << *I); ++NumLiveDstRejs; return nullptr; - case ARM64::FCMPSrr: - case ARM64::FCMPDrr: - case ARM64::FCMPESrr: - case ARM64::FCMPEDrr: + case AArch64::FCMPSrr: + case AArch64::FCMPDrr: + case AArch64::FCMPESrr: + case AArch64::FCMPEDrr: return I; } // Check for flag reads and clobbers. MIOperands::PhysRegInfo PRI = - MIOperands(I).analyzePhysReg(ARM64::NZCV, TRI); + MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI); if (PRI.Reads) { // The ccmp doesn't produce exactly the same flags as the original @@ -422,7 +422,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, } // Only CmpMI is allowed to clobber the flags. - if (&I != CmpMI && I.modifiesRegister(ARM64::NZCV, TRI)) { + if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) { DEBUG(dbgs() << "Clobbers flags: " << I); return false; } @@ -519,7 +519,7 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { // Make sure the branch direction is right. if (TBB != CmpBB) { assert(TBB == Tail && "Unexpected TBB"); - HeadCmpBBCC = ARM64CC::getInvertedCondCode(HeadCmpBBCC); + HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC); } CmpBBCond.clear(); @@ -543,10 +543,10 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { } if (TBB != Tail) - CmpBBTailCC = ARM64CC::getInvertedCondCode(CmpBBTailCC); + CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC); - DEBUG(dbgs() << "Head->CmpBB on " << ARM64CC::getCondCodeName(HeadCmpBBCC) - << ", CmpBB->Tail on " << ARM64CC::getCondCodeName(CmpBBTailCC) + DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC) + << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n'); CmpMI = findConvertibleCompare(CmpBB); @@ -579,13 +579,13 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { ++NumCompBranches; unsigned Opc = 0; switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::SUBSWri; + case AArch64::CBZW: + case AArch64::CBNZW: + Opc = AArch64::SUBSWri; break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::SUBSXri; + case AArch64::CBZX: + case AArch64::CBNZX: + Opc = AArch64::SUBSXri; break; default: llvm_unreachable("Cannot convert Head branch"); @@ -615,27 +615,27 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { switch (CmpMI->getOpcode()) { default: llvm_unreachable("Unknown compare opcode"); - case ARM64::SUBSWri: Opc = ARM64::CCMPWi; break; - case ARM64::SUBSWrr: Opc = ARM64::CCMPWr; break; - case ARM64::SUBSXri: Opc = ARM64::CCMPXi; break; - case ARM64::SUBSXrr: Opc = ARM64::CCMPXr; break; - case ARM64::ADDSWri: Opc = ARM64::CCMNWi; break; - case ARM64::ADDSWrr: Opc = ARM64::CCMNWr; break; - case ARM64::ADDSXri: Opc = ARM64::CCMNXi; break; - case ARM64::ADDSXrr: Opc = ARM64::CCMNXr; break; - case ARM64::FCMPSrr: Opc = ARM64::FCCMPSrr; FirstOp = 0; break; - case ARM64::FCMPDrr: Opc = ARM64::FCCMPDrr; FirstOp = 0; break; - case ARM64::FCMPESrr: Opc = ARM64::FCCMPESrr; FirstOp = 0; break; - case ARM64::FCMPEDrr: Opc = ARM64::FCCMPEDrr; FirstOp = 0; break; - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::CCMPWi; + case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break; + case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break; + case AArch64::SUBSXri: Opc = 
AArch64::CCMPXi; break; + case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break; + case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break; + case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break; + case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break; + case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break; + case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break; + case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break; + case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break; + case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break; + case AArch64::CBZW: + case AArch64::CBNZW: + Opc = AArch64::CCMPWi; FirstOp = 0; isZBranch = true; break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::CCMPXi; + case AArch64::CBZX: + case AArch64::CBNZX: + Opc = AArch64::CCMPXi; FirstOp = 0; isZBranch = true; break; @@ -646,7 +646,7 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { // The NZCV immediate operand should provide flags for the case where Head // would have branched to Tail. These flags should cause the new Head // terminator to branch to tail. - unsigned NZCV = ARM64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); const MCInstrDesc &MCID = TII->get(Opc); MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), TII->getRegClass(MCID, 0, TRI, *MF)); @@ -665,10 +665,10 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { // If CmpMI was a terminator, we need a new conditional branch to replace it. // This now becomes a Head terminator. if (isZBranch) { - bool isNZ = CmpMI->getOpcode() == ARM64::CBNZW || - CmpMI->getOpcode() == ARM64::CBNZX; - BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(ARM64::Bcc)) - .addImm(isNZ ? ARM64CC::NE : ARM64CC::EQ) + bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW || + CmpMI->getOpcode() == AArch64::CBNZX; + BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc)) + .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ) .addOperand(CmpMI->getOperand(1)); // Branch target. } CmpMI->eraseFromParent(); @@ -687,10 +687,10 @@ int SSACCmpConv::expectedCodeSizeDelta() const { // plus a branch instruction. 
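// A worked example of the conversion (roughly what the pass produces for
// source like "a == 5 && b == 17"; exact output depends on surrounding code):
//
//   Head:  cmp  w0, #5                    Head:  cmp  w0, #5
//          b.ne Tail                             ccmp w1, #17, #0, eq
//   CmpBB: cmp  w1, #17           ==>            b.ne Tail
//          b.ne Tail                      ...
//   ...
//
// The "#0" is the NZCV immediate chosen by getNZCVToSatisfyCondCode(ne):
// when the first comparison fails (eq is false), the ccmp skips the compare
// and sets NZCV to 0, which satisfies "ne", so the merged block still
// branches to Tail exactly as the two-block form did.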
if (HeadCond[0].getImm() == -1) { switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::CBZX: + case AArch64::CBNZX: // Therefore delta += 1 delta = 1; break; @@ -706,21 +706,21 @@ int SSACCmpConv::expectedCodeSizeDelta() const { default: --delta; break; - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: + case AArch64::CBZW: + case AArch64::CBNZW: + case AArch64::CBZX: + case AArch64::CBNZX: break; } return delta; } //===----------------------------------------------------------------------===// -// ARM64ConditionalCompares Pass +// AArch64ConditionalCompares Pass //===----------------------------------------------------------------------===// namespace { -class ARM64ConditionalCompares : public MachineFunctionPass { +class AArch64ConditionalCompares : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const MCSchedModel *SchedModel; @@ -735,11 +735,11 @@ class ARM64ConditionalCompares : public MachineFunctionPass { public: static char ID; - ARM64ConditionalCompares() : MachineFunctionPass(ID) {} + AArch64ConditionalCompares() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "ARM64 Conditional Compares"; + return "AArch64 Conditional Compares"; } private: @@ -751,25 +751,25 @@ private: }; } // end anonymous namespace -char ARM64ConditionalCompares::ID = 0; +char AArch64ConditionalCompares::ID = 0; namespace llvm { -void initializeARM64ConditionalComparesPass(PassRegistry &); +void initializeAArch64ConditionalComparesPass(PassRegistry &); } -INITIALIZE_PASS_BEGIN(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass", - false, false) +INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", + "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass", - false, false) +INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", + "AArch64 CCMP Pass", false, false) -FunctionPass *llvm::createARM64ConditionalCompares() { - return new ARM64ConditionalCompares(); +FunctionPass *llvm::createAArch64ConditionalCompares() { + return new AArch64ConditionalCompares(); } -void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { +void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -781,8 +781,8 @@ void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { } /// Update the dominator tree after if-conversion erased some blocks. -void -ARM64ConditionalCompares::updateDomTree(ArrayRef Removed) { +void AArch64ConditionalCompares::updateDomTree( + ArrayRef Removed) { // convert() removes CmpBB which was previously dominated by Head. // CmpBB children should be transferred to Head. MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head); @@ -798,7 +798,7 @@ ARM64ConditionalCompares::updateDomTree(ArrayRef Removed) { /// Update LoopInfo after if-conversion. 
void -ARM64ConditionalCompares::updateLoops(ArrayRef Removed) { +AArch64ConditionalCompares::updateLoops(ArrayRef Removed) { if (!Loops) return; for (unsigned i = 0, e = Removed.size(); i != e; ++i) @@ -806,7 +806,7 @@ ARM64ConditionalCompares::updateLoops(ArrayRef Removed) { } /// Invalidate MachineTraceMetrics before if-conversion. -void ARM64ConditionalCompares::invalidateTraces() { +void AArch64ConditionalCompares::invalidateTraces() { Traces->invalidate(CmpConv.Head); Traces->invalidate(CmpConv.CmpBB); } @@ -814,7 +814,7 @@ void ARM64ConditionalCompares::invalidateTraces() { /// Apply cost model and heuristics to the if-conversion in IfConv. /// Return true if the conversion is a good idea. /// -bool ARM64ConditionalCompares::shouldConvert() { +bool AArch64ConditionalCompares::shouldConvert() { // Stress testing mode disables all cost considerations. if (Stress) return true; @@ -875,7 +875,7 @@ bool ARM64ConditionalCompares::shouldConvert() { return true; } -bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { +bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { bool Changed = false; while (CmpConv.canConvert(MBB) && shouldConvert()) { invalidateTraces(); @@ -888,8 +888,8 @@ bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { return Changed; } -bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** ARM64 Conditional Compares **********\n" +bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n" << "********** Function: " << MF.getName() << '\n'); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); diff --git a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp similarity index 79% rename from lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp rename to lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index e8f03ec833f..a2d853c85fe 100644 --- a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -1,4 +1,4 @@ -//===-- ARM64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===// +//==-- AArch64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --==// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // hardware's register renamer. //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64RegisterInfo.h" +#include "AArch64.h" +#include "AArch64RegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" @@ -21,12 +21,12 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "arm64-dead-defs" +#define DEBUG_TYPE "aarch64-dead-defs" STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); namespace { -class ARM64DeadRegisterDefinitions : public MachineFunctionPass { +class AArch64DeadRegisterDefinitions : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI); @@ -34,7 +34,7 @@ private: bool usesFrameIndex(const MachineInstr &MI); public: static char ID; // Pass identification, replacement for typeid. 
- explicit ARM64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} + explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &F) override; @@ -45,10 +45,10 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } }; -char ARM64DeadRegisterDefinitions::ID = 0; +char AArch64DeadRegisterDefinitions::ID = 0; } // end anonymous namespace -bool ARM64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( +bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( unsigned Reg, const MachineInstr &MI) { for (const MachineOperand &MO : MI.implicit_operands()) if (MO.isReg() && MO.isDef()) @@ -57,15 +57,15 @@ bool ARM64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( return false; } -bool ARM64DeadRegisterDefinitions::usesFrameIndex(const MachineInstr &MI) { +bool AArch64DeadRegisterDefinitions::usesFrameIndex(const MachineInstr &MI) { for (const MachineOperand &Op : MI.uses()) if (Op.isFI()) return true; return false; } -bool -ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock &MBB) { +bool AArch64DeadRegisterDefinitions::processMachineBasicBlock( + MachineBasicBlock &MBB) { bool Changed = false; for (MachineInstr &MI : MBB) { if (usesFrameIndex(MI)) { @@ -99,11 +99,11 @@ ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock &MBB) { default: DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); continue; - case ARM64::GPR32RegClassID: - NewReg = ARM64::WZR; + case AArch64::GPR32RegClassID: + NewReg = AArch64::WZR; break; - case ARM64::GPR64RegClassID: - NewReg = ARM64::XZR; + case AArch64::GPR64RegClassID: + NewReg = AArch64::XZR; break; } DEBUG(dbgs() << " Replacing with zero register. New:\n "); @@ -118,10 +118,10 @@ ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock &MBB) { // Scan the function for instructions that have a dead definition of a // register. Replace that register with the zero register when possible. 
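// For reference, the effect in assembly terms (illustrative): an instruction
// whose integer result is dead but whose flags are still needed, e.g.
//   subs w8, w0, w1      // w8 is never read again
// is rewritten to target the zero register instead,
//   subs wzr, w0, w1     // the canonical "cmp w0, w1"
// so the dead result does not tie up a register in the hardware's renamer.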
-bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { +bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); bool Changed = false; - DEBUG(dbgs() << "***** ARM64DeadRegisterDefinitions *****\n"); + DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n"); for (auto &MBB : MF) if (processMachineBasicBlock(MBB)) @@ -129,6 +129,6 @@ bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { return Changed; } -FunctionPass *llvm::createARM64DeadRegisterDefinitions() { - return new ARM64DeadRegisterDefinitions(); +FunctionPass *llvm::createAArch64DeadRegisterDefinitions() { + return new AArch64DeadRegisterDefinitions(); } diff --git a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp similarity index 76% rename from lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp rename to lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index a4b5d31314e..a76fd76e5ed 100644 --- a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions ---*- C++ -*-=// +//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -14,25 +14,25 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "ARM64InstrInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "AArch64InstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/MathExtras.h" using namespace llvm; namespace { -class ARM64ExpandPseudo : public MachineFunctionPass { +class AArch64ExpandPseudo : public MachineFunctionPass { public: static char ID; - ARM64ExpandPseudo() : MachineFunctionPass(ID) {} + AArch64ExpandPseudo() : MachineFunctionPass(ID) {} - const ARM64InstrInfo *TII; + const AArch64InstrInfo *TII; bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "ARM64 pseudo instruction expansion pass"; + return "AArch64 pseudo instruction expansion pass"; } private: @@ -41,7 +41,7 @@ private: bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned BitSize); }; -char ARM64ExpandPseudo::ID = 0; +char AArch64ExpandPseudo::ID = 0; } /// \brief Transfer implicit operands on the pseudo instruction to the @@ -87,17 +87,17 @@ static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) { static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII, unsigned ChunkIdx) { + const AArch64InstrInfo *TII, unsigned ChunkIdx) { assert(ChunkIdx < 4 && "Out of range chunk index specified!"); const unsigned ShiftAmt = ChunkIdx * 16; uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { + if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { // Create the ORR-immediate instruction. MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) + .addReg(AArch64::XZR) .addImm(Encoding); // Create the MOVK instruction. 
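// A self-contained sketch (not LLVM code) of the fallback strategy used by
// expandMOVImm further down: split the constant into 16-bit chunks and emit
// one MOVZ for the first non-zero chunk plus a MOVK for each remaining
// non-zero chunk.  The ORR-based helpers in this file are shortcuts for
// constants partly expressible as a logical immediate, e.g. (assumed example)
// 0x123400FF00FF00FF can be built as
//   orr  x0, xzr, #0x00FF00FF00FF00FF   // repeating 16-bit pattern
//   movk x0, #0x1234, lsl #48
// instead of a four-instruction MOVZ/MOVK sequence.  (This snippet ignores
// the MOVN path the real pass uses for mostly-ones constants.)
#include <cstdint>
#include <cstdio>

static void emitMovzMovk(uint64_t Imm) {
  if (Imm == 0) {
    std::printf("movz x0, #0\n");
    return;
  }
  bool First = true;
  for (unsigned Shift = 0; Shift < 64; Shift += 16) {
    uint64_t Chunk = (Imm >> Shift) & 0xFFFF;
    if (Chunk == 0)
      continue;                 // already zeroed by the initial MOVZ
    std::printf("%s x0, #0x%llx, lsl #%u\n", First ? "movz" : "movk",
                (unsigned long long)Chunk, Shift);
    First = false;
  }
}

int main() {
  emitMovzMovk(0x123400FF00FF00FFULL); // prints a movz + three movk sequence
  return 0;
}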
@@ -105,11 +105,11 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, const unsigned DstReg = MI.getOperand(0).getReg(); const bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); transferImpOps(MI, MIB, MIB1); MI.eraseFromParent(); @@ -124,7 +124,7 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk; - return ARM64_AM::processLogicalImmediate(Chunk, 64, Encoding); + return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding); } /// \brief Check for identical 16-bit chunks within the constant and if so @@ -138,7 +138,7 @@ static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { + const AArch64InstrInfo *TII) { typedef DenseMap CountMap; CountMap Counts; @@ -162,9 +162,9 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, const bool CountThree = Count == 3; // Create the ORR-immediate instruction. MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) + .addReg(AArch64::XZR) .addImm(Encoding); const unsigned DstReg = MI.getOperand(0).getReg(); @@ -182,12 +182,12 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, // Create the first MOVK instruction. MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && CountThree)) .addReg(DstReg) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); // In case we have three instances the whole constant is now materialized // and we can exit. @@ -207,11 +207,11 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, // Create the second MOVK instruction. MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); transferImpOps(MI, MIB, MIB2); MI.eraseFromParent(); @@ -272,7 +272,7 @@ static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) { static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { + const AArch64InstrInfo *TII) { const int NotSet = -1; const uint64_t Mask = 0xFFFF; @@ -343,11 +343,11 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, // Create the ORR-immediate instruction. 
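// An assumed worked example of the kind of constant this path targets: for
// 0x1234FFFFFFFF5678 the two middle chunks are all-ones, so the value can be
// built as
//   orr  x0, xzr, #0x0000FFFFFFFF0000   // contiguous run of ones is a
//                                       // valid logical immediate
//   movk x0, #0x1234, lsl #48           // patch the high chunk
//   movk x0, #0x5678                    // patch the low chunk
// i.e. three instructions instead of a movz plus three movk.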
uint64_t Encoding = 0; - ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding); + AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding); MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) + .addReg(AArch64::XZR) .addImm(Encoding); const unsigned DstReg = MI.getOperand(0).getReg(); @@ -356,12 +356,13 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, const bool SingleMovk = SecondMovkIdx == NotSet; // Create the first MOVK instruction. MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && SingleMovk)) .addReg(DstReg) .addImm(getChunk(UImm, FirstMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16)); + .addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16)); // Early exit in case we only need to emit a single MOVK instruction. if (SingleMovk) { @@ -372,11 +373,12 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, // Create the second MOVK instruction. MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addImm(getChunk(UImm, SecondMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16)); + .addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16)); transferImpOps(MI, MIB, MIB2); MI.eraseFromParent(); @@ -385,9 +387,9 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, /// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more /// real move-immediate instructions to synthesize the immediate. -bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned BitSize) { +bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned BitSize) { MachineInstr &MI = *MBBI; uint64_t Imm = MI.getOperand(1).getImm(); const unsigned Mask = 0xFFFF; @@ -395,12 +397,12 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, // Try a MOVI instruction (aka ORR-immediate with the zero register). uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { - unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri); + if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { + unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) .addOperand(MI.getOperand(0)) - .addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR) + .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR) .addImm(Encoding); transferImpOps(MI, MIB, MIB); MI.eraseFromParent(); @@ -504,9 +506,9 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, unsigned FirstOpc; if (BitSize == 32) { Imm &= (1LL << 32) - 1; - FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi); + FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi); } else { - FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi); + FirstOpc = (isNeg ? 
AArch64::MOVNXi : AArch64::MOVZXi); } unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN unsigned LastShift = 0; // LSL amount for last MOVK @@ -524,7 +526,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && Shift == LastShift)) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift)); // If a MOVN was used for the high bits of a negative value, flip the rest // of the bits back for use with MOVK. @@ -538,7 +540,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, } MachineInstrBuilder MIB2; - unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi); + unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi); while (Shift != LastShift) { Shift -= 16; Imm16 = (Imm >> Shift) & Mask; @@ -550,7 +552,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, getDeadRegState(DstIsDead && Shift == LastShift)) .addReg(DstReg) .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift)); } transferImpOps(MI, MIB1, MIB2); @@ -560,7 +562,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, /// \brief If MBBI references a pseudo instruction that should be expanded here, /// do the expansion and return true. Otherwise return false. -bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, +bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); @@ -568,75 +570,76 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, default: break; - case ARM64::ADDWrr: - case ARM64::SUBWrr: - case ARM64::ADDXrr: - case ARM64::SUBXrr: - case ARM64::ADDSWrr: - case ARM64::SUBSWrr: - case ARM64::ADDSXrr: - case ARM64::SUBSXrr: - case ARM64::ANDWrr: - case ARM64::ANDXrr: - case ARM64::BICWrr: - case ARM64::BICXrr: - case ARM64::ANDSWrr: - case ARM64::ANDSXrr: - case ARM64::BICSWrr: - case ARM64::BICSXrr: - case ARM64::EONWrr: - case ARM64::EONXrr: - case ARM64::EORWrr: - case ARM64::EORXrr: - case ARM64::ORNWrr: - case ARM64::ORNXrr: - case ARM64::ORRWrr: - case ARM64::ORRXrr: { + case AArch64::ADDWrr: + case AArch64::SUBWrr: + case AArch64::ADDXrr: + case AArch64::SUBXrr: + case AArch64::ADDSWrr: + case AArch64::SUBSWrr: + case AArch64::ADDSXrr: + case AArch64::SUBSXrr: + case AArch64::ANDWrr: + case AArch64::ANDXrr: + case AArch64::BICWrr: + case AArch64::BICXrr: + case AArch64::ANDSWrr: + case AArch64::ANDSXrr: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::EONWrr: + case AArch64::EONXrr: + case AArch64::EORWrr: + case AArch64::EORXrr: + case AArch64::ORNWrr: + case AArch64::ORNXrr: + case AArch64::ORRWrr: + case AArch64::ORRXrr: { unsigned Opcode; switch (MI.getOpcode()) { default: return false; - case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break; - case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break; - case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break; - case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break; - case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break; - case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break; - case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break; - case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break; - case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break; - case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break; - case ARM64::BICWrr: Opcode = ARM64::BICWrs; break; - case ARM64::BICXrr: Opcode = 
ARM64::BICXrs; break; - case ARM64::ANDSWrr: Opcode = ARM64::ANDSWrs; break; - case ARM64::ANDSXrr: Opcode = ARM64::ANDSXrs; break; - case ARM64::BICSWrr: Opcode = ARM64::BICSWrs; break; - case ARM64::BICSXrr: Opcode = ARM64::BICSXrs; break; - case ARM64::EONWrr: Opcode = ARM64::EONWrs; break; - case ARM64::EONXrr: Opcode = ARM64::EONXrs; break; - case ARM64::EORWrr: Opcode = ARM64::EORWrs; break; - case ARM64::EORXrr: Opcode = ARM64::EORXrs; break; - case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break; - case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break; - case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break; - case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break; + case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break; + case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break; + case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break; + case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break; + case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break; + case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break; + case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break; + case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break; + case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break; + case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break; + case AArch64::BICWrr: Opcode = AArch64::BICWrs; break; + case AArch64::BICXrr: Opcode = AArch64::BICXrs; break; + case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break; + case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break; + case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break; + case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break; + case AArch64::EONWrr: Opcode = AArch64::EONWrs; break; + case AArch64::EONXrr: Opcode = AArch64::EONXrs; break; + case AArch64::EORWrr: Opcode = AArch64::EORWrs; break; + case AArch64::EORXrr: Opcode = AArch64::EORXrs; break; + case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break; + case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break; + case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break; + case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break; } MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) .addOperand(MI.getOperand(2)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); transferImpOps(MI, MIB1, MIB1); MI.eraseFromParent(); return true; } - case ARM64::FCVTSHpseudo: { + case AArch64::FCVTSHpseudo: { MachineOperand Src = MI.getOperand(1); Src.setImplicit(); - unsigned SrcH = TII->getRegisterInfo().getSubReg(Src.getReg(), ARM64::hsub); - auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::FCVTSHr)) + unsigned SrcH = + TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub); + auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr)) .addOperand(MI.getOperand(0)) .addReg(SrcH, RegState::Undef) .addOperand(Src); @@ -644,33 +647,34 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM64::LOADgot: { + case AArch64::LOADgot: { // Expand into ADRP + LDR. 
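// For reference, roughly the sequence this expands to for a GOT-resident
// global "var" on ELF (assembler spelling is illustrative; Mach-O uses
// @GOTPAGE/@GOTPAGEOFF instead):
//   adrp x0, :got:var              // ADRP of the GOT slot (MO_PAGE)
//   ldr  x0, [x0, :got_lo12:var]   // LDRXui of the slot (MO_PAGEOFF|MO_NC)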
unsigned DstReg = MI.getOperand(0).getReg(); const MachineOperand &MO1 = MI.getOperand(1); unsigned Flags = MO1.getTargetFlags(); MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui)) .addOperand(MI.getOperand(0)) .addReg(DstReg); if (MO1.isGlobal()) { - MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE); + MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); MIB2.addGlobalAddress(MO1.getGlobal(), 0, - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } else if (MO1.isSymbol()) { - MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE); + MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE); MIB2.addExternalSymbol(MO1.getSymbolName(), - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); } else { assert(MO1.isCPI() && "Only expect globals, externalsymbols, or constant pools"); MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), - Flags | ARM64II::MO_PAGE); + Flags | AArch64II::MO_PAGE); MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + Flags | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); } transferImpOps(MI, MIB1, MIB2); @@ -678,20 +682,20 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return true; } - case ARM64::MOVaddr: - case ARM64::MOVaddrJT: - case ARM64::MOVaddrCP: - case ARM64::MOVaddrBA: - case ARM64::MOVaddrTLS: - case ARM64::MOVaddrEXT: { + case AArch64::MOVaddr: + case AArch64::MOVaddrJT: + case AArch64::MOVaddrCP: + case AArch64::MOVaddrBA: + case AArch64::MOVaddrTLS: + case AArch64::MOVaddrEXT: { // Expand into ADRP + ADD. unsigned DstReg = MI.getOperand(0).getReg(); MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) .addOperand(MI.getOperand(1)); MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri)) + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) .addOperand(MI.getOperand(0)) .addReg(DstReg) .addOperand(MI.getOperand(2)) @@ -702,13 +706,13 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return true; } - case ARM64::MOVi32imm: + case AArch64::MOVi32imm: return expandMOVImm(MBB, MBBI, 32); - case ARM64::MOVi64imm: + case AArch64::MOVi64imm: return expandMOVImm(MBB, MBBI, 64); - case ARM64::RET_ReallyLR: - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET)) - .addReg(ARM64::LR); + case AArch64::RET_ReallyLR: + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) + .addReg(AArch64::LR); MI.eraseFromParent(); return true; } @@ -717,7 +721,7 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, /// \brief Iterate over the instructions in basic block MBB and expand any /// pseudo instructions. Return true if anything was modified. 
-bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { +bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); @@ -730,8 +734,8 @@ bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { return Modified; } -bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast(MF.getTarget().getInstrInfo()); +bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast(MF.getTarget().getInstrInfo()); bool Modified = false; for (auto &MBB : MF) @@ -740,6 +744,6 @@ bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { } /// \brief Returns an instance of the pseudo instruction expansion pass. -FunctionPass *llvm::createARM64ExpandPseudoPass() { - return new ARM64ExpandPseudo(); +FunctionPass *llvm::createAArch64ExpandPseudoPass() { + return new AArch64ExpandPseudo(); } diff --git a/lib/Target/ARM64/ARM64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp similarity index 78% rename from lib/Target/ARM64/ARM64FastISel.cpp rename to lib/Target/AArch64/AArch64FastISel.cpp index f4bf616559a..58178b1a48b 100644 --- a/lib/Target/ARM64/ARM64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1,4 +1,4 @@ -//===-- ARM6464FastISel.cpp - ARM64 FastISel implementation ---------------===// +//===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===// // // The LLVM Compiler Infrastructure // @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// // -// This file defines the ARM64-specific support for the FastISel class. Some +// This file defines the AArch64-specific support for the FastISel class. Some // of the target-specific code is generated by tablegen in the file -// ARM64GenFastISel.inc, which is #included here. +// AArch64GenFastISel.inc, which is #included here. // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "AArch64Subtarget.h" +#include "AArch64CallingConv.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -40,7 +40,7 @@ using namespace llvm; namespace { -class ARM64FastISel : public FastISel { +class AArch64FastISel : public FastISel { class Address { public: @@ -85,9 +85,9 @@ class ARM64FastISel : public FastISel { bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); } }; - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; LLVMContext *Context; private: @@ -130,8 +130,8 @@ private: unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); - unsigned ARM64MaterializeFP(const ConstantFP *CFP, MVT VT); - unsigned ARM64MaterializeGV(const GlobalValue *GV); + unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT); + unsigned AArch64MaterializeGV(const GlobalValue *GV); // Call handling routines. 
private: @@ -150,29 +150,29 @@ public: unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; unsigned TargetMaterializeConstant(const Constant *C) override; - explicit ARM64FastISel(FunctionLoweringInfo &funcInfo, + explicit AArch64FastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo) { - Subtarget = &TM.getSubtarget(); + Subtarget = &TM.getSubtarget(); Context = &funcInfo.Fn->getContext(); } bool TargetSelectInstruction(const Instruction *I) override; -#include "ARM64GenFastISel.inc" +#include "AArch64GenFastISel.inc" }; } // end anonymous namespace -#include "ARM64GenCallingConv.inc" +#include "AArch64GenCallingConv.inc" -CCAssignFn *ARM64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { +CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { if (CC == CallingConv::WebKit_JS) - return CC_ARM64_WebKit_JS; - return Subtarget->isTargetDarwin() ? CC_ARM64_DarwinPCS : CC_ARM64_AAPCS; + return CC_AArch64_WebKit_JS; + return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; } -unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { +unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && "Alloca should always return a pointer."); @@ -184,8 +184,8 @@ unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { - unsigned ResultReg = createResultReg(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri), + unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), ResultReg) .addFrameIndex(SI->second) .addImm(0) @@ -196,7 +196,7 @@ unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { return 0; } -unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) { +unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) { if (VT != MVT::f32 && VT != MVT::f64) return 0; @@ -209,11 +209,11 @@ unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) { int Imm; unsigned Opc; if (is64bit) { - Imm = ARM64_AM::getFP64Imm(Val); - Opc = ARM64::FMOVDi; + Imm = AArch64_AM::getFP64Imm(Val); + Opc = AArch64::FMOVDi; } else { - Imm = ARM64_AM::getFP32Imm(Val); - Opc = ARM64::FMOVSi; + Imm = AArch64_AM::getFP32Imm(Val); + Opc = AArch64::FMOVSi; } unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) @@ -228,19 +228,19 @@ unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) { Align = DL.getTypeAllocSize(CFP->getType()); unsigned Idx = MCP.getConstantPoolIndex(cast(CFP), Align); - unsigned ADRPReg = createResultReg(&ARM64::GPR64commonRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg).addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGE); + unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), + ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE); - unsigned Opc = is64bit ? ARM64::LDRDui : ARM64::LDRSui; + unsigned Opc = is64bit ? 
AArch64::LDRDui : AArch64::LDRSui; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(ADRPReg) - .addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); return ResultReg; } -unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) { +unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) { // We can't handle thread-local variables quickly yet. Unfortunately we have // to peer through any aliases to find out if that rule applies. const GlobalValue *TLSGV = GV; @@ -257,37 +257,37 @@ unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) { if (!DestEVT.isSimple()) return 0; - unsigned ADRPReg = createResultReg(&ARM64::GPR64commonRegClass); + unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); unsigned ResultReg; - if (OpFlags & ARM64II::MO_GOT) { + if (OpFlags & AArch64II::MO_GOT) { // ADRP + LDRX - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGE); + .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE); - ResultReg = createResultReg(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::LDRXui), + ResultReg = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), ResultReg) .addReg(ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); + .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); } else { // ADRP + ADDX - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg).addGlobalAddress(GV, 0, ARM64II::MO_PAGE); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), + ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE); - ResultReg = createResultReg(&ARM64::GPR64spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri), + ResultReg = createResultReg(&AArch64::GPR64spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), ResultReg) .addReg(ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC) + .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC) .addImm(0); } return ResultReg; } -unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) { +unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. @@ -297,15 +297,15 @@ unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) { // FIXME: Handle ConstantInt. if (const ConstantFP *CFP = dyn_cast(C)) - return ARM64MaterializeFP(CFP, VT); + return AArch64MaterializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast(C)) - return ARM64MaterializeGV(GV); + return AArch64MaterializeGV(GV); return 0; } // Computes the address to get to an object. 
-bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) { +bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) { const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { @@ -413,7 +413,7 @@ bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) { return Addr.isValid(); } -bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) { +bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(Ty, true); // Only handle simple types. @@ -430,7 +430,7 @@ bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) { return TLI.isTypeLegal(VT); } -bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { +bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; @@ -442,8 +442,8 @@ bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { return false; } -bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, - bool UseUnscaled) { +bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT, + int64_t ScaleFactor, bool UseUnscaled) { bool needsLowering = false; int64_t Offset = Addr.getOffset(); switch (VT.SimpleTy) { @@ -486,9 +486,9 @@ bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, return true; } -void ARM64FastISel::AddLoadStoreOperands(Address &Addr, - const MachineInstrBuilder &MIB, - unsigned Flags, bool UseUnscaled) { +void AArch64FastISel::AddLoadStoreOperands(Address &Addr, + const MachineInstrBuilder &MIB, + unsigned Flags, bool UseUnscaled) { int64_t Offset = Addr.getOffset(); // Frame base works a bit differently. Handle it separately. if (Addr.getKind() == Address::FrameIndexBase) { @@ -507,8 +507,8 @@ void ARM64FastISel::AddLoadStoreOperands(Address &Addr, } } -bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, - bool UseUnscaled) { +bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, + bool UseUnscaled) { // Negative offsets require unscaled, 9-bit, signed immediate offsets. // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. if (!UseUnscaled && Addr.getOffset() < 0) @@ -525,32 +525,32 @@ bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, VTIsi1 = true; // Intentional fall-through. case MVT::i8: - Opc = UseUnscaled ? ARM64::LDURBBi : ARM64::LDRBBui; - RC = &ARM64::GPR32RegClass; + Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui; + RC = &AArch64::GPR32RegClass; ScaleFactor = 1; break; case MVT::i16: - Opc = UseUnscaled ? ARM64::LDURHHi : ARM64::LDRHHui; - RC = &ARM64::GPR32RegClass; + Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui; + RC = &AArch64::GPR32RegClass; ScaleFactor = 2; break; case MVT::i32: - Opc = UseUnscaled ? ARM64::LDURWi : ARM64::LDRWui; - RC = &ARM64::GPR32RegClass; + Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui; + RC = &AArch64::GPR32RegClass; ScaleFactor = 4; break; case MVT::i64: - Opc = UseUnscaled ? ARM64::LDURXi : ARM64::LDRXui; - RC = &ARM64::GPR64RegClass; + Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui; + RC = &AArch64::GPR64RegClass; ScaleFactor = 8; break; case MVT::f32: - Opc = UseUnscaled ? ARM64::LDURSi : ARM64::LDRSui; + Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui; RC = TLI.getRegClassFor(VT); ScaleFactor = 4; break; case MVT::f64: - Opc = UseUnscaled ? ARM64::LDURDi : ARM64::LDRDui; + Opc = UseUnscaled ? 
AArch64::LDURDi : AArch64::LDRDui; RC = TLI.getRegClassFor(VT); ScaleFactor = 8; break; @@ -577,18 +577,18 @@ bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, // Loading an i1 requires special handling. if (VTIsi1) { - MRI.constrainRegClass(ResultReg, &ARM64::GPR32RegClass); - unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass); + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ANDReg) .addReg(ResultReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); ResultReg = ANDReg; } return true; } -bool ARM64FastISel::SelectLoad(const Instruction *I) { +bool AArch64FastISel::SelectLoad(const Instruction *I) { MVT VT; // Verify we have a legal type before going any further. Currently, we handle // simple types that will directly fit in a register (i32/f32/i64/f64) or @@ -609,8 +609,8 @@ bool ARM64FastISel::SelectLoad(const Instruction *I) { return true; } -bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, - bool UseUnscaled) { +bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, + bool UseUnscaled) { // Negative offsets require unscaled, 9-bit, signed immediate offsets. // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. if (!UseUnscaled && Addr.getOffset() < 0) @@ -626,27 +626,27 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, case MVT::i1: VTIsi1 = true; case MVT::i8: - StrOpc = UseUnscaled ? ARM64::STURBBi : ARM64::STRBBui; + StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui; ScaleFactor = 1; break; case MVT::i16: - StrOpc = UseUnscaled ? ARM64::STURHHi : ARM64::STRHHui; + StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui; ScaleFactor = 2; break; case MVT::i32: - StrOpc = UseUnscaled ? ARM64::STURWi : ARM64::STRWui; + StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui; ScaleFactor = 4; break; case MVT::i64: - StrOpc = UseUnscaled ? ARM64::STURXi : ARM64::STRXui; + StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui; ScaleFactor = 8; break; case MVT::f32: - StrOpc = UseUnscaled ? ARM64::STURSi : ARM64::STRSui; + StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui; ScaleFactor = 4; break; case MVT::f64: - StrOpc = UseUnscaled ? ARM64::STURDi : ARM64::STRDui; + StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui; ScaleFactor = 8; break; } @@ -666,12 +666,12 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, // Storing an i1 requires special handling. if (VTIsi1) { - MRI.constrainRegClass(SrcReg, &ARM64::GPR32RegClass); - unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass); + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ANDReg) .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); SrcReg = ANDReg; } // Create the base instruction, then add the operands. 
@@ -681,7 +681,7 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, return true; } -bool ARM64FastISel::SelectStore(const Instruction *I) { +bool AArch64FastISel::SelectStore(const Instruction *I) { MVT VT; Value *Op0 = I->getOperand(0); // Verify we have a legal type before going any further. Currently, we handle @@ -706,53 +706,53 @@ bool ARM64FastISel::SelectStore(const Instruction *I) { return true; } -static ARM64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { +static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { switch (Pred) { case CmpInst::FCMP_ONE: case CmpInst::FCMP_UEQ: default: // AL is our "false" for now. The other two need more compares. - return ARM64CC::AL; + return AArch64CC::AL; case CmpInst::ICMP_EQ: case CmpInst::FCMP_OEQ: - return ARM64CC::EQ; + return AArch64CC::EQ; case CmpInst::ICMP_SGT: case CmpInst::FCMP_OGT: - return ARM64CC::GT; + return AArch64CC::GT; case CmpInst::ICMP_SGE: case CmpInst::FCMP_OGE: - return ARM64CC::GE; + return AArch64CC::GE; case CmpInst::ICMP_UGT: case CmpInst::FCMP_UGT: - return ARM64CC::HI; + return AArch64CC::HI; case CmpInst::FCMP_OLT: - return ARM64CC::MI; + return AArch64CC::MI; case CmpInst::ICMP_ULE: case CmpInst::FCMP_OLE: - return ARM64CC::LS; + return AArch64CC::LS; case CmpInst::FCMP_ORD: - return ARM64CC::VC; + return AArch64CC::VC; case CmpInst::FCMP_UNO: - return ARM64CC::VS; + return AArch64CC::VS; case CmpInst::FCMP_UGE: - return ARM64CC::PL; + return AArch64CC::PL; case CmpInst::ICMP_SLT: case CmpInst::FCMP_ULT: - return ARM64CC::LT; + return AArch64CC::LT; case CmpInst::ICMP_SLE: case CmpInst::FCMP_ULE: - return ARM64CC::LE; + return AArch64CC::LE; case CmpInst::FCMP_UNE: case CmpInst::ICMP_NE: - return ARM64CC::NE; + return AArch64CC::NE; case CmpInst::ICMP_UGE: - return ARM64CC::HS; + return AArch64CC::HS; case CmpInst::ICMP_ULT: - return ARM64CC::LO; + return AArch64CC::LO; } } -bool ARM64FastISel::SelectBranch(const Instruction *I) { +bool AArch64FastISel::SelectBranch(const Instruction *I) { const BranchInst *BI = cast(I); MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; @@ -760,8 +760,8 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) + AArch64CC::CondCode CC = getCompareCC(CI->getPredicate()); + if (CC == AArch64CC::AL) return false; // Emit the cmp. @@ -769,7 +769,7 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { return false; // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) .addImm(CC) .addMBB(TBB); FuncInfo.MBB->addSuccessor(TBB); @@ -788,26 +788,27 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { // Issue an extract_subreg to get the lower 32-bits. 
if (SrcVT == MVT::i64) CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true, - ARM64::sub_32); + AArch64::sub_32); - MRI.constrainRegClass(CondReg, &ARM64::GPR32RegClass); - unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) + MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass); + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::ANDWri), ANDReg) .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::SUBSWri)) .addReg(ANDReg) .addReg(ANDReg) .addImm(0) .addImm(0); - unsigned CC = ARM64CC::NE; + unsigned CC = AArch64CC::NE; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) .addImm(CC) .addMBB(TBB); FuncInfo.MBB->addSuccessor(TBB); @@ -818,7 +819,7 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { dyn_cast(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::B)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) .addMBB(Target); FuncInfo.MBB->addSuccessor(Target); return true; @@ -835,19 +836,19 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { // Regardless, the compare has been done in the predecessor block, // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri), - ARM64::WZR) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri), + AArch64::WZR) .addReg(CondReg) .addImm(0) .addImm(0); - unsigned CC = ARM64CC::NE; + unsigned CC = AArch64CC::NE; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) .addImm(CC) .addMBB(TBB); FuncInfo.MBB->addSuccessor(TBB); @@ -855,14 +856,14 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) { return true; } -bool ARM64FastISel::SelectIndirectBr(const Instruction *I) { +bool AArch64FastISel::SelectIndirectBr(const Instruction *I) { const IndirectBrInst *BI = cast(I); unsigned AddrReg = getRegForValue(BI->getOperand(0)); if (AddrReg == 0) return false; // Emit the indirect branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BR)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR)) .addReg(AddrReg); // Make sure the CFG is up-to-date. 
@@ -872,7 +873,7 @@ bool ARM64FastISel::SelectIndirectBr(const Instruction *I) { return true; } -bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { +bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); EVT SrcEVT = TLI.getValueType(Ty, true); if (!SrcEVT.isSimple()) @@ -916,26 +917,26 @@ bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { needsExt = true; // Intentional fall-through. case MVT::i32: - ZReg = ARM64::WZR; + ZReg = AArch64::WZR; if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSWri : ARM64::SUBSWri; + CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri; else - CmpOpc = ARM64::SUBSWrr; + CmpOpc = AArch64::SUBSWrr; break; case MVT::i64: - ZReg = ARM64::XZR; + ZReg = AArch64::XZR; if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSXri : ARM64::SUBSXri; + CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri; else - CmpOpc = ARM64::SUBSXrr; + CmpOpc = AArch64::SUBSXrr; break; case MVT::f32: isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPSri : ARM64::FCMPSrr; + CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr; break; case MVT::f64: isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPDri : ARM64::FCMPDrr; + CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr; break; } @@ -986,12 +987,12 @@ bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { return true; } -bool ARM64FastISel::SelectCmp(const Instruction *I) { +bool AArch64FastISel::SelectCmp(const Instruction *I) { const CmpInst *CI = cast(I); // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) + AArch64CC::CondCode CC = getCompareCC(CI->getPredicate()); + if (CC == AArch64CC::AL) return false; // Emit the cmp. @@ -999,19 +1000,19 @@ bool ARM64FastISel::SelectCmp(const Instruction *I) { return false; // Now set a register based on the comparison. 
- ARM64CC::CondCode invertedCC = getInvertedCondCode(CC); - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::CSINCWr), + AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); + unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), ResultReg) - .addReg(ARM64::WZR) - .addReg(ARM64::WZR) + .addReg(AArch64::WZR) + .addReg(AArch64::WZR) .addImm(invertedCC); UpdateValueMap(I, ResultReg); return true; } -bool ARM64FastISel::SelectSelect(const Instruction *I) { +bool AArch64FastISel::SelectSelect(const Instruction *I) { const SelectInst *SI = cast(I); EVT DestEVT = TLI.getValueType(SI->getType(), true); @@ -1034,14 +1035,14 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) { return false; - MRI.constrainRegClass(CondReg, &ARM64::GPR32RegClass); - unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass); + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ANDReg) .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri)) .addReg(ANDReg) .addReg(ANDReg) .addImm(0) @@ -1052,16 +1053,16 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) { default: return false; case MVT::i32: - SelectOpc = ARM64::CSELWr; + SelectOpc = AArch64::CSELWr; break; case MVT::i64: - SelectOpc = ARM64::CSELXr; + SelectOpc = AArch64::CSELXr; break; case MVT::f32: - SelectOpc = ARM64::FCSELSrrr; + SelectOpc = AArch64::FCSELSrrr; break; case MVT::f64: - SelectOpc = ARM64::FCSELDrrr; + SelectOpc = AArch64::FCSELDrrr; break; } @@ -1070,13 +1071,13 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) { ResultReg) .addReg(TrueReg) .addReg(FalseReg) - .addImm(ARM64CC::NE); + .addImm(AArch64CC::NE); UpdateValueMap(I, ResultReg); return true; } -bool ARM64FastISel::SelectFPExt(const Instruction *I) { +bool AArch64FastISel::SelectFPExt(const Instruction *I) { Value *V = I->getOperand(0); if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) return false; @@ -1085,14 +1086,14 @@ bool ARM64FastISel::SelectFPExt(const Instruction *I) { if (Op == 0) return false; - unsigned ResultReg = createResultReg(&ARM64::FPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTDSr), + unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), ResultReg).addReg(Op); UpdateValueMap(I, ResultReg); return true; } -bool ARM64FastISel::SelectFPTrunc(const Instruction *I) { +bool AArch64FastISel::SelectFPTrunc(const Instruction *I) { Value *V = I->getOperand(0); if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) return false; @@ -1101,15 +1102,15 @@ bool ARM64FastISel::SelectFPTrunc(const Instruction *I) { if (Op == 0) return false; - unsigned ResultReg = createResultReg(&ARM64::FPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTSDr), + unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 
TII.get(AArch64::FCVTSDr), ResultReg).addReg(Op); UpdateValueMap(I, ResultReg); return true; } // FPToUI and FPToSI -bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { +bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { MVT DestVT; if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) return false; @@ -1125,24 +1126,24 @@ bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { unsigned Opc; if (SrcVT == MVT::f64) { if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWDr : ARM64::FCVTZSUXDr; + Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; else - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWDr : ARM64::FCVTZUUXDr; + Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; } else { if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWSr : ARM64::FCVTZSUXSr; + Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; else - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWSr : ARM64::FCVTZUUXSr; + Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; } unsigned ResultReg = createResultReg( - DestVT == MVT::i32 ? &ARM64::GPR32RegClass : &ARM64::GPR64RegClass); + DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg); UpdateValueMap(I, ResultReg); return true; } -bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { +bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { MVT DestVT; if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) return false; @@ -1163,20 +1164,20 @@ bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { return false; } - MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &ARM64::GPR64RegClass - : &ARM64::GPR32RegClass); + MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass + : &AArch64::GPR32RegClass); unsigned Opc; if (SrcVT == MVT::i64) { if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUXSri : ARM64::SCVTFUXDri; + Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUXSri : ARM64::UCVTFUXDri; + Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; } else { if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUWSri : ARM64::SCVTFUWDri; + Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUWSri : ARM64::UCVTFUWDri; + Opc = (DestVT == MVT::f32) ? 
AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; } unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); @@ -1186,12 +1187,11 @@ bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { return true; } -bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl &Args, - SmallVectorImpl &ArgRegs, - SmallVectorImpl &ArgVTs, - SmallVectorImpl &ArgFlags, - SmallVectorImpl &RegArgs, - CallingConv::ID CC, unsigned &NumBytes) { +bool AArch64FastISel::ProcessCallArgs( + SmallVectorImpl &Args, SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, + SmallVectorImpl &RegArgs, CallingConv::ID CC, + unsigned &NumBytes) { SmallVector ArgLocs; CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); @@ -1258,7 +1258,7 @@ bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl &Args, Address Addr; Addr.setKind(Address::RegBase); - Addr.setReg(ARM64::SP); + Addr.setReg(AArch64::SP); Addr.setOffset(VA.getLocMemOffset() + BEAlign); if (!EmitStore(ArgVT, Arg, Addr)) @@ -1268,9 +1268,9 @@ bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl &Args, return true; } -bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes) { +bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) @@ -1302,8 +1302,8 @@ bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, return true; } -bool ARM64FastISel::SelectCall(const Instruction *I, - const char *IntrMemName = nullptr) { +bool AArch64FastISel::SelectCall(const Instruction *I, + const char *IntrMemName = nullptr) { const CallInst *CI = cast(I); const Value *Callee = CI->getCalledValue(); @@ -1396,7 +1396,7 @@ bool ARM64FastISel::SelectCall(const Instruction *I, // Issue the call. MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BL)); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL)); if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); else @@ -1421,15 +1421,15 @@ bool ARM64FastISel::SelectCall(const Instruction *I, return true; } -bool ARM64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) { +bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) { if (Alignment) return Len / Alignment <= 4; else return Len < 32; } -bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, - unsigned Alignment) { +bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, + uint64_t Len, unsigned Alignment) { // Make sure we don't bloat code by inlining very large memcpy's. if (!IsMemCpySmall(Len, Alignment)) return false; @@ -1481,7 +1481,7 @@ bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, return true; } -bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { +bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. 
switch (I.getIntrinsicID()) { default: @@ -1539,7 +1539,7 @@ bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return SelectCall(&I, "memset"); } case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BRK)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) .addImm(1); return true; } @@ -1547,7 +1547,7 @@ bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return false; } -bool ARM64FastISel::SelectRet(const Instruction *I) { +bool AArch64FastISel::SelectRet(const Instruction *I) { const ReturnInst *Ret = cast(I); const Function &F = *I->getParent()->getParent(); @@ -1569,8 +1569,8 @@ bool ARM64FastISel::SelectRet(const Instruction *I) { SmallVector ValLocs; CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext()); - CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; + CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; CCInfo.AnalyzeReturn(Outs, RetCC); // Only handle a single return value for now. @@ -1631,13 +1631,13 @@ bool ARM64FastISel::SelectRet(const Instruction *I) { } MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::RET_ReallyLR)); + TII.get(AArch64::RET_ReallyLR)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); return true; } -bool ARM64FastISel::SelectTrunc(const Instruction *I) { +bool AArch64FastISel::SelectTrunc(const Instruction *I) { Type *DestTy = I->getType(); Value *Op = I->getOperand(0); Type *SrcTy = Op->getType(); @@ -1684,14 +1684,14 @@ bool ARM64FastISel::SelectTrunc(const Instruction *I) { } // Issue an extract_subreg to get the lower 32-bits. unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true, - ARM64::sub_32); - MRI.constrainRegClass(Reg32, &ARM64::GPR32RegClass); + AArch64::sub_32); + MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass); // Create the AND instruction which performs the actual truncation. 
- unsigned ANDReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ANDReg) .addReg(Reg32) - .addImm(ARM64_AM::encodeLogicalImmediate(Mask, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32)); SrcReg = ANDReg; } @@ -1699,7 +1699,7 @@ bool ARM64FastISel::SelectTrunc(const Instruction *I) { return true; } -unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { +unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || DestVT == MVT::i64) && "Unexpected value type."); @@ -1708,22 +1708,22 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { DestVT = MVT::i32; if (isZExt) { - MRI.constrainRegClass(SrcReg, &ARM64::GPR32RegClass); - unsigned ResultReg = createResultReg(&ARM64::GPR32spRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), + MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass); + unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), ResultReg) .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); if (DestVT == MVT::i64) { // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. - unsigned Reg64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass); + unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::SUBREG_TO_REG), Reg64) + TII.get(AArch64::SUBREG_TO_REG), Reg64) .addImm(0) .addReg(ResultReg) - .addImm(ARM64::sub_32); + .addImm(AArch64::sub_32); ResultReg = Reg64; } return ResultReg; @@ -1732,8 +1732,8 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { // FIXME: We're SExt i1 to i64. return 0; } - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SBFMWri), + unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri), ResultReg) .addReg(SrcReg) .addImm(0) @@ -1742,8 +1742,8 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { } } -unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, - bool isZExt) { +unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + bool isZExt) { assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); unsigned Opc; unsigned Imm = 0; @@ -1755,21 +1755,21 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, return Emiti1Ext(SrcReg, DestVT, isZExt); case MVT::i8: if (DestVT == MVT::i64) - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; else - Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri; + Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri; Imm = 7; break; case MVT::i16: if (DestVT == MVT::i64) - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; else - Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri; + Opc = isZExt ? 
AArch64::UBFMWri : AArch64::SBFMWri; Imm = 15; break; case MVT::i32: assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; Imm = 31; break; } @@ -1778,12 +1778,12 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, if (DestVT == MVT::i8 || DestVT == MVT::i16) DestVT = MVT::i32; else if (DestVT == MVT::i64) { - unsigned Src64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass); + unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::SUBREG_TO_REG), Src64) + TII.get(AArch64::SUBREG_TO_REG), Src64) .addImm(0) .addReg(SrcReg) - .addImm(ARM64::sub_32); + .addImm(AArch64::sub_32); SrcReg = Src64; } @@ -1796,7 +1796,7 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, return ResultReg; } -bool ARM64FastISel::SelectIntExt(const Instruction *I) { +bool AArch64FastISel::SelectIntExt(const Instruction *I) { // On ARM, in general, integer casts don't involve legal types; this code // handles promotable integers. The high bits for a type smaller than // the register size are assumed to be undefined. @@ -1825,7 +1825,7 @@ bool ARM64FastISel::SelectIntExt(const Instruction *I) { return true; } -bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { +bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { EVT DestEVT = TLI.getValueType(I->getType(), true); if (!DestEVT.isSimple()) return false; @@ -1840,13 +1840,13 @@ bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { default: return false; case ISD::SREM: - DivOpc = is64bit ? ARM64::SDIVXr : ARM64::SDIVWr; + DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; break; case ISD::UREM: - DivOpc = is64bit ? ARM64::UDIVXr : ARM64::UDIVWr; + DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; break; } - unsigned MSubOpc = is64bit ? ARM64::MSUBXrrr : ARM64::MSUBWrrr; + unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; unsigned Src0Reg = getRegForValue(I->getOperand(0)); if (!Src0Reg) return false; @@ -1870,7 +1870,7 @@ bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { return true; } -bool ARM64FastISel::SelectMul(const Instruction *I) { +bool AArch64FastISel::SelectMul(const Instruction *I) { EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true); if (!SrcEVT.isSimple()) return false; @@ -1889,12 +1889,12 @@ bool ARM64FastISel::SelectMul(const Instruction *I) { case MVT::i8: case MVT::i16: case MVT::i32: - ZReg = ARM64::WZR; - Opc = ARM64::MADDWrrr; + ZReg = AArch64::WZR; + Opc = AArch64::MADDWrrr; break; case MVT::i64: - ZReg = ARM64::XZR; - Opc = ARM64::MADDXrrr; + ZReg = AArch64::XZR; + Opc = AArch64::MADDXrrr; break; } @@ -1916,7 +1916,7 @@ bool ARM64FastISel::SelectMul(const Instruction *I) { return true; } -bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) { +bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; @@ -1966,12 +1966,12 @@ bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) { } return false; // Silence warnings. 
- (void)&CC_ARM64_DarwinPCS_VarArg; + (void)&CC_AArch64_DarwinPCS_VarArg; } namespace llvm { -llvm::FastISel *ARM64::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) { - return new ARM64FastISel(funcInfo, libInfo); +llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { + return new AArch64FastISel(funcInfo, libInfo); } } diff --git a/lib/Target/ARM64/ARM64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp similarity index 82% rename from lib/Target/ARM64/ARM64FrameLowering.cpp rename to lib/Target/AArch64/AArch64FrameLowering.cpp index 9c17488ec58..deb306a506d 100644 --- a/lib/Target/ARM64/ARM64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1,4 +1,4 @@ -//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====// +//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of TargetFrameLowering class. +// This file contains the AArch64 implementation of TargetFrameLowering class. // //===----------------------------------------------------------------------===// -#include "ARM64FrameLowering.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64Subtarget.h" -#include "ARM64TargetMachine.h" +#include "AArch64FrameLowering.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" +#include "AArch64TargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -33,8 +33,8 @@ using namespace llvm; #define DEBUG_TYPE "frame-info" -static cl::opt EnableRedZone("arm64-redzone", - cl::desc("enable use of redzone on ARM64"), +static cl::opt EnableRedZone("aarch64-redzone", + cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden); STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); @@ -59,7 +59,7 @@ static unsigned estimateStackSize(MachineFunction &MF) { return (unsigned)Offset; } -bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { +bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; // Don't use the red zone if the function explicitly asks us not to. @@ -69,7 +69,7 @@ bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { return false; const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64FunctionInfo *AFI = MF.getInfo(); + const AArch64FunctionInfo *AFI = MF.getInfo(); unsigned NumBytes = AFI->getLocalStackSize(); // Note: currently hasFP() is always true for hasCalls(), but that's an @@ -82,13 +82,13 @@ bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. 
-bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const { +bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); #ifndef NDEBUG const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on ARM64!"); + "No stack realignment on AArch64!"); #endif return (MFI->hasCalls() || MFI->hasVarSizedObjects() || @@ -100,15 +100,16 @@ bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const { /// immediately on entry to the current function. This eliminates the need for /// add/sub sp brackets around call sites. Returns true if the call frame is /// included as part of the stack frame. -bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { +bool +AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } -void ARM64FrameLowering::eliminateCallFramePseudoInstr( +void AArch64FrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - const ARM64InstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + const AArch64InstrInfo *TII = + static_cast(MF.getTarget().getInstrInfo()); DebugLoc DL = I->getDebugLoc(); int Opc = I->getOpcode(); bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); @@ -138,26 +139,26 @@ void ARM64FrameLowering::eliminateCallFramePseudoInstr( // Mostly call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII); } } else if (CalleePopAmount != 0) { // If the calling convention demands that the callee pops arguments from the // stack, we want to add it back if we have a reserved call frame. assert(CalleePopAmount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -CalleePopAmount, TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount, + TII); } MBB.erase(I); } -void -ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FramePtr) const { +void AArch64FrameLowering::emitCalleeSavedFrameMoves( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + unsigned FramePtr) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); + const AArch64InstrInfo *TII = TM.getInstrInfo(); DebugLoc DL = MBB.findDebugLoc(MBBI); // Add callee saved registers to move list. @@ -185,7 +186,7 @@ ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, // method automatically generates the directives when frame pointers are // used. If we generate CFI directives for the extra "STP"s, the linker will // lose track of the correct values for the frame pointer and link register. 
- if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) { + if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) { TotalSkipped += stackGrowth; continue; } @@ -198,15 +199,15 @@ ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, } } -void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { +void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); + const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo(); + const AArch64InstrInfo *TII = TM.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); - ARM64FunctionInfo *AFI = MF.getInfo(); + AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -224,7 +225,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { // REDZONE: If the stack size is less than 128 bytes, we don't need // to actually allocate. if (NumBytes && !canUseRedZone(MF)) { - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); // Encode the stack size of the leaf function. @@ -244,9 +245,9 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { if (HasFP) { // First instruction must a) allocate the stack and b) have an immediate // that is a multiple of -2. - assert((MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) && - MBBI->getOperand(3).getReg() == ARM64::SP && + assert((MBBI->getOpcode() == AArch64::STPXpre || + MBBI->getOpcode() == AArch64::STPDpre) && + MBBI->getOperand(3).getReg() == AArch64::SP && MBBI->getOperand(4).getImm() < 0 && (MBBI->getOperand(4).getImm() & 1) == 0); @@ -258,10 +259,10 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { } // Move past the saves of the callee-saved registers. - while (MBBI->getOpcode() == ARM64::STPXi || - MBBI->getOpcode() == ARM64::STPDi || - MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) { + while (MBBI->getOpcode() == AArch64::STPXi || + MBBI->getOpcode() == AArch64::STPDi || + MBBI->getOpcode() == AArch64::STPXpre || + MBBI->getOpcode() == AArch64::STPDpre) { ++MBBI; NumBytes -= 16; } @@ -271,7 +272,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. - emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII, + emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, MachineInstr::FrameSetup); } @@ -282,7 +283,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes) { // If we're a leaf function, try using the red zone. if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); } @@ -295,7 +296,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { // needed. 
// if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false); + TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); if (needsFrameMoves) { const DataLayout *TD = MF.getTarget().getDataLayout(); @@ -377,7 +378,7 @@ void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); // Record the location of the stored LR - unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true); + unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, LR, StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -410,15 +411,16 @@ static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) { static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { unsigned RtIdx = 0; - if (MI->getOpcode() == ARM64::LDPXpost || MI->getOpcode() == ARM64::LDPDpost) + if (MI->getOpcode() == AArch64::LDPXpost || + MI->getOpcode() == AArch64::LDPDpost) RtIdx = 1; - if (MI->getOpcode() == ARM64::LDPXpost || - MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi || - MI->getOpcode() == ARM64::LDPDi) { + if (MI->getOpcode() == AArch64::LDPXpost || + MI->getOpcode() == AArch64::LDPDpost || + MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) { if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) || !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) || - MI->getOperand(RtIdx + 2).getReg() != ARM64::SP) + MI->getOperand(RtIdx + 2).getReg() != AArch64::SP) return false; return true; } @@ -426,25 +428,25 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { return false; } -void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { +void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64InstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); + const AArch64InstrInfo *TII = + static_cast(MF.getTarget().getInstrInfo()); + const AArch64RegisterInfo *RegInfo = static_cast( + MF.getTarget().getRegisterInfo()); DebugLoc DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); int NumBytes = MFI->getStackSize(); - const ARM64FunctionInfo *AFI = MF.getInfo(); + const AArch64FunctionInfo *AFI = MF.getInfo(); // Initial and residual are named for consitency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; - if (RetOpcode == ARM64::TCRETURNdi || RetOpcode == ARM64::TCRETURNri) { + if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of @@ -483,8 +485,8 @@ void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // - // ARM64TargetLowering::LowerCall figures out ArgumentPopSize and keeps - // it as the 2nd argument of ARM64ISD::TC_RETURN. + // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps + // it as the 2nd argument of AArch64ISD::TC_RETURN. 
NumBytes += ArgumentPopSize; unsigned NumRestores = 0; @@ -508,7 +510,8 @@ void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, // If this was a redzone leaf function, we don't need to restore the // stack pointer. if (!canUseRedZone(MF)) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII); + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, + TII); return; } @@ -517,14 +520,14 @@ void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP, + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); } /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. -int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { +int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { unsigned FrameReg; return getFrameIndexReference(MF, FI, FrameReg); } @@ -533,19 +536,19 @@ int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, /// debug info. It's the same as what we use for resolving the code-gen /// references for now. FIXME: This can go wrong when references are /// SP-relative and simple call frames aren't used. -int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg) const { +int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { return resolveFrameIndexReference(MF, FI, FrameReg); } -int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg, - bool PreferFP) const { +int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg, + bool PreferFP) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - const ARM64FunctionInfo *AFI = MF.getInfo(); + const AArch64RegisterInfo *RegInfo = static_cast( + MF.getTarget().getRegisterInfo()); + const AArch64FunctionInfo *AFI = MF.getInfo(); int FPOffset = MFI->getObjectOffset(FI) + 16; int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); bool isFixed = MFI->isFixedObjectIndex(FI); @@ -587,7 +590,7 @@ int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, if (RegInfo->hasBasePointer(MF)) FrameReg = RegInfo->getBaseRegister(); else { - FrameReg = ARM64::SP; + FrameReg = AArch64::SP; // If we're using the red zone for this function, the SP won't actually // be adjusted, so the offsets will be negative. They're also all // within range of the signed 9-bit immediate instructions. @@ -599,16 +602,16 @@ int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { - if (Reg != ARM64::LR) + if (Reg != AArch64::LR) return getKillRegState(true); // LR maybe referred to later by an @llvm.returnaddress intrinsic. 
- bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR); + bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR); bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken()); return getKillRegState(LRKill); } -bool ARM64FrameLowering::spillCalleeSavedRegisters( +bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { @@ -645,22 +648,22 @@ bool ARM64FrameLowering::spillCalleeSavedRegisters( // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rational and sequence for restores in epilog. - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && + if (AArch64::GPR64RegClass.contains(Reg1)) { + assert(AArch64::GPR64RegClass.contains(Reg2) && "Expected GPR64 callee-saved register pair!"); // For first spill use pre-increment store. if (i == 0) - StrOpc = ARM64::STPXpre; + StrOpc = AArch64::STPXpre; else - StrOpc = ARM64::STPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && + StrOpc = AArch64::STPXi; + } else if (AArch64::FPR64RegClass.contains(Reg1)) { + assert(AArch64::FPR64RegClass.contains(Reg2) && "Expected FPR64 callee-saved register pair!"); // For first spill use pre-increment store. if (i == 0) - StrOpc = ARM64::STPDpre; + StrOpc = AArch64::STPDpre; else - StrOpc = ARM64::STPDi; + StrOpc = AArch64::STPDi; } else llvm_unreachable("Unexpected callee saved register!"); DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " @@ -672,19 +675,19 @@ bool ARM64FrameLowering::spillCalleeSavedRegisters( assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for STP immediate"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); - if (StrOpc == ARM64::STPDpre || StrOpc == ARM64::STPXpre) - MIB.addReg(ARM64::SP, RegState::Define); + if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre) + MIB.addReg(AArch64::SP, RegState::Define); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) - .addReg(ARM64::SP) + .addReg(AArch64::SP) .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit .setMIFlag(MachineInstr::FrameSetup); } return true; } -bool ARM64FrameLowering::restoreCalleeSavedRegisters( +bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { @@ -716,20 +719,20 @@ bool ARM64FrameLowering::restoreCalleeSavedRegisters( assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && + if (AArch64::GPR64RegClass.contains(Reg1)) { + assert(AArch64::GPR64RegClass.contains(Reg2) && "Expected GPR64 callee-saved register pair!"); if (i == Count - 2) - LdrOpc = ARM64::LDPXpost; + LdrOpc = AArch64::LDPXpost; else - LdrOpc = ARM64::LDPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && + LdrOpc = AArch64::LDPXi; + } else if (AArch64::FPR64RegClass.contains(Reg1)) { + assert(AArch64::FPR64RegClass.contains(Reg2) && "Expected FPR64 callee-saved register pair!"); if (i == Count - 2) - LdrOpc = ARM64::LDPDpost; + LdrOpc = AArch64::LDPDpost; else - LdrOpc = ARM64::LDPDi; + LdrOpc = AArch64::LDPDi; } else 
llvm_unreachable("Unexpected callee saved register!"); DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " @@ -742,31 +745,31 @@ bool ARM64FrameLowering::restoreCalleeSavedRegisters( assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for LDP immediate"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); - if (LdrOpc == ARM64::LDPXpost || LdrOpc == ARM64::LDPDpost) - MIB.addReg(ARM64::SP, RegState::Define); + if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost) + MIB.addReg(AArch64::SP, RegState::Define); MIB.addReg(Reg2, getDefRegState(true)) .addReg(Reg1, getDefRegState(true)) - .addReg(ARM64::SP) + .addReg(AArch64::SP) .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] // where the factor * 8 is implicit } return true; } -void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( +void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( MachineFunction &MF, RegScavenger *RS) const { - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - ARM64FunctionInfo *AFI = MF.getInfo(); + const AArch64RegisterInfo *RegInfo = static_cast( + MF.getTarget().getRegisterInfo()); + AArch64FunctionInfo *AFI = MF.getInfo(); MachineRegisterInfo *MRI = &MF.getRegInfo(); SmallVector UnspilledCSGPRs; SmallVector UnspilledCSFPRs; // The frame record needs to be created by saving the appropriate registers if (hasFP(MF)) { - MRI->setPhysRegUsed(ARM64::FP); - MRI->setPhysRegUsed(ARM64::LR); + MRI->setPhysRegUsed(AArch64::FP); + MRI->setPhysRegUsed(AArch64::LR); } // Spill the BasePtr if it's used. Do this first thing so that the @@ -788,10 +791,10 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( const unsigned OddReg = CSRegs[i]; const unsigned EvenReg = CSRegs[i + 1]; - assert((ARM64::GPR64RegClass.contains(OddReg) && - ARM64::GPR64RegClass.contains(EvenReg)) ^ - (ARM64::FPR64RegClass.contains(OddReg) && - ARM64::FPR64RegClass.contains(EvenReg)) && + assert((AArch64::GPR64RegClass.contains(OddReg) && + AArch64::GPR64RegClass.contains(EvenReg)) ^ + (AArch64::FPR64RegClass.contains(OddReg) && + AArch64::FPR64RegClass.contains(EvenReg)) && "Register class mismatch!"); const bool OddRegUsed = MRI->isPhysRegUsed(OddReg); @@ -800,7 +803,7 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( // Early exit if none of the registers in the register pair is actually // used. if (!OddRegUsed && !EvenRegUsed) { - if (ARM64::GPR64RegClass.contains(OddReg)) { + if (AArch64::GPR64RegClass.contains(OddReg)) { UnspilledCSGPRs.push_back(OddReg); UnspilledCSGPRs.push_back(EvenReg); } else { @@ -810,7 +813,7 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( continue; } - unsigned Reg = ARM64::NoRegister; + unsigned Reg = AArch64::NoRegister; // If only one of the registers of the register pair is used, make sure to // mark the other one as used as well. 
if (OddRegUsed ^ EvenRegUsed) { @@ -822,17 +825,17 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); - assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) || + assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) || (RegInfo->getEncodingValue(OddReg) + 1 == RegInfo->getEncodingValue(EvenReg))) && "Register pair of non-adjacent registers!"); - if (ARM64::GPR64RegClass.contains(OddReg)) { + if (AArch64::GPR64RegClass.contains(OddReg)) { NumGPRSpilled += 2; // If it's not a reserved register, we can use it in lieu of an // emergency spill slot for the register scavenger. // FIXME: It would be better to instead keep looking and choose another // unspilled register that isn't reserved, if there is one. - if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) + if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) ExtraCSSpill = true; } else NumFPRSpilled += 2; @@ -878,7 +881,7 @@ void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. if (!ExtraCSSpill) { - const TargetRegisterClass *RC = &ARM64::GPR64RegClass; + const TargetRegisterClass *RC = &AArch64::GPR64RegClass; int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); RS->addScavengingFrameIndex(FI); DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI diff --git a/lib/Target/ARM64/ARM64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h similarity index 86% rename from lib/Target/ARM64/ARM64FrameLowering.h rename to lib/Target/AArch64/AArch64FrameLowering.h index 1991a0a18dd..0e00d168003 100644 --- a/lib/Target/ARM64/ARM64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -1,4 +1,4 @@ -//===-- ARM64FrameLowering.h - TargetFrameLowering for ARM64 ----*- C++ -*-===// +//==-- AArch64FrameLowering.h - TargetFrameLowering for AArch64 --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -11,22 +11,22 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM64_FRAMELOWERING_H -#define ARM64_FRAMELOWERING_H +#ifndef AArch64_FRAMELOWERING_H +#define AArch64_FRAMELOWERING_H #include "llvm/Target/TargetFrameLowering.h" namespace llvm { -class ARM64Subtarget; -class ARM64TargetMachine; +class AArch64Subtarget; +class AArch64TargetMachine; -class ARM64FrameLowering : public TargetFrameLowering { - const ARM64TargetMachine &TM; +class AArch64FrameLowering : public TargetFrameLowering { + const AArch64TargetMachine &TM; public: - explicit ARM64FrameLowering(const ARM64TargetMachine &TM, - const ARM64Subtarget &STI) + explicit AArch64FrameLowering(const AArch64TargetMachine &TM, + const AArch64Subtarget &STI) : TargetFrameLowering(StackGrowsDown, 16, 0, 16, false /*StackRealignable*/), TM(TM) {} diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp similarity index 70% rename from lib/Target/ARM64/ARM64ISelDAGToDAG.cpp rename to lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 23c45d414e2..7007ffcce29 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ISelDAGToDAG.cpp - A dag to dag inst selector for ARM64 ------===// +//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // 
//===----------------------------------------------------------------------===// // -// This file defines an instruction selector for the ARM64 target. +// This file defines an instruction selector for the AArch64 target. // //===----------------------------------------------------------------------===// -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/APSInt.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" // To access function attributes. @@ -25,30 +25,31 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-isel" +#define DEBUG_TYPE "aarch64-isel" //===--------------------------------------------------------------------===// -/// ARM64DAGToDAGISel - ARM64 specific code to select ARM64 machine +/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine /// instructions for SelectionDAG operations. /// namespace { -class ARM64DAGToDAGISel : public SelectionDAGISel { - ARM64TargetMachine &TM; +class AArch64DAGToDAGISel : public SelectionDAGISel { + AArch64TargetMachine &TM; - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; bool ForCodeSize; public: - explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(nullptr), ForCodeSize(false) {} + explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr), + ForCodeSize(false) {} const char *getPassName() const override { - return "ARM64 Instruction Selection"; + return "AArch64 Instruction Selection"; } bool runOnMachineFunction(MachineFunction &MF) override { @@ -57,7 +58,7 @@ public: FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) || FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); - Subtarget = &TM.getSubtarget(); + Subtarget = &TM.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -161,7 +162,7 @@ public: SDNode *SelectLIBM(SDNode *N); // Include the pieces autogenerated from the target description. -#include "ARM64GenDAGISel.inc" +#include "AArch64GenDAGISel.inc" private: bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, @@ -214,10 +215,10 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, isIntImmediate(N->getOperand(1).getNode(), Imm); } -bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand( +bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( const SDValue &Op, char ConstraintCode, std::vector &OutOps) { assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); - // Require the address to be in a register. That is safe for all ARM64 + // Require the address to be in a register. That is safe for all AArch64 // variants and it is hard to do anything much smarter without knowing // how the operand is used. OutOps.push_back(Op); @@ -227,8 +228,8 @@ bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand( /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. 
-bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, - SDValue &Shift) { +bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, + SDValue &Shift) { // This function is called from the addsub_shifted_imm ComplexPattern, // which lists [imm] as the list of opcode it's interested in, however // we still need to check whether the operand is actually an immediate @@ -248,7 +249,7 @@ bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, } else return false; - unsigned ShVal = ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt); + unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); Val = CurDAG->getTargetConstant(Immed, MVT::i32); Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); return true; @@ -256,8 +257,8 @@ bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, /// SelectNegArithImmed - As above, but negates the value before trying to /// select it. -bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, - SDValue &Shift) { +bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, + SDValue &Shift) { // This function is called from the addsub_shifted_imm ComplexPattern, // which lists [imm] as the list of opcode it's interested in, however // we still need to check whether the operand is actually an immediate @@ -288,23 +289,23 @@ bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, /// getShiftTypeForNode - Translate a shift node to the corresponding /// ShiftType value. -static ARM64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { +static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { switch (N.getOpcode()) { default: - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; case ISD::SHL: - return ARM64_AM::LSL; + return AArch64_AM::LSL; case ISD::SRL: - return ARM64_AM::LSR; + return AArch64_AM::LSR; case ISD::SRA: - return ARM64_AM::ASR; + return AArch64_AM::ASR; case ISD::ROTR: - return ARM64_AM::ROR; + return AArch64_AM::ROR; } } /// \brief Determine wether it is worth to fold V into an extended register. -bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const { +bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { // it hurts if the a value is used at least twice, unless we are optimizing // for code size. if (ForCodeSize || V.hasOneUse()) @@ -317,18 +318,18 @@ bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const { /// instructions allow the shifted register to be rotated, but the arithmetic /// instructions do not. The AllowROR parameter specifies whether ROR is /// supported. 
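SelectArithImmed above accepts exactly the constants that fit the AArch64 ADD/SUB immediate encoding: an unsigned 12-bit value, optionally shifted left by 12 (SelectNegArithImmed applies the same test to the negated constant). A standalone restatement of that test (not part of the patch; the struct and function names are made up for illustration):

  #include <cstdint>
  #include <optional>

  struct ArithImm {
    uint16_t Imm12;    // the 12-bit payload
    unsigned ShiftAmt; // 0 or 12
  };

  static std::optional<ArithImm> matchArithImmed(uint64_t Value) {
    if (Value <= 0xfffULL)
      return ArithImm{static_cast<uint16_t>(Value), 0u};
    if ((Value & ~0xfff000ULL) == 0)
      return ArithImm{static_cast<uint16_t>(Value >> 12), 12u};
    return std::nullopt; // too wide: has to be materialized separately
  }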
-bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, - SDValue &Reg, SDValue &Shift) { - ARM64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); - if (ShType == ARM64_AM::InvalidShiftExtend) +bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, + SDValue &Reg, SDValue &Shift) { + AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); + if (ShType == AArch64_AM::InvalidShiftExtend) return false; - if (!AllowROR && ShType == ARM64_AM::ROR) + if (!AllowROR && ShType == AArch64_AM::ROR) return false; if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { unsigned BitSize = N.getValueType().getSizeInBits(); unsigned Val = RHS->getZExtValue() & (BitSize - 1); - unsigned ShVal = ARM64_AM::getShifterImm(ShType, Val); + unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); Reg = N.getOperand(0); Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); @@ -340,7 +341,7 @@ bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, /// getExtendTypeForNode - Translate an extend node to the corresponding /// ExtendType value. -static ARM64_AM::ShiftExtendType +static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { if (N.getOpcode() == ISD::SIGN_EXTEND || N.getOpcode() == ISD::SIGN_EXTEND_INREG) { @@ -351,51 +352,51 @@ getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { SrcVT = N.getOperand(0).getValueType(); if (!IsLoadStore && SrcVT == MVT::i8) - return ARM64_AM::SXTB; + return AArch64_AM::SXTB; else if (!IsLoadStore && SrcVT == MVT::i16) - return ARM64_AM::SXTH; + return AArch64_AM::SXTH; else if (SrcVT == MVT::i32) - return ARM64_AM::SXTW; + return AArch64_AM::SXTW; assert(SrcVT != MVT::i64 && "extend from 64-bits?"); - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; } else if (N.getOpcode() == ISD::ZERO_EXTEND || N.getOpcode() == ISD::ANY_EXTEND) { EVT SrcVT = N.getOperand(0).getValueType(); if (!IsLoadStore && SrcVT == MVT::i8) - return ARM64_AM::UXTB; + return AArch64_AM::UXTB; else if (!IsLoadStore && SrcVT == MVT::i16) - return ARM64_AM::UXTH; + return AArch64_AM::UXTH; else if (SrcVT == MVT::i32) - return ARM64_AM::UXTW; + return AArch64_AM::UXTW; assert(SrcVT != MVT::i64 && "extend from 64-bits?"); - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; } else if (N.getOpcode() == ISD::AND) { ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); if (!CSD) - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; uint64_t AndMask = CSD->getZExtValue(); switch (AndMask) { default: - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; case 0xFF: - return !IsLoadStore ? ARM64_AM::UXTB : ARM64_AM::InvalidShiftExtend; + return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; case 0xFFFF: - return !IsLoadStore ? ARM64_AM::UXTH : ARM64_AM::InvalidShiftExtend; + return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; case 0xFFFFFFFF: - return ARM64_AM::UXTW; + return AArch64_AM::UXTW; } } - return ARM64_AM::InvalidShiftExtend; + return AArch64_AM::InvalidShiftExtend; } // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 
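getExtendTypeForNode above treats an AND with a low all-ones mask as a zero-extend that the extended-register operand forms can fold for free; the byte and halfword forms are only usable by the arithmetic patterns, not by loads and stores. A rough table of that mapping (not part of the patch; enum and function names are hypothetical):

  #include <cstdint>

  enum class Extend { Invalid, UXTB, UXTH, UXTW };

  static Extend extendForAndMask(uint64_t Mask, bool IsLoadStore) {
    switch (Mask) {
    case 0xFFULL:
      return IsLoadStore ? Extend::Invalid : Extend::UXTB; // zext from i8
    case 0xFFFFULL:
      return IsLoadStore ? Extend::Invalid : Extend::UXTH; // zext from i16
    case 0xFFFFFFFFULL:
      return Extend::UXTW;                                 // zext from i32
    default:
      return Extend::Invalid;
    }
  }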
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { - if (DL->getOpcode() != ARM64ISD::DUPLANE16 && - DL->getOpcode() != ARM64ISD::DUPLANE32) + if (DL->getOpcode() != AArch64ISD::DUPLANE16 && + DL->getOpcode() != AArch64ISD::DUPLANE32) return false; SDValue SV = DL->getOperand(0); @@ -428,10 +429,10 @@ static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, return true; } -/// SelectMLAV64LaneV128 - ARM64 supports vector MLAs where one multiplicand is -/// a lane in the upper half of a 128-bit vector. Recognize and select this so -/// that we don't emit unnecessary lane extracts. -SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { +/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand +/// is a lane in the upper half of a 128-bit vector. Recognize and select this +/// so that we don't emit unnecessary lane extracts. +SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. @@ -458,23 +459,23 @@ SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { default: llvm_unreachable("Unrecognized MLA."); case MVT::v4i16: - MLAOpc = ARM64::MLAv4i16_indexed; + MLAOpc = AArch64::MLAv4i16_indexed; break; case MVT::v8i16: - MLAOpc = ARM64::MLAv8i16_indexed; + MLAOpc = AArch64::MLAv8i16_indexed; break; case MVT::v2i32: - MLAOpc = ARM64::MLAv2i32_indexed; + MLAOpc = AArch64::MLAv2i32_indexed; break; case MVT::v4i32: - MLAOpc = ARM64::MLAv4i32_indexed; + MLAOpc = AArch64::MLAv4i32_indexed; break; } return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); } -SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { +SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { SDValue SMULLOp0; SDValue SMULLOp1; int LaneIdx; @@ -489,26 +490,26 @@ SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { unsigned SMULLOpc = ~0U; - if (IntNo == Intrinsic::arm64_neon_smull) { + if (IntNo == Intrinsic::aarch64_neon_smull) { switch (N->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unrecognized SMULL."); case MVT::v4i32: - SMULLOpc = ARM64::SMULLv4i16_indexed; + SMULLOpc = AArch64::SMULLv4i16_indexed; break; case MVT::v2i64: - SMULLOpc = ARM64::SMULLv2i32_indexed; + SMULLOpc = AArch64::SMULLv2i32_indexed; break; } - } else if (IntNo == Intrinsic::arm64_neon_umull) { + } else if (IntNo == Intrinsic::aarch64_neon_umull) { switch (N->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unrecognized SMULL."); case MVT::v4i32: - SMULLOpc = ARM64::UMULLv4i16_indexed; + SMULLOpc = AArch64::UMULLv4i16_indexed; break; case MVT::v2i64: - SMULLOpc = ARM64::UMULLv2i32_indexed; + SMULLOpc = AArch64::UMULLv2i32_indexed; break; } } else @@ -525,7 +526,7 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { if (N.getValueType() == MVT::i32) return N; - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, N, SubReg); return SDValue(Node, 0); @@ -534,10 +535,10 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { /// SelectArithExtendedRegister - Select a "extended register" operand. This /// operand folds in an extend followed by an optional left shift. 
-bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, - SDValue &Shift) { +bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, + SDValue &Shift) { unsigned ShiftVal = 0; - ARM64_AM::ShiftExtendType Ext; + AArch64_AM::ShiftExtendType Ext; if (N.getOpcode() == ISD::SHL) { ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); @@ -548,24 +549,24 @@ bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, return false; Ext = getExtendTypeForNode(N.getOperand(0)); - if (Ext == ARM64_AM::InvalidShiftExtend) + if (Ext == AArch64_AM::InvalidShiftExtend) return false; Reg = N.getOperand(0).getOperand(0); } else { Ext = getExtendTypeForNode(N); - if (Ext == ARM64_AM::InvalidShiftExtend) + if (Ext == AArch64_AM::InvalidShiftExtend) return false; Reg = N.getOperand(0); } - // ARM64 mandates that the RHS of the operation must use the smallest + // AArch64 mandates that the RHS of the operation must use the smallest // register classs that could contain the size being extended from. Thus, // if we're folding a (sext i8), we need the RHS to be a GPR32, even though // there might not be an actual 32-bit value in the program. We can // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. - assert(Ext != ARM64_AM::UXTX && Ext != ARM64_AM::SXTX); + assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); Reg = narrowIfNeeded(CurDAG, Reg); Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); return isWorthFolding(N); @@ -574,7 +575,7 @@ bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. -bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, +bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { const TargetLowering *TLI = getTargetLowering(); if (N.getOpcode() == ISD::FrameIndex) { @@ -584,7 +585,7 @@ bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, return true; } - if (N.getOpcode() == ARM64ISD::ADDlow) { + if (N.getOpcode() == AArch64ISD::ADDlow) { GlobalAddressSDNode *GAN = dyn_cast(N.getOperand(1).getNode()); Base = N.getOperand(0); @@ -637,8 +638,9 @@ bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, /// is not valid for a scaled immediate addressing mode. The "Size" argument /// is the size in bytes of the memory reference, which is needed here to know /// what is valid for a scaled immediate. 
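SelectAddrModeIndexed and SelectAddrModeUnscaled above split immediate addresses between the two AArch64 encodings: the scaled unsigned 12-bit form (LDR/STR, offset a multiple of the access size) and the unscaled signed 9-bit form (LDUR/STUR). A standalone sketch of that classification (not part of the patch; names are hypothetical):

  #include <cstdint>

  enum class AddrForm { ScaledUImm12, UnscaledSImm9, NeedsAddressArith };

  static AddrForm classifyOffset(int64_t ByteOffset, int64_t AccessSize) {
    if (ByteOffset >= 0 && ByteOffset % AccessSize == 0 &&
        ByteOffset / AccessSize <= 4095)
      return AddrForm::ScaledUImm12;      // e.g. ldr x0, [x1, #8]
    if (ByteOffset >= -256 && ByteOffset <= 255)
      return AddrForm::UnscaledSImm9;     // e.g. ldur x0, [x1, #-8]
    return AddrForm::NeedsAddressArith;   // compute the address separately
  }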
-bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, - SDValue &Base, SDValue &OffImm) { +bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, + SDValue &Base, + SDValue &OffImm) { if (!CurDAG->isBaseWithConstantOffset(N)) return false; if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { @@ -662,7 +664,7 @@ bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, } static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); SDValue ImpDef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), 0); @@ -673,21 +675,22 @@ static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { /// \brief Check if the given SHL node (\p N), can be used to form an /// extended register for an addressing mode. -bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, - bool WantExtend, SDValue &Offset, - SDValue &SignExtend) { +bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, + bool WantExtend, SDValue &Offset, + SDValue &SignExtend) { assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) return false; if (WantExtend) { - ARM64_AM::ShiftExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); - if (Ext == ARM64_AM::InvalidShiftExtend) + AArch64_AM::ShiftExtendType Ext = + getExtendTypeForNode(N.getOperand(0), true); + if (Ext == AArch64_AM::InvalidShiftExtend) return false; Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); } else { Offset = N.getOperand(0); SignExtend = CurDAG->getTargetConstant(0, MVT::i32); @@ -705,10 +708,10 @@ bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, return false; } -bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, - SDValue &Base, SDValue &Offset, - SDValue &SignExtend, - SDValue &DoShift) { +bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, + SDValue &Base, SDValue &Offset, + SDValue &SignExtend, + SDValue &DoShift) { if (N.getOpcode() != ISD::ADD) return false; SDValue LHS = N.getOperand(0); @@ -750,23 +753,25 @@ bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, // There was no shift, whatever else we find. DoShift = CurDAG->getTargetConstant(false, MVT::i32); - ARM64_AM::ShiftExtendType Ext = ARM64_AM::InvalidShiftExtend; + AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; // Try to match an unshifted extend on the LHS. if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidShiftExtend) { + (Ext = getExtendTypeForNode(LHS, true)) != + AArch64_AM::InvalidShiftExtend) { Base = RHS; Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); if (isWorthFolding(LHS)) return true; } // Try to match an unshifted extend on the RHS. 
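SelectExtendedSHL and the WRO/XRO addressing modes above fold an index register that may be extended and shifted, but the shift only comes for free when it is 0 or log2 of the access size. A minimal restatement of that constraint (not part of the patch; written from the ISA rule rather than the elided parts of the hunk, so treat it as an assumption):

  // Legal: ldr x0, [x1, x2, lsl #3] for an 8-byte load; lsl #2 would not be.
  static bool isLegalIndexShift(unsigned ShiftAmt, unsigned AccessSizeBytes) {
    unsigned LegalShift = 0;
    while ((1u << LegalShift) < AccessSizeBytes)
      ++LegalShift; // log2 of the access size
    return ShiftAmt == 0 || ShiftAmt == LegalShift;
  }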
if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidShiftExtend) { + (Ext = getExtendTypeForNode(RHS, true)) != + AArch64_AM::InvalidShiftExtend) { Base = LHS; Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); - SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); if (isWorthFolding(RHS)) return true; } @@ -774,10 +779,10 @@ bool ARM64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, return false; } -bool ARM64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, - SDValue &Base, SDValue &Offset, - SDValue &SignExtend, - SDValue &DoShift) { +bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, + SDValue &Base, SDValue &Offset, + SDValue &SignExtend, + SDValue &DoShift) { if (N.getOpcode() != ISD::ADD) return false; SDValue LHS = N.getOperand(0); @@ -825,27 +830,27 @@ bool ARM64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, return true; } -SDValue ARM64DAGToDAGISel::createDTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { ARM64::DDRegClassID, ARM64::DDDRegClassID, - ARM64::DDDDRegClassID }; - static unsigned SubRegs[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; +SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef Regs) { + static unsigned RegClassIDs[] = { + AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; + static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3 }; return createTuple(Regs, RegClassIDs, SubRegs); } -SDValue ARM64DAGToDAGISel::createQTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { ARM64::QQRegClassID, ARM64::QQQRegClassID, - ARM64::QQQQRegClassID }; - static unsigned SubRegs[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; +SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef Regs) { + static unsigned RegClassIDs[] = { + AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; + static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; return createTuple(Regs, RegClassIDs, SubRegs); } -SDValue ARM64DAGToDAGISel::createTuple(ArrayRef Regs, - unsigned RegClassIDs[], - unsigned SubRegs[]) { +SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, + unsigned RegClassIDs[], + unsigned SubRegs[]) { // There's no special register-class for a vector-list of 1 element: it's just // a vector. if (Regs.size() == 1) @@ -872,8 +877,8 @@ SDValue ARM64DAGToDAGISel::createTuple(ArrayRef Regs, return SDValue(N, 0); } -SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, - unsigned Opc, bool isExt) { +SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, + unsigned Opc, bool isExt) { SDLoc dl(N); EVT VT = N->getValueType(0); @@ -893,7 +898,7 @@ SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, return CurDAG->getMachineNode(Opc, dl, VT, Ops); } -SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { +SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { LoadSDNode *LD = cast(N); if (LD->isUnindexed()) return nullptr; @@ -910,14 +915,14 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { ISD::LoadExtType ExtType = LD->getExtensionType(); bool InsertTo64 = false; if (VT == MVT::i64) - Opcode = IsPre ? ARM64::LDRXpre : ARM64::LDRXpost; + Opcode = IsPre ? 
AArch64::LDRXpre : AArch64::LDRXpost; else if (VT == MVT::i32) { if (ExtType == ISD::NON_EXTLOAD) - Opcode = IsPre ? ARM64::LDRWpre : ARM64::LDRWpost; + Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; else if (ExtType == ISD::SEXTLOAD) - Opcode = IsPre ? ARM64::LDRSWpre : ARM64::LDRSWpost; + Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; else { - Opcode = IsPre ? ARM64::LDRWpre : ARM64::LDRWpost; + Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; InsertTo64 = true; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. @@ -926,11 +931,11 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { } else if (VT == MVT::i16) { if (ExtType == ISD::SEXTLOAD) { if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSHXpre : ARM64::LDRSHXpost; + Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; else - Opcode = IsPre ? ARM64::LDRSHWpre : ARM64::LDRSHWpost; + Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; } else { - Opcode = IsPre ? ARM64::LDRHHpre : ARM64::LDRHHpost; + Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; InsertTo64 = DstVT == MVT::i64; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. @@ -939,22 +944,22 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { } else if (VT == MVT::i8) { if (ExtType == ISD::SEXTLOAD) { if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSBXpre : ARM64::LDRSBXpost; + Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; else - Opcode = IsPre ? ARM64::LDRSBWpre : ARM64::LDRSBWpost; + Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; } else { - Opcode = IsPre ? ARM64::LDRBBpre : ARM64::LDRBBpost; + Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; InsertTo64 = DstVT == MVT::i64; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. DstVT = MVT::i32; } } else if (VT == MVT::f32) { - Opcode = IsPre ? ARM64::LDRSpre : ARM64::LDRSpost; + Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; } else if (VT == MVT::f64 || VT.is64BitVector()) { - Opcode = IsPre ? ARM64::LDRDpre : ARM64::LDRDpost; + Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; } else if (VT.is128BitVector()) { - Opcode = IsPre ? ARM64::LDRQpre : ARM64::LDRQpost; + Opcode = IsPre ? 
AArch64::LDRQpre : AArch64::LDRQpost; } else return nullptr; SDValue Chain = LD->getChain(); @@ -969,11 +974,11 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { Done = true; SDValue LoadedVal = SDValue(Res, 1); if (InsertTo64) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); LoadedVal = - SDValue(CurDAG->getMachineNode(ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64, - CurDAG->getTargetConstant(0, MVT::i64), - LoadedVal, SubReg), + SDValue(CurDAG->getMachineNode( + AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg), 0); } @@ -984,8 +989,8 @@ SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { return nullptr; } -SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, - unsigned SubRegIdx) { +SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, + unsigned Opc, unsigned SubRegIdx) { SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); @@ -1008,8 +1013,8 @@ SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, return nullptr; } -SDNode *ARM64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, - unsigned Opc, unsigned SubRegIdx) { +SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, + unsigned Opc, unsigned SubRegIdx) { SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); @@ -1043,8 +1048,8 @@ SDNode *ARM64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, return nullptr; } -SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); @@ -1062,8 +1067,8 @@ SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, return St; } -SDNode *ARM64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); SmallVector ResTys; @@ -1102,7 +1107,7 @@ public: SDValue Undef = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); - return DAG.getTargetInsertSubreg(ARM64::dsub, DL, WideTy, Undef, V64Reg); + return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); } }; @@ -1114,12 +1119,12 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { MVT EltTy = VT.getVectorElementType().getSimpleVT(); MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); - return DAG.getTargetExtractSubreg(ARM64::dsub, SDLoc(V128Reg), NarrowTy, + return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, V128Reg); } -SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; @@ -1149,8 +1154,8 @@ SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, SDValue SuperReg = SDValue(Ld, 0); EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - static unsigned QSubs[] = { ARM64::qsub0, ARM64::qsub1, ARM64::qsub2, - ARM64::qsub3 }; + static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, + AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = 
CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); if (Narrow) @@ -1163,8 +1168,8 @@ SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, return Ld; } -SDNode *ARM64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; @@ -1204,8 +1209,8 @@ SDNode *ARM64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); } else { EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - static unsigned QSubs[] = { ARM64::qsub0, ARM64::qsub1, ARM64::qsub2, - ARM64::qsub3 }; + static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, + AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); @@ -1221,8 +1226,8 @@ SDNode *ARM64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, return Ld; } -SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; @@ -1254,8 +1259,8 @@ SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, return St; } -SDNode *ARM64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { +SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; @@ -1374,7 +1379,7 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, // operation. MSB = MSB > 31 ? 31 : MSB; - Opc = VT == MVT::i32 ? ARM64::UBFMWri : ARM64::UBFMXri; + Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; return true; } @@ -1410,9 +1415,9 @@ static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, // Check whether we really have a one bit extract here. if (And_mask >> Srl_imm == 0x1) { if (N->getValueType(0) == MVT::i32) - Opc = ARM64::UBFMWri; + Opc = AArch64::UBFMWri; else - Opc = ARM64::UBFMXri; + Opc = AArch64::UBFMXri; LSB = MSB = Srl_imm; @@ -1479,9 +1484,9 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, MSB = LSB + Width; // SRA requires a signed extraction if (VT == MVT::i32) - Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMWri : ARM64::UBFMWri; + Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; else - Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMXri : ARM64::UBFMXri; + Opc = N->getOpcode() == ISD::SRA ? 
AArch64::SBFMXri : AArch64::UBFMXri; return true; } @@ -1509,10 +1514,10 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, switch (NOpc) { default: return false; - case ARM64::SBFMWri: - case ARM64::UBFMWri: - case ARM64::SBFMXri: - case ARM64::UBFMXri: + case AArch64::SBFMWri: + case AArch64::UBFMWri: + case AArch64::SBFMXri: + case AArch64::UBFMXri: Opc = NOpc; Opd0 = N->getOperand(0); LSB = cast(N->getOperand(1).getNode())->getZExtValue(); @@ -1523,7 +1528,7 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, return false; } -SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { +SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { unsigned Opc, LSB, MSB; SDValue Opd0; if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) @@ -1533,12 +1538,12 @@ SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { // If the bit extract operation is 64bit but the original type is 32bit, we // need to add one EXTRACT_SUBREG. - if ((Opc == ARM64::SBFMXri || Opc == ARM64::UBFMXri) && VT == MVT::i32) { + if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64), CurDAG->getTargetConstant(MSB, MVT::i64)}; SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64); - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, SDValue(BFM, 0), SubReg); @@ -1588,7 +1593,7 @@ static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth) { uint64_t Imm = cast(Op.getOperand(1).getNode())->getZExtValue(); - Imm = ARM64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); + Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); getUsefulBits(Op, UsefulBits, Depth + 1); } @@ -1638,17 +1643,17 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, Mask.clearAllBits(); Mask.flipAllBits(); - if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSL) { + if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { // Shift Left - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); + uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); Mask = Mask.shl(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); Mask = Mask.lshr(ShiftAmt); - } else if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSR) { + } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { // Shift Right - // We do not handle ARM64_AM::ASR, because the sign will change the + // We do not handle AArch64_AM::ASR, because the sign will change the // number of useful bits - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); + uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); Mask = Mask.lshr(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); Mask = Mask.shl(ShiftAmt); @@ -1695,25 +1700,25 @@ static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, switch (UserNode->getMachineOpcode()) { default: return; - case ARM64::ANDSWri: - case ARM64::ANDSXri: - case ARM64::ANDWri: - case ARM64::ANDXri: + case AArch64::ANDSWri: + case AArch64::ANDSXri: + case AArch64::ANDWri: + case AArch64::ANDXri: // We increment Depth only when we call the getUsefulBits return 
getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, Depth); - case ARM64::UBFMWri: - case ARM64::UBFMXri: + case AArch64::UBFMWri: + case AArch64::UBFMXri: return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); - case ARM64::ORRWrs: - case ARM64::ORRXrs: + case AArch64::ORRWrs: + case AArch64::ORRXrs: if (UserNode->getOperand(1) != Orig) return; return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, Depth); - case ARM64::BFMWri: - case ARM64::BFMXri: + case AArch64::BFMWri: + case AArch64::BFMXri: return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); } } @@ -1751,7 +1756,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); - unsigned UBFMOpc = BitWidth == 32 ? ARM64::UBFMWri : ARM64::UBFMXri; + unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; SDNode *ShiftNode; if (ShlAmount > 0) { @@ -1833,9 +1838,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, // Set Opc EVT VT = N->getValueType(0); if (VT == MVT::i32) - Opc = ARM64::BFMWri; + Opc = AArch64::BFMWri; else if (VT == MVT::i64) - Opc = ARM64::BFMXri; + Opc = AArch64::BFMXri; else return false; @@ -1860,8 +1865,8 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, NumberOfIgnoredLowBits, true)) { // Check that the returned opcode is compatible with the pattern, // i.e., same type and zero extended (U and not S) - if ((BFXOpc != ARM64::UBFMXri && VT == MVT::i64) || - (BFXOpc != ARM64::UBFMWri && VT == MVT::i32)) + if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || + (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) continue; // Compute the width of the bitfield insertion @@ -1919,7 +1924,7 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, return false; } -SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { +SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { if (N->getOpcode() != ISD::OR) return nullptr; @@ -1938,11 +1943,11 @@ SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } -SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { +SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { EVT VT = N->getValueType(0); unsigned Variant; unsigned Opc; - unsigned FRINTXOpcs[] = { ARM64::FRINTXSr, ARM64::FRINTXDr }; + unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; if (VT == MVT::f32) { Variant = 0; @@ -1958,22 +1963,22 @@ SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { default: return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. 
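The UBFM/SBFM/BFM patterns matched in the bitfield hunks above all reduce to ordinary shift-and-mask arithmetic; the selector's job is to recognize that arithmetic in the DAG and fold it into a single instruction. The plain C++ semantics being matched (not part of the patch; function names are hypothetical):

  #include <cstdint>

  // Unsigned bitfield extract (UBFX, an alias of UBFM): assumes Lsb < 64.
  static uint64_t bitfieldExtract(uint64_t Src, unsigned Lsb, unsigned Width) {
    uint64_t Mask = (Width >= 64) ? ~0ULL : ((1ULL << Width) - 1);
    return (Src >> Lsb) & Mask;
  }

  // Bitfield insert (BFI/BFXIL, aliases of BFM): assumes Lsb + Width <= 64.
  static uint64_t bitfieldInsert(uint64_t Dst, uint64_t Src, unsigned Lsb,
                                 unsigned Width) {
    uint64_t Mask = ((Width >= 64) ? ~0ULL : ((1ULL << Width) - 1)) << Lsb;
    return (Dst & ~Mask) | ((Src << Lsb) & Mask);
  }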
case ISD::FCEIL: { - unsigned FRINTPOpcs[] = { ARM64::FRINTPSr, ARM64::FRINTPDr }; + unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; Opc = FRINTPOpcs[Variant]; break; } case ISD::FFLOOR: { - unsigned FRINTMOpcs[] = { ARM64::FRINTMSr, ARM64::FRINTMDr }; + unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; Opc = FRINTMOpcs[Variant]; break; } case ISD::FTRUNC: { - unsigned FRINTZOpcs[] = { ARM64::FRINTZSr, ARM64::FRINTZDr }; + unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; Opc = FRINTZOpcs[Variant]; break; } case ISD::FROUND: { - unsigned FRINTAOpcs[] = { ARM64::FRINTASr, ARM64::FRINTADr }; + unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; Opc = FRINTAOpcs[Variant]; break; } @@ -1993,14 +1998,14 @@ SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { } bool -ARM64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, +AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth) { APFloat FVal(0.0); if (ConstantFPSDNode *CN = dyn_cast(N)) FVal = CN->getValueAPF(); else if (LoadSDNode *LN = dyn_cast(N)) { // Some otherwise illegal constants are allowed in this case. - if (LN->getOperand(1).getOpcode() != ARM64ISD::ADDlow || + if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || !isa(LN->getOperand(1)->getOperand(1))) return false; @@ -2036,7 +2041,7 @@ ARM64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, return true; } -SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { +SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "); DEBUG(Node->dump(CurDAG)); @@ -2108,10 +2113,10 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { default: assert(0 && "Unexpected vector element type!"); case 64: - SubReg = ARM64::dsub; + SubReg = AArch64::dsub; break; case 32: - SubReg = ARM64::ssub; + SubReg = AArch64::ssub; break; case 16: // FALLTHROUGH case 8: @@ -2131,10 +2136,10 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { if (ConstNode->isNullValue()) { if (VT == MVT::i32) return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), - ARM64::WZR, MVT::i32).getNode(); + AArch64::WZR, MVT::i32).getNode(); else if (VT == MVT::i64) return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), - ARM64::XZR, MVT::i64).getNode(); + AArch64::XZR, MVT::i64).getNode(); } break; } @@ -2142,22 +2147,22 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { case ISD::FrameIndex: { // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. int FI = cast(Node)->getIndex(); - unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); + unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); const TargetLowering *TLI = getTargetLowering(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), CurDAG->getTargetConstant(Shifter, MVT::i32) }; - return CurDAG->SelectNodeTo(Node, ARM64::ADDXri, MVT::i64, Ops); + return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); switch (IntNo) { default: break; - case Intrinsic::arm64_ldaxp: - case Intrinsic::arm64_ldxp: { + case Intrinsic::aarch64_ldaxp: + case Intrinsic::aarch64_ldxp: { unsigned Op = - IntNo == Intrinsic::arm64_ldaxp ? ARM64::LDAXPX : ARM64::LDXPX; + IntNo == Intrinsic::aarch64_ldaxp ? 
AArch64::LDAXPX : AArch64::LDXPX; SDValue MemAddr = Node->getOperand(2); SDLoc DL(Node); SDValue Chain = Node->getOperand(0); @@ -2171,10 +2176,10 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { cast(Ld)->setMemRefs(MemOp, MemOp + 1); return Ld; } - case Intrinsic::arm64_stlxp: - case Intrinsic::arm64_stxp: { + case Intrinsic::aarch64_stlxp: + case Intrinsic::aarch64_stxp: { unsigned Op = - IntNo == Intrinsic::arm64_stlxp ? ARM64::STLXPX : ARM64::STXPX; + IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; SDLoc DL(Node); SDValue Chain = Node->getOperand(0); SDValue ValLo = Node->getOperand(2); @@ -2196,203 +2201,203 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { return St; } - case Intrinsic::arm64_neon_ld1x2: + case Intrinsic::aarch64_neon_ld1x2: if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD1Twov8b, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD1Twov16b, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD1Twov4h, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD1Twov8h, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD1Twov2s, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD1Twov4s, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD1Twov2d, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld1x3: + case Intrinsic::aarch64_neon_ld1x3: if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD1Threev8b, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD1Threev16b, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD1Threev4h, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD1Threev8h, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD1Threev2s, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD1Threev4s, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD1Threev2d, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); break; - case 
Intrinsic::arm64_neon_ld1x4: + case Intrinsic::aarch64_neon_ld1x4: if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv8b, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv16b, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv4h, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv8h, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv2s, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv4s, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv2d, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld2: + case Intrinsic::aarch64_neon_ld2: if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD2Twov8b, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Twov16b, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Twov4h, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Twov8h, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Twov2s, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Twov4s, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Twov2d, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld3: + case Intrinsic::aarch64_neon_ld3: if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD3Threev8b, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Threev16b, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD3Threev4h, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Threev8h, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - 
return SelectLoad(Node, 3, ARM64::LD3Threev2s, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Threev4s, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Threev2d, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld4: + case Intrinsic::aarch64_neon_ld4: if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv8b, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv16b, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv4h, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv8h, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv2s, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv4s, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Fourv2d, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld2r: + case Intrinsic::aarch64_neon_ld2r: if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD2Rv8b, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Rv16b, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Rv4h, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Rv8h, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Rv2s, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Rv4s, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD2Rv1d, ARM64::dsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Rv2d, ARM64::qsub0); + return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld3r: + case Intrinsic::aarch64_neon_ld3r: if (VT == MVT::v8i8) - return 
SelectLoad(Node, 3, ARM64::LD3Rv8b, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Rv16b, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD3Rv4h, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Rv8h, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD3Rv2s, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Rv4s, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD3Rv1d, ARM64::dsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Rv2d, ARM64::qsub0); + return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld4r: + case Intrinsic::aarch64_neon_ld4r: if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Rv8b, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Rv16b, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Rv4h, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Rv8h, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Rv2s, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Rv4s, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD4Rv1d, ARM64::dsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Rv2d, ARM64::qsub0); + return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); break; - case Intrinsic::arm64_neon_ld2lane: + case Intrinsic::aarch64_neon_ld2lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 2, ARM64::LD2i8); + return SelectLoadLane(Node, 2, AArch64::LD2i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 2, ARM64::LD2i16); + return SelectLoadLane(Node, 2, AArch64::LD2i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectLoadLane(Node, 2, ARM64::LD2i32); + return SelectLoadLane(Node, 2, AArch64::LD2i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectLoadLane(Node, 2, ARM64::LD2i64); + return SelectLoadLane(Node, 2, AArch64::LD2i64); break; - case Intrinsic::arm64_neon_ld3lane: + case Intrinsic::aarch64_neon_ld3lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 3, ARM64::LD3i8); + return 
SelectLoadLane(Node, 3, AArch64::LD3i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 3, ARM64::LD3i16); + return SelectLoadLane(Node, 3, AArch64::LD3i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectLoadLane(Node, 3, ARM64::LD3i32); + return SelectLoadLane(Node, 3, AArch64::LD3i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectLoadLane(Node, 3, ARM64::LD3i64); + return SelectLoadLane(Node, 3, AArch64::LD3i64); break; - case Intrinsic::arm64_neon_ld4lane: + case Intrinsic::aarch64_neon_ld4lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 4, ARM64::LD4i8); + return SelectLoadLane(Node, 4, AArch64::LD4i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 4, ARM64::LD4i16); + return SelectLoadLane(Node, 4, AArch64::LD4i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectLoadLane(Node, 4, ARM64::LD4i32); + return SelectLoadLane(Node, 4, AArch64::LD4i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectLoadLane(Node, 4, ARM64::LD4i64); + return SelectLoadLane(Node, 4, AArch64::LD4i64); break; } } break; @@ -2401,32 +2406,32 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { switch (IntNo) { default: break; - case Intrinsic::arm64_neon_tbl2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBLv8i8Two - : ARM64::TBLv16i8Two, + case Intrinsic::aarch64_neon_tbl2: + return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two + : AArch64::TBLv16i8Two, false); - case Intrinsic::arm64_neon_tbl3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBLv8i8Three - : ARM64::TBLv16i8Three, + case Intrinsic::aarch64_neon_tbl3: + return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three + : AArch64::TBLv16i8Three, false); - case Intrinsic::arm64_neon_tbl4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBLv8i8Four - : ARM64::TBLv16i8Four, + case Intrinsic::aarch64_neon_tbl4: + return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four + : AArch64::TBLv16i8Four, false); - case Intrinsic::arm64_neon_tbx2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBXv8i8Two - : ARM64::TBXv16i8Two, + case Intrinsic::aarch64_neon_tbx2: + return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two + : AArch64::TBXv16i8Two, true); - case Intrinsic::arm64_neon_tbx3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBXv8i8Three - : ARM64::TBXv16i8Three, + case Intrinsic::aarch64_neon_tbx3: + return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three + : AArch64::TBXv16i8Three, true); - case Intrinsic::arm64_neon_tbx4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBXv8i8Four - : ARM64::TBXv16i8Four, + case Intrinsic::aarch64_neon_tbx4: + return SelectTable(Node, 4, VT == MVT::v8i8 ? 
AArch64::TBXv8i8Four + : AArch64::TBXv16i8Four, true); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) return N; break; @@ -2440,563 +2445,563 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { switch (IntNo) { default: break; - case Intrinsic::arm64_neon_st1x2: { + case Intrinsic::aarch64_neon_st1x2: { if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST1Twov8b); + return SelectStore(Node, 2, AArch64::ST1Twov8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST1Twov16b); + return SelectStore(Node, 2, AArch64::ST1Twov16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST1Twov4h); + return SelectStore(Node, 2, AArch64::ST1Twov4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST1Twov8h); + return SelectStore(Node, 2, AArch64::ST1Twov8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, ARM64::ST1Twov2s); + return SelectStore(Node, 2, AArch64::ST1Twov2s); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST1Twov4s); + return SelectStore(Node, 2, AArch64::ST1Twov4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST1Twov2d); + return SelectStore(Node, 2, AArch64::ST1Twov2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); + return SelectStore(Node, 2, AArch64::ST1Twov1d); break; } - case Intrinsic::arm64_neon_st1x3: { + case Intrinsic::aarch64_neon_st1x3: { if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST1Threev8b); + return SelectStore(Node, 3, AArch64::ST1Threev8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST1Threev16b); + return SelectStore(Node, 3, AArch64::ST1Threev16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST1Threev4h); + return SelectStore(Node, 3, AArch64::ST1Threev4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST1Threev8h); + return SelectStore(Node, 3, AArch64::ST1Threev8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST1Threev2s); + return SelectStore(Node, 3, AArch64::ST1Threev2s); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST1Threev4s); + return SelectStore(Node, 3, AArch64::ST1Threev4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST1Threev2d); + return SelectStore(Node, 3, AArch64::ST1Threev2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); + return SelectStore(Node, 3, AArch64::ST1Threev1d); break; } - case Intrinsic::arm64_neon_st1x4: { + case Intrinsic::aarch64_neon_st1x4: { if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST1Fourv8b); + return SelectStore(Node, 4, AArch64::ST1Fourv8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST1Fourv16b); + return SelectStore(Node, 4, AArch64::ST1Fourv16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST1Fourv4h); + return SelectStore(Node, 4, AArch64::ST1Fourv4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST1Fourv8h); + return SelectStore(Node, 4, AArch64::ST1Fourv8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST1Fourv2s); + return SelectStore(Node, 4, AArch64::ST1Fourv2s); else if (VT == MVT::v4i32 || VT == 
MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST1Fourv4s); + return SelectStore(Node, 4, AArch64::ST1Fourv4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST1Fourv2d); + return SelectStore(Node, 4, AArch64::ST1Fourv2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); + return SelectStore(Node, 4, AArch64::ST1Fourv1d); break; } - case Intrinsic::arm64_neon_st2: { + case Intrinsic::aarch64_neon_st2: { if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST2Twov8b); + return SelectStore(Node, 2, AArch64::ST2Twov8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST2Twov16b); + return SelectStore(Node, 2, AArch64::ST2Twov16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST2Twov4h); + return SelectStore(Node, 2, AArch64::ST2Twov4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST2Twov8h); + return SelectStore(Node, 2, AArch64::ST2Twov8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, ARM64::ST2Twov2s); + return SelectStore(Node, 2, AArch64::ST2Twov2s); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST2Twov4s); + return SelectStore(Node, 2, AArch64::ST2Twov4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST2Twov2d); + return SelectStore(Node, 2, AArch64::ST2Twov2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); + return SelectStore(Node, 2, AArch64::ST1Twov1d); break; } - case Intrinsic::arm64_neon_st3: { + case Intrinsic::aarch64_neon_st3: { if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST3Threev8b); + return SelectStore(Node, 3, AArch64::ST3Threev8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST3Threev16b); + return SelectStore(Node, 3, AArch64::ST3Threev16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST3Threev4h); + return SelectStore(Node, 3, AArch64::ST3Threev4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST3Threev8h); + return SelectStore(Node, 3, AArch64::ST3Threev8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST3Threev2s); + return SelectStore(Node, 3, AArch64::ST3Threev2s); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST3Threev4s); + return SelectStore(Node, 3, AArch64::ST3Threev4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST3Threev2d); + return SelectStore(Node, 3, AArch64::ST3Threev2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); + return SelectStore(Node, 3, AArch64::ST1Threev1d); break; } - case Intrinsic::arm64_neon_st4: { + case Intrinsic::aarch64_neon_st4: { if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST4Fourv8b); + return SelectStore(Node, 4, AArch64::ST4Fourv8b); else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST4Fourv16b); + return SelectStore(Node, 4, AArch64::ST4Fourv16b); else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST4Fourv4h); + return SelectStore(Node, 4, AArch64::ST4Fourv4h); else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST4Fourv8h); + return SelectStore(Node, 4, AArch64::ST4Fourv8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST4Fourv2s); + return SelectStore(Node, 4, AArch64::ST4Fourv2s); else if (VT == 
MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST4Fourv4s); + return SelectStore(Node, 4, AArch64::ST4Fourv4s); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST4Fourv2d); + return SelectStore(Node, 4, AArch64::ST4Fourv2d); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); + return SelectStore(Node, 4, AArch64::ST1Fourv1d); break; } - case Intrinsic::arm64_neon_st2lane: { + case Intrinsic::aarch64_neon_st2lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 2, ARM64::ST2i8); + return SelectStoreLane(Node, 2, AArch64::ST2i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 2, ARM64::ST2i16); + return SelectStoreLane(Node, 2, AArch64::ST2i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectStoreLane(Node, 2, ARM64::ST2i32); + return SelectStoreLane(Node, 2, AArch64::ST2i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectStoreLane(Node, 2, ARM64::ST2i64); + return SelectStoreLane(Node, 2, AArch64::ST2i64); break; } - case Intrinsic::arm64_neon_st3lane: { + case Intrinsic::aarch64_neon_st3lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 3, ARM64::ST3i8); + return SelectStoreLane(Node, 3, AArch64::ST3i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 3, ARM64::ST3i16); + return SelectStoreLane(Node, 3, AArch64::ST3i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectStoreLane(Node, 3, ARM64::ST3i32); + return SelectStoreLane(Node, 3, AArch64::ST3i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectStoreLane(Node, 3, ARM64::ST3i64); + return SelectStoreLane(Node, 3, AArch64::ST3i64); break; } - case Intrinsic::arm64_neon_st4lane: { + case Intrinsic::aarch64_neon_st4lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 4, ARM64::ST4i8); + return SelectStoreLane(Node, 4, AArch64::ST4i8); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 4, ARM64::ST4i16); + return SelectStoreLane(Node, 4, AArch64::ST4i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectStoreLane(Node, 4, ARM64::ST4i32); + return SelectStoreLane(Node, 4, AArch64::ST4i32); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectStoreLane(Node, 4, ARM64::ST4i64); + return SelectStoreLane(Node, 4, AArch64::ST4i64); break; } } } - case ARM64ISD::LD2post: { + case AArch64ISD::LD2post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 2, ARM64::LD2Twov8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 2, ARM64::LD2Twov16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 2, ARM64::LD2Twov4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 2, ARM64::LD2Twov8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 2, 
ARM64::LD2Twov2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 2, ARM64::LD2Twov4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 2, ARM64::LD1Twov1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 2, ARM64::LD2Twov2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD3post: { + case AArch64ISD::LD3post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 3, ARM64::LD3Threev8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 3, ARM64::LD3Threev16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 3, ARM64::LD3Threev4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 3, ARM64::LD3Threev8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 3, ARM64::LD3Threev2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 3, ARM64::LD3Threev4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 3, ARM64::LD1Threev1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 3, ARM64::LD3Threev2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD4post: { + case AArch64ISD::LD4post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 
4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 4, ARM64::LD4Fourv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1x2post: { + case AArch64ISD::LD1x2post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 2, ARM64::LD1Twov8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 2, ARM64::LD1Twov16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 2, ARM64::LD1Twov4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 2, ARM64::LD1Twov8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 2, ARM64::LD1Twov2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 2, ARM64::LD1Twov4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 2, ARM64::LD1Twov1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 2, ARM64::LD1Twov2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1x3post: { + case AArch64ISD::LD1x3post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 3, ARM64::LD1Threev8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 3, ARM64::LD1Threev16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 3, ARM64::LD1Threev4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 3, ARM64::LD1Threev8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 3, ARM64::LD1Threev2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 3, ARM64::LD1Threev4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 3, ARM64::LD1Threev1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 3, ARM64::LD1Threev2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1x4post: { + case AArch64ISD::LD1x4post: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, 
AArch64::LD1Fourv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 4, ARM64::LD1Fourv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1DUPpost: { + case AArch64ISD::LD1DUPpost: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 1, ARM64::LD1Rv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 1, ARM64::LD1Rv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 1, ARM64::LD1Rv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 1, ARM64::LD1Rv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 1, ARM64::LD1Rv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 1, ARM64::LD1Rv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 1, ARM64::LD1Rv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 1, ARM64::LD1Rv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD2DUPpost: { + case AArch64ISD::LD2DUPpost: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 2, ARM64::LD2Rv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 2, ARM64::LD2Rv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 2, ARM64::LD2Rv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 2, ARM64::LD2Rv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 
2, AArch64::LD2Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 2, ARM64::LD2Rv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 2, ARM64::LD2Rv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 2, ARM64::LD2Rv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 2, ARM64::LD2Rv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD3DUPpost: { + case AArch64ISD::LD3DUPpost: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 3, ARM64::LD3Rv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 3, ARM64::LD3Rv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 3, ARM64::LD3Rv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 3, ARM64::LD3Rv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 3, ARM64::LD3Rv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 3, ARM64::LD3Rv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 3, ARM64::LD3Rv1d_POST, ARM64::dsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 3, ARM64::LD3Rv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD4DUPpost: { + case AArch64ISD::LD4DUPpost: { if (VT == MVT::v8i8) - return SelectPostLoad(Node, 4, ARM64::LD4Rv8b_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) - return SelectPostLoad(Node, 4, ARM64::LD4Rv16b_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); else if (VT == MVT::v4i16) - return SelectPostLoad(Node, 4, ARM64::LD4Rv4h_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); else if (VT == MVT::v8i16) - return SelectPostLoad(Node, 4, ARM64::LD4Rv8h_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostLoad(Node, 4, ARM64::LD4Rv2s_POST, ARM64::dsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostLoad(Node, 4, ARM64::LD4Rv4s_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostLoad(Node, 4, ARM64::LD4Rv1d_POST, ARM64::dsub0); + return 
SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostLoad(Node, 4, ARM64::LD4Rv2d_POST, ARM64::qsub0); + return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); break; } - case ARM64ISD::LD1LANEpost: { + case AArch64ISD::LD1LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostLoadLane(Node, 1, ARM64::LD1i8_POST); + return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostLoadLane(Node, 1, ARM64::LD1i16_POST); + return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostLoadLane(Node, 1, ARM64::LD1i32_POST); + return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostLoadLane(Node, 1, ARM64::LD1i64_POST); + return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); break; } - case ARM64ISD::LD2LANEpost: { + case AArch64ISD::LD2LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostLoadLane(Node, 2, ARM64::LD2i8_POST); + return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostLoadLane(Node, 2, ARM64::LD2i16_POST); + return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostLoadLane(Node, 2, ARM64::LD2i32_POST); + return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostLoadLane(Node, 2, ARM64::LD2i64_POST); + return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); break; } - case ARM64ISD::LD3LANEpost: { + case AArch64ISD::LD3LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostLoadLane(Node, 3, ARM64::LD3i8_POST); + return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostLoadLane(Node, 3, ARM64::LD3i16_POST); + return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostLoadLane(Node, 3, ARM64::LD3i32_POST); + return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostLoadLane(Node, 3, ARM64::LD3i64_POST); + return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); break; } - case ARM64ISD::LD4LANEpost: { + case AArch64ISD::LD4LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostLoadLane(Node, 4, ARM64::LD4i8_POST); + return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostLoadLane(Node, 4, ARM64::LD4i16_POST); + return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostLoadLane(Node, 4, ARM64::LD4i32_POST); + return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostLoadLane(Node, 4, ARM64::LD4i64_POST); + return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); break; } - case ARM64ISD::ST2post: { + case AArch64ISD::ST2post: { VT = 
Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 2, ARM64::ST2Twov8b_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 2, ARM64::ST2Twov16b_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 2, ARM64::ST2Twov4h_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 2, ARM64::ST2Twov8h_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 2, ARM64::ST2Twov2s_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 2, ARM64::ST2Twov4s_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 2, ARM64::ST2Twov2d_POST); + return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 2, ARM64::ST1Twov1d_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); break; } - case ARM64ISD::ST3post: { + case AArch64ISD::ST3post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 3, ARM64::ST3Threev8b_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 3, ARM64::ST3Threev16b_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 3, ARM64::ST3Threev4h_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 3, ARM64::ST3Threev8h_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 3, ARM64::ST3Threev2s_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 3, ARM64::ST3Threev4s_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 3, ARM64::ST3Threev2d_POST); + return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 3, ARM64::ST1Threev1d_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); break; } - case ARM64ISD::ST4post: { + case AArch64ISD::ST4post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 4, ARM64::ST4Fourv8b_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 4, ARM64::ST4Fourv16b_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 4, ARM64::ST4Fourv4h_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 4, ARM64::ST4Fourv8h_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 4, ARM64::ST4Fourv2s_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) 
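Every one of these Select cases, both the intrinsic loads and stores earlier in the hunk and the post-increment AArch64ISD cases here, has the same shape: inspect the value type, then hand a renamed AArch64:: machine opcode (plus, for the loads, a dsub0/qsub0 subregister index) to a helper such as SelectPostStore. The rename only swaps the ARM64:: prefix for AArch64::; the dispatch itself is untouched. As a minimal standalone sketch of that mapping, using the ST4-with-writeback case that continues below (the VTKey struct is invented for illustration; the opcode names are the ones in the hunk):

```cpp
// Standalone illustration of the value-type -> post-increment ST4 opcode
// mapping mirrored by the AArch64ISD::ST4post case; not LLVM code.
#include <cstdio>
#include <string>

struct VTKey {
  int ElemBits;   // element width in bits
  int NumElems;   // lane count
  bool IsFloat;   // only documents that iN and fN vectors share an opcode
};

static std::string st4PostOpcode(const VTKey &VT) {
  // Same pairs as the if/else chain: the 64-bit arrangements (8b, 4h, 2s)
  // and the 128-bit ones (16b, 8h, 4s, 2d); v1i64/v1f64 fall back to ST1.
  if (VT.ElemBits == 8)  return VT.NumElems == 8 ? "AArch64::ST4Fourv8b_POST"
                                                 : "AArch64::ST4Fourv16b_POST";
  if (VT.ElemBits == 16) return VT.NumElems == 4 ? "AArch64::ST4Fourv4h_POST"
                                                 : "AArch64::ST4Fourv8h_POST";
  if (VT.ElemBits == 32) return VT.NumElems == 2 ? "AArch64::ST4Fourv2s_POST"
                                                 : "AArch64::ST4Fourv4s_POST";
  return VT.NumElems == 2 ? "AArch64::ST4Fourv2d_POST"
                          : "AArch64::ST1Fourv1d_POST";
}

int main() {
  std::printf("%s\n", st4PostOpcode({8, 16, false}).c_str()); // ST4Fourv16b_POST
  std::printf("%s\n", st4PostOpcode({64, 1, true}).c_str());  // ST1Fourv1d_POST
  return 0;
}
```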
- return SelectPostStore(Node, 4, ARM64::ST4Fourv4s_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 4, ARM64::ST4Fourv2d_POST); + return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 4, ARM64::ST1Fourv1d_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); break; } - case ARM64ISD::ST1x2post: { + case AArch64ISD::ST1x2post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 2, ARM64::ST1Twov8b_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 2, ARM64::ST1Twov16b_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 2, ARM64::ST1Twov4h_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 2, ARM64::ST1Twov8h_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 2, ARM64::ST1Twov2s_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 2, ARM64::ST1Twov4s_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 2, ARM64::ST1Twov1d_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 2, ARM64::ST1Twov2d_POST); + return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); break; } - case ARM64ISD::ST1x3post: { + case AArch64ISD::ST1x3post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 3, ARM64::ST1Threev8b_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 3, ARM64::ST1Threev16b_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 3, ARM64::ST1Threev4h_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 3, ARM64::ST1Threev8h_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 3, ARM64::ST1Threev2s_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 3, ARM64::ST1Threev4s_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 3, ARM64::ST1Threev1d_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 3, ARM64::ST1Threev2d_POST); + return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); break; } - case ARM64ISD::ST1x4post: { + case AArch64ISD::ST1x4post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) - return SelectPostStore(Node, 4, ARM64::ST1Fourv8b_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); else if (VT == MVT::v16i8) - return SelectPostStore(Node, 4, ARM64::ST1Fourv16b_POST); + return SelectPostStore(Node, 
4, AArch64::ST1Fourv16b_POST); else if (VT == MVT::v4i16) - return SelectPostStore(Node, 4, ARM64::ST1Fourv4h_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); else if (VT == MVT::v8i16) - return SelectPostStore(Node, 4, ARM64::ST1Fourv8h_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectPostStore(Node, 4, ARM64::ST1Fourv2s_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectPostStore(Node, 4, ARM64::ST1Fourv4s_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectPostStore(Node, 4, ARM64::ST1Fourv1d_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectPostStore(Node, 4, ARM64::ST1Fourv2d_POST); + return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); break; } - case ARM64ISD::ST2LANEpost: { + case AArch64ISD::ST2LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostStoreLane(Node, 2, ARM64::ST2i8_POST); + return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostStoreLane(Node, 2, ARM64::ST2i16_POST); + return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostStoreLane(Node, 2, ARM64::ST2i32_POST); + return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostStoreLane(Node, 2, ARM64::ST2i64_POST); + return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); break; } - case ARM64ISD::ST3LANEpost: { + case AArch64ISD::ST3LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostStoreLane(Node, 3, ARM64::ST3i8_POST); + return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostStoreLane(Node, 3, ARM64::ST3i16_POST); + return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostStoreLane(Node, 3, ARM64::ST3i32_POST); + return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostStoreLane(Node, 3, ARM64::ST3i64_POST); + return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); break; } - case ARM64ISD::ST4LANEpost: { + case AArch64ISD::ST4LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectPostStoreLane(Node, 4, ARM64::ST4i8_POST); + return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectPostStoreLane(Node, 4, ARM64::ST4i16_POST); + return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) - return SelectPostStoreLane(Node, 4, ARM64::ST4i32_POST); + return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) - return SelectPostStoreLane(Node, 4, ARM64::ST4i64_POST); + return SelectPostStoreLane(Node, 4, 
AArch64::ST4i64_POST); break; } @@ -3022,9 +3027,9 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { return ResNode; } -/// createARM64ISelDag - This pass converts a legalized DAG into a -/// ARM64-specific DAG, ready for instruction scheduling. -FunctionPass *llvm::createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new ARM64DAGToDAGISel(TM, OptLevel); +/// createAArch64ISelDag - This pass converts a legalized DAG into a +/// AArch64-specific DAG, ready for instruction scheduling. +FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new AArch64DAGToDAGISel(TM, OptLevel); } diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp similarity index 80% rename from lib/Target/ARM64/ARM64ISelLowering.cpp rename to lib/Target/AArch64/AArch64ISelLowering.cpp index c24b7deea94..4ddba007339 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ISelLowering.cpp - ARM64 DAG Lowering Implementation --------===// +//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===// // // The LLVM Compiler Infrastructure // @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64TargetLowering class. +// This file implements the AArch64TargetLowering class. // //===----------------------------------------------------------------------===// -#include "ARM64ISelLowering.h" -#include "ARM64PerfectShuffle.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64TargetMachine.h" -#include "ARM64TargetObjectFile.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64ISelLowering.h" +#include "AArch64PerfectShuffle.h" +#include "AArch64Subtarget.h" +#include "AArch64CallingConv.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "AArch64TargetObjectFile.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -34,7 +34,7 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -#define DEBUG_TYPE "arm64-lower" +#define DEBUG_TYPE "aarch64-lower" STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); @@ -48,38 +48,38 @@ static cl::opt Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values( - clEnumValN(StrictAlign, "arm64-strict-align", + clEnumValN(StrictAlign, "aarch64-strict-align", "Disallow all unaligned memory accesses"), - clEnumValN(NoStrictAlign, "arm64-no-strict-align", + clEnumValN(NoStrictAlign, "aarch64-no-strict-align", "Allow unaligned memory accesses"), clEnumValEnd)); // Place holder until extr generation is tested fully. 
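The alignment option just above keeps its two modes and its default; only the user-visible spellings move from the arm64-strict-align / arm64-no-strict-align forms to the aarch64- prefixed ones. A small standalone stand-in for the switch those flags select (plain C++, assuming nothing about the surrounding cl::opt machinery beyond what the hunk shows):

```cpp
// Standalone stand-in for the strict-alignment switch controlled by the
// renamed -aarch64-strict-align / -aarch64-no-strict-align options.
#include <cassert>

enum AlignMode { StrictAlign, NoStrictAlign };

// Mirrors the intent of the two clEnumValN descriptions in the hunk above.
static bool allowsUnalignedAccesses(AlignMode M) {
  return M == NoStrictAlign; // the patch keeps NoStrictAlign as the default
}

int main() {
  assert(allowsUnalignedAccesses(NoStrictAlign));
  assert(!allowsUnalignedAccesses(StrictAlign));
  return 0;
}
```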
static cl::opt -EnableARM64ExtrGeneration("arm64-extr-generation", cl::Hidden, - cl::desc("Allow ARM64 (or (shift)(shift))->extract"), +EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden, + cl::desc("Allow AArch64 (or (shift)(shift))->extract"), cl::init(true)); static cl::opt -EnableARM64SlrGeneration("arm64-shift-insert-generation", cl::Hidden, - cl::desc("Allow ARM64 SLI/SRI formation"), +EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, + cl::desc("Allow AArch64 SLI/SRI formation"), cl::init(false)); //===----------------------------------------------------------------------===// -// ARM64 Lowering public interface. +// AArch64 Lowering public interface. //===----------------------------------------------------------------------===// static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget().isTargetDarwin()) - return new ARM64_MachoTargetObjectFile(); + if (TM.getSubtarget().isTargetDarwin()) + return new AArch64_MachoTargetObjectFile(); - return new ARM64_ELFTargetObjectFile(); + return new AArch64_ELFTargetObjectFile(); } -ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) +AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { - Subtarget = &TM.getSubtarget(); + Subtarget = &TM.getSubtarget(); - // ARM64 doesn't have comparisons which set GPRs or setcc instructions, so + // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so // we have to make something up. Arbitrarily, choose ZeroOrOne. setBooleanContents(ZeroOrOneBooleanContent); // When comparing vectors the result sets the different elements in the @@ -87,19 +87,19 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Set up the register classes. - addRegisterClass(MVT::i32, &ARM64::GPR32allRegClass); - addRegisterClass(MVT::i64, &ARM64::GPR64allRegClass); + addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); + addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); if (Subtarget->hasFPARMv8()) { - addRegisterClass(MVT::f16, &ARM64::FPR16RegClass); - addRegisterClass(MVT::f32, &ARM64::FPR32RegClass); - addRegisterClass(MVT::f64, &ARM64::FPR64RegClass); - addRegisterClass(MVT::f128, &ARM64::FPR128RegClass); + addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); } if (Subtarget->hasNEON()) { - addRegisterClass(MVT::v16i8, &ARM64::FPR8RegClass); - addRegisterClass(MVT::v8i16, &ARM64::FPR16RegClass); + addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass); + addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass); // Someone set us up the NEON. addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -209,8 +209,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) // Exception handling. // FIXME: These are guesses. Has this been defined yet? 
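The two boolean-contents calls near the top of the constructor are easy to read past: scalar compares are modelled as producing 0 or 1, while vector compares produce an all-zeros or all-ones mask per lane, which is what the NEON compare instructions really write. A self-contained illustration of the difference (ordinary C++, not LLVM code):

```cpp
// Scalar setcc result: 0 or 1 (ZeroOrOneBooleanContent).
// Vector setcc result: each lane is 0 or ~0 (ZeroOrNegativeOneBooleanContent),
// matching what a NEON compare such as CMGT writes into its destination lanes.
#include <cstdint>
#include <cstdio>

int main() {
  int32_t a[4] = {1, 5, -3, 7};
  int32_t b[4] = {2, 4, -3, 9};

  int scalarResult = (a[0] > b[0]) ? 1 : 0; // 0 or 1

  unsigned laneMask[4];
  for (int i = 0; i < 4; ++i)
    laneMask[i] = (a[i] > b[i]) ? 0xFFFFFFFFu : 0u; // 0 or all-ones per lane

  std::printf("scalar: %d\n", scalarResult); // scalar: 0
  std::printf("lanes: %08x %08x %08x %08x\n",
              laneMask[0], laneMask[1], laneMask[2], laneMask[3]);
  // lanes: 00000000 ffffffff 00000000 00000000
  return 0;
}
```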
- setExceptionPointerRegister(ARM64::X0); - setExceptionSelectorRegister(ARM64::X1); + setExceptionPointerRegister(AArch64::X0); + setExceptionSelectorRegister(AArch64::X1); // Constant pool entries setOperationAction(ISD::ConstantPool, MVT::i64, Custom); @@ -228,17 +228,17 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::SUBC, MVT::i64, Custom); setOperationAction(ISD::SUBE, MVT::i64, Custom); - // ARM64 lacks both left-rotate and popcount instructions. + // AArch64 lacks both left-rotate and popcount instructions. setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); - // ARM64 doesn't have {U|S}MUL_LOHI. + // AArch64 doesn't have {U|S}MUL_LOHI. setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); // Expand the undefined-at-zero variants to cttz/ctlz to their defined-at-zero - // counterparts, which ARM64 supports directly. + // counterparts, which AArch64 supports directly. setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); @@ -279,7 +279,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - // ARM64 has implementations of a lot of rounding-like FP operations. + // AArch64 has implementations of a lot of rounding-like FP operations. static MVT RoundingTypes[] = { MVT::f32, MVT::f64}; for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) { MVT Ty = RoundingTypes[I]; @@ -304,8 +304,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::FSINCOS, MVT::f32, Expand); } - // ARM64 does not have floating-point extending loads, i1 sign-extending load, - // floating-point truncating stores, or v2i32->v2i16 truncating store. + // AArch64 does not have floating-point extending loads, i1 sign-extending + // load, floating-point truncating stores, or v2i32->v2i16 truncating store. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); @@ -371,7 +371,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4; - setStackPointerRegisterToSaveRestore(ARM64::SP); + setStackPointerRegisterToSaveRestore(AArch64::SP); setSchedulingPreference(Sched::Hybrid); @@ -421,7 +421,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v1i64, Expand); - // ARM64 doesn't have a direct vector ->f32 conversion instructions for + // AArch64 doesn't have a direct vector ->f32 conversion instructions for // elements smaller than i32, so promote the input to i32 first. 
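Marking ISD::ROTL as Expand a few lines up is the standard answer to the comment before it: AArch64 has rotate-right and shifts but no rotate-left, so the generic legalizer rewrites rotl in terms of what the target does have. A standalone sketch of that rewrite for 32-bit values (ordinary C++, not the legalizer itself):

```cpp
// rotl expanded in terms of shifts and or, since AArch64 has ROR but no ROL.
// Equivalently, rotl(x, n) == ror(x, (32 - n) & 31).
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned N) {
  N &= 31;                       // rotate amount is modulo the bit width
  if (N == 0)
    return X;                    // avoid the undefined shift by 32
  return (X << N) | (X >> (32 - N));
}

static uint32_t rotr32(uint32_t X, unsigned N) {
  N &= 31;
  if (N == 0)
    return X;
  return (X >> N) | (X << (32 - N));
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  // A left rotate is a right rotate by the complementary amount.
  assert(rotl32(0x12345678u, 8) == rotr32(0x12345678u, 24));
  return 0;
}
```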
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); @@ -433,7 +433,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); - // ARM64 doesn't have MUL.2d: + // AArch64 doesn't have MUL.2d: setOperationAction(ISD::MUL, MVT::v2i64, Expand); setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); @@ -461,7 +461,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); } - // ARM64 has implementations of a lot of rounding-like FP operations. + // AArch64 has implementations of a lot of rounding-like FP operations. static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 }; for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) { MVT Ty = RoundingVecTypes[I]; @@ -475,7 +475,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) } } -void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { +void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { if (VT == MVT::v2f32) { setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32); @@ -543,17 +543,17 @@ void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { } } -void ARM64TargetLowering::addDRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR64RegClass); +void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { + addRegisterClass(VT, &AArch64::FPR64RegClass); addTypeForNEON(VT, MVT::v2i32); } -void ARM64TargetLowering::addQRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR128RegClass); +void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { + addRegisterClass(VT, &AArch64::FPR128RegClass); addTypeForNEON(VT, MVT::v4i32); } -EVT ARM64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -562,13 +562,13 @@ EVT ARM64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { /// computeKnownBitsForTargetNode - Determine which of the bits specified in /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
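getSetCCResultType, shown above, captures the compare-result rule in one place: a scalar compare produces an i32, and a vector compare produces a vector of the same shape with integer lanes (v4f32 compares yield v4i32, v2f64 yields v2i64). A standalone model of that rule (the TypeDesc struct is invented for illustration; the rule itself is the one in the function above):

```cpp
// Stand-alone model of getSetCCResultType: scalar -> i32,
// vector -> same lane count and width, but integer elements.
#include <cassert>

struct TypeDesc {
  bool IsVector;
  bool IsFloat;
  unsigned ElemBits;
  unsigned NumElems; // 1 for scalars
};

static TypeDesc setCCResultType(TypeDesc VT) {
  if (!VT.IsVector)
    return {false, false, 32, 1}; // plain i32 for scalar compares
  VT.IsFloat = false;             // changeVectorElementTypeToInteger
  return VT;                      // e.g. v2f64 -> v2i64, v4f32 -> v4i32
}

int main() {
  TypeDesc F64 = {false, true, 64, 1};
  TypeDesc V2F64 = {true, true, 64, 2};

  TypeDesc S = setCCResultType(F64);
  assert(!S.IsVector && S.ElemBits == 32);

  TypeDesc V = setCCResultType(V2F64);
  assert(V.IsVector && !V.IsFloat && V.ElemBits == 64 && V.NumElems == 2);
  return 0;
}
```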
-void ARM64TargetLowering::computeKnownBitsForTargetNode( +void AArch64TargetLowering::computeKnownBitsForTargetNode( const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { switch (Op.getOpcode()) { default: break; - case ARM64ISD::CSEL: { + case AArch64ISD::CSEL: { APInt KnownZero2, KnownOne2; DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1); DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1); @@ -581,8 +581,8 @@ void ARM64TargetLowering::computeKnownBitsForTargetNode( Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { default: return; - case Intrinsic::arm64_ldaxr: - case Intrinsic::arm64_ldxr: { + case Intrinsic::aarch64_ldaxr: + case Intrinsic::aarch64_ldxr: { unsigned BitWidth = KnownOne.getBitWidth(); EVT VT = cast(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); @@ -598,8 +598,8 @@ void ARM64TargetLowering::computeKnownBitsForTargetNode( switch (IntNo) { default: break; - case Intrinsic::arm64_neon_umaxv: - case Intrinsic::arm64_neon_uminv: { + case Intrinsic::aarch64_neon_umaxv: + case Intrinsic::aarch64_neon_uminv: { // Figure out the datatype of the vector operand. The UMINV instruction // will zero extend the result, so we can mark as known zero all the // bits larger than the element datatype. 32-bit or larget doesn't need @@ -622,142 +622,142 @@ void ARM64TargetLowering::computeKnownBitsForTargetNode( } } -MVT ARM64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { +MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i64; } -unsigned ARM64TargetLowering::getMaximalGlobalOffset() const { - // FIXME: On ARM64, this depends on the type. +unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { + // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. 
return 4095; } FastISel * -ARM64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) const { - return ARM64::createFastISel(funcInfo, libInfo); +AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return AArch64::createFastISel(funcInfo, libInfo); } -const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const { +const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return nullptr; - case ARM64ISD::CALL: return "ARM64ISD::CALL"; - case ARM64ISD::ADRP: return "ARM64ISD::ADRP"; - case ARM64ISD::ADDlow: return "ARM64ISD::ADDlow"; - case ARM64ISD::LOADgot: return "ARM64ISD::LOADgot"; - case ARM64ISD::RET_FLAG: return "ARM64ISD::RET_FLAG"; - case ARM64ISD::BRCOND: return "ARM64ISD::BRCOND"; - case ARM64ISD::CSEL: return "ARM64ISD::CSEL"; - case ARM64ISD::FCSEL: return "ARM64ISD::FCSEL"; - case ARM64ISD::CSINV: return "ARM64ISD::CSINV"; - case ARM64ISD::CSNEG: return "ARM64ISD::CSNEG"; - case ARM64ISD::CSINC: return "ARM64ISD::CSINC"; - case ARM64ISD::THREAD_POINTER: return "ARM64ISD::THREAD_POINTER"; - case ARM64ISD::TLSDESC_CALL: return "ARM64ISD::TLSDESC_CALL"; - case ARM64ISD::ADC: return "ARM64ISD::ADC"; - case ARM64ISD::SBC: return "ARM64ISD::SBC"; - case ARM64ISD::ADDS: return "ARM64ISD::ADDS"; - case ARM64ISD::SUBS: return "ARM64ISD::SUBS"; - case ARM64ISD::ADCS: return "ARM64ISD::ADCS"; - case ARM64ISD::SBCS: return "ARM64ISD::SBCS"; - case ARM64ISD::ANDS: return "ARM64ISD::ANDS"; - case ARM64ISD::FCMP: return "ARM64ISD::FCMP"; - case ARM64ISD::FMIN: return "ARM64ISD::FMIN"; - case ARM64ISD::FMAX: return "ARM64ISD::FMAX"; - case ARM64ISD::DUP: return "ARM64ISD::DUP"; - case ARM64ISD::DUPLANE8: return "ARM64ISD::DUPLANE8"; - case ARM64ISD::DUPLANE16: return "ARM64ISD::DUPLANE16"; - case ARM64ISD::DUPLANE32: return "ARM64ISD::DUPLANE32"; - case ARM64ISD::DUPLANE64: return "ARM64ISD::DUPLANE64"; - case ARM64ISD::MOVI: return "ARM64ISD::MOVI"; - case ARM64ISD::MOVIshift: return "ARM64ISD::MOVIshift"; - case ARM64ISD::MOVIedit: return "ARM64ISD::MOVIedit"; - case ARM64ISD::MOVImsl: return "ARM64ISD::MOVImsl"; - case ARM64ISD::FMOV: return "ARM64ISD::FMOV"; - case ARM64ISD::MVNIshift: return "ARM64ISD::MVNIshift"; - case ARM64ISD::MVNImsl: return "ARM64ISD::MVNImsl"; - case ARM64ISD::BICi: return "ARM64ISD::BICi"; - case ARM64ISD::ORRi: return "ARM64ISD::ORRi"; - case ARM64ISD::BSL: return "ARM64ISD::BSL"; - case ARM64ISD::NEG: return "ARM64ISD::NEG"; - case ARM64ISD::EXTR: return "ARM64ISD::EXTR"; - case ARM64ISD::ZIP1: return "ARM64ISD::ZIP1"; - case ARM64ISD::ZIP2: return "ARM64ISD::ZIP2"; - case ARM64ISD::UZP1: return "ARM64ISD::UZP1"; - case ARM64ISD::UZP2: return "ARM64ISD::UZP2"; - case ARM64ISD::TRN1: return "ARM64ISD::TRN1"; - case ARM64ISD::TRN2: return "ARM64ISD::TRN2"; - case ARM64ISD::REV16: return "ARM64ISD::REV16"; - case ARM64ISD::REV32: return "ARM64ISD::REV32"; - case ARM64ISD::REV64: return "ARM64ISD::REV64"; - case ARM64ISD::EXT: return "ARM64ISD::EXT"; - case ARM64ISD::VSHL: return "ARM64ISD::VSHL"; - case ARM64ISD::VLSHR: return "ARM64ISD::VLSHR"; - case ARM64ISD::VASHR: return "ARM64ISD::VASHR"; - case ARM64ISD::CMEQ: return "ARM64ISD::CMEQ"; - case ARM64ISD::CMGE: return "ARM64ISD::CMGE"; - case ARM64ISD::CMGT: return "ARM64ISD::CMGT"; - case ARM64ISD::CMHI: return "ARM64ISD::CMHI"; - case ARM64ISD::CMHS: return "ARM64ISD::CMHS"; - case ARM64ISD::FCMEQ: return "ARM64ISD::FCMEQ"; - case 
ARM64ISD::FCMGE: return "ARM64ISD::FCMGE"; - case ARM64ISD::FCMGT: return "ARM64ISD::FCMGT"; - case ARM64ISD::CMEQz: return "ARM64ISD::CMEQz"; - case ARM64ISD::CMGEz: return "ARM64ISD::CMGEz"; - case ARM64ISD::CMGTz: return "ARM64ISD::CMGTz"; - case ARM64ISD::CMLEz: return "ARM64ISD::CMLEz"; - case ARM64ISD::CMLTz: return "ARM64ISD::CMLTz"; - case ARM64ISD::FCMEQz: return "ARM64ISD::FCMEQz"; - case ARM64ISD::FCMGEz: return "ARM64ISD::FCMGEz"; - case ARM64ISD::FCMGTz: return "ARM64ISD::FCMGTz"; - case ARM64ISD::FCMLEz: return "ARM64ISD::FCMLEz"; - case ARM64ISD::FCMLTz: return "ARM64ISD::FCMLTz"; - case ARM64ISD::NOT: return "ARM64ISD::NOT"; - case ARM64ISD::BIT: return "ARM64ISD::BIT"; - case ARM64ISD::CBZ: return "ARM64ISD::CBZ"; - case ARM64ISD::CBNZ: return "ARM64ISD::CBNZ"; - case ARM64ISD::TBZ: return "ARM64ISD::TBZ"; - case ARM64ISD::TBNZ: return "ARM64ISD::TBNZ"; - case ARM64ISD::TC_RETURN: return "ARM64ISD::TC_RETURN"; - case ARM64ISD::SITOF: return "ARM64ISD::SITOF"; - case ARM64ISD::UITOF: return "ARM64ISD::UITOF"; - case ARM64ISD::SQSHL_I: return "ARM64ISD::SQSHL_I"; - case ARM64ISD::UQSHL_I: return "ARM64ISD::UQSHL_I"; - case ARM64ISD::SRSHR_I: return "ARM64ISD::SRSHR_I"; - case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I"; - case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I"; - case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge"; - case ARM64ISD::LD2post: return "ARM64ISD::LD2post"; - case ARM64ISD::LD3post: return "ARM64ISD::LD3post"; - case ARM64ISD::LD4post: return "ARM64ISD::LD4post"; - case ARM64ISD::ST2post: return "ARM64ISD::ST2post"; - case ARM64ISD::ST3post: return "ARM64ISD::ST3post"; - case ARM64ISD::ST4post: return "ARM64ISD::ST4post"; - case ARM64ISD::LD1x2post: return "ARM64ISD::LD1x2post"; - case ARM64ISD::LD1x3post: return "ARM64ISD::LD1x3post"; - case ARM64ISD::LD1x4post: return "ARM64ISD::LD1x4post"; - case ARM64ISD::ST1x2post: return "ARM64ISD::ST1x2post"; - case ARM64ISD::ST1x3post: return "ARM64ISD::ST1x3post"; - case ARM64ISD::ST1x4post: return "ARM64ISD::ST1x4post"; - case ARM64ISD::LD1DUPpost: return "ARM64ISD::LD1DUPpost"; - case ARM64ISD::LD2DUPpost: return "ARM64ISD::LD2DUPpost"; - case ARM64ISD::LD3DUPpost: return "ARM64ISD::LD3DUPpost"; - case ARM64ISD::LD4DUPpost: return "ARM64ISD::LD4DUPpost"; - case ARM64ISD::LD1LANEpost: return "ARM64ISD::LD1LANEpost"; - case ARM64ISD::LD2LANEpost: return "ARM64ISD::LD2LANEpost"; - case ARM64ISD::LD3LANEpost: return "ARM64ISD::LD3LANEpost"; - case ARM64ISD::LD4LANEpost: return "ARM64ISD::LD4LANEpost"; - case ARM64ISD::ST2LANEpost: return "ARM64ISD::ST2LANEpost"; - case ARM64ISD::ST3LANEpost: return "ARM64ISD::ST3LANEpost"; - case ARM64ISD::ST4LANEpost: return "ARM64ISD::ST4LANEpost"; + case AArch64ISD::CALL: return "AArch64ISD::CALL"; + case AArch64ISD::ADRP: return "AArch64ISD::ADRP"; + case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow"; + case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot"; + case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG"; + case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND"; + case AArch64ISD::CSEL: return "AArch64ISD::CSEL"; + case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL"; + case AArch64ISD::CSINV: return "AArch64ISD::CSINV"; + case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; + case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; + case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; + case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL"; + case AArch64ISD::ADC: return "AArch64ISD::ADC"; + case AArch64ISD::SBC: return 
"AArch64ISD::SBC"; + case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; + case AArch64ISD::SUBS: return "AArch64ISD::SUBS"; + case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; + case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; + case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; + case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; + case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; + case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; + case AArch64ISD::DUP: return "AArch64ISD::DUP"; + case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; + case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; + case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32"; + case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64"; + case AArch64ISD::MOVI: return "AArch64ISD::MOVI"; + case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift"; + case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit"; + case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl"; + case AArch64ISD::FMOV: return "AArch64ISD::FMOV"; + case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift"; + case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl"; + case AArch64ISD::BICi: return "AArch64ISD::BICi"; + case AArch64ISD::ORRi: return "AArch64ISD::ORRi"; + case AArch64ISD::BSL: return "AArch64ISD::BSL"; + case AArch64ISD::NEG: return "AArch64ISD::NEG"; + case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; + case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1"; + case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2"; + case AArch64ISD::UZP1: return "AArch64ISD::UZP1"; + case AArch64ISD::UZP2: return "AArch64ISD::UZP2"; + case AArch64ISD::TRN1: return "AArch64ISD::TRN1"; + case AArch64ISD::TRN2: return "AArch64ISD::TRN2"; + case AArch64ISD::REV16: return "AArch64ISD::REV16"; + case AArch64ISD::REV32: return "AArch64ISD::REV32"; + case AArch64ISD::REV64: return "AArch64ISD::REV64"; + case AArch64ISD::EXT: return "AArch64ISD::EXT"; + case AArch64ISD::VSHL: return "AArch64ISD::VSHL"; + case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR"; + case AArch64ISD::VASHR: return "AArch64ISD::VASHR"; + case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ"; + case AArch64ISD::CMGE: return "AArch64ISD::CMGE"; + case AArch64ISD::CMGT: return "AArch64ISD::CMGT"; + case AArch64ISD::CMHI: return "AArch64ISD::CMHI"; + case AArch64ISD::CMHS: return "AArch64ISD::CMHS"; + case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ"; + case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE"; + case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT"; + case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz"; + case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz"; + case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz"; + case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz"; + case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz"; + case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz"; + case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz"; + case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz"; + case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz"; + case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz"; + case AArch64ISD::NOT: return "AArch64ISD::NOT"; + case AArch64ISD::BIT: return "AArch64ISD::BIT"; + case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; + case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ"; + case AArch64ISD::TBZ: return "AArch64ISD::TBZ"; + case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ"; + case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; + case AArch64ISD::SITOF: return "AArch64ISD::SITOF"; + case AArch64ISD::UITOF: return "AArch64ISD::UITOF"; + case AArch64ISD::SQSHL_I: 
return "AArch64ISD::SQSHL_I"; + case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I"; + case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I"; + case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I"; + case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I"; + case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; + case AArch64ISD::LD2post: return "AArch64ISD::LD2post"; + case AArch64ISD::LD3post: return "AArch64ISD::LD3post"; + case AArch64ISD::LD4post: return "AArch64ISD::LD4post"; + case AArch64ISD::ST2post: return "AArch64ISD::ST2post"; + case AArch64ISD::ST3post: return "AArch64ISD::ST3post"; + case AArch64ISD::ST4post: return "AArch64ISD::ST4post"; + case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post"; + case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post"; + case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post"; + case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post"; + case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post"; + case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post"; + case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost"; + case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost"; + case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost"; + case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost"; + case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost"; + case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost"; + case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost"; + case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost"; + case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost"; + case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost"; + case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost"; } } MachineBasicBlock * -ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, - MachineBasicBlock *MBB) const { +AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, + MachineBasicBlock *MBB) const { // We materialise the F128CSEL pseudo-instruction as some control flow and a // phi node: @@ -793,8 +793,8 @@ ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, MBB->end()); EndBB->transferSuccessorsAndUpdatePHIs(MBB); - BuildMI(MBB, DL, TII->get(ARM64::Bcc)).addImm(CondCode).addMBB(TrueBB); - BuildMI(MBB, DL, TII->get(ARM64::B)).addMBB(EndBB); + BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB); + BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB); MBB->addSuccessor(TrueBB); MBB->addSuccessor(EndBB); @@ -802,11 +802,11 @@ ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, TrueBB->addSuccessor(EndBB); if (!NZCVKilled) { - TrueBB->addLiveIn(ARM64::NZCV); - EndBB->addLiveIn(ARM64::NZCV); + TrueBB->addLiveIn(AArch64::NZCV); + EndBB->addLiveIn(AArch64::NZCV); } - BuildMI(*EndBB, EndBB->begin(), DL, TII->get(ARM64::PHI), DestReg) + BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg) .addReg(IfTrueReg) .addMBB(TrueBB) .addReg(IfFalseReg) @@ -817,7 +817,7 @@ ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, } MachineBasicBlock * -ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, +AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { default: @@ -827,7 +827,7 @@ ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, assert(0 && "Unexpected instruction for custom inserter!"); break; - case ARM64::F128CSEL: + case AArch64::F128CSEL: return EmitF128CSEL(MI, BB); case TargetOpcode::STACKMAP: @@ -838,120 +838,122 @@ 
ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } //===----------------------------------------------------------------------===// -// ARM64 Lowering private implementation. +// AArch64 Lowering private implementation. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// -/// changeIntCCToARM64CC - Convert a DAG integer condition code to an ARM64 CC -static ARM64CC::CondCode changeIntCCToARM64CC(ISD::CondCode CC) { +/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 +/// CC +static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code!"); case ISD::SETNE: - return ARM64CC::NE; + return AArch64CC::NE; case ISD::SETEQ: - return ARM64CC::EQ; + return AArch64CC::EQ; case ISD::SETGT: - return ARM64CC::GT; + return AArch64CC::GT; case ISD::SETGE: - return ARM64CC::GE; + return AArch64CC::GE; case ISD::SETLT: - return ARM64CC::LT; + return AArch64CC::LT; case ISD::SETLE: - return ARM64CC::LE; + return AArch64CC::LE; case ISD::SETUGT: - return ARM64CC::HI; + return AArch64CC::HI; case ISD::SETUGE: - return ARM64CC::HS; + return AArch64CC::HS; case ISD::SETULT: - return ARM64CC::LO; + return AArch64CC::LO; case ISD::SETULE: - return ARM64CC::LS; + return AArch64CC::LS; } } -/// changeFPCCToARM64CC - Convert a DAG fp condition code to an ARM64 CC. -static void changeFPCCToARM64CC(ISD::CondCode CC, ARM64CC::CondCode &CondCode, - ARM64CC::CondCode &CondCode2) { - CondCode2 = ARM64CC::AL; +/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC. +static void changeFPCCToAArch64CC(ISD::CondCode CC, + AArch64CC::CondCode &CondCode, + AArch64CC::CondCode &CondCode2) { + CondCode2 = AArch64CC::AL; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: case ISD::SETOEQ: - CondCode = ARM64CC::EQ; + CondCode = AArch64CC::EQ; break; case ISD::SETGT: case ISD::SETOGT: - CondCode = ARM64CC::GT; + CondCode = AArch64CC::GT; break; case ISD::SETGE: case ISD::SETOGE: - CondCode = ARM64CC::GE; + CondCode = AArch64CC::GE; break; case ISD::SETOLT: - CondCode = ARM64CC::MI; + CondCode = AArch64CC::MI; break; case ISD::SETOLE: - CondCode = ARM64CC::LS; + CondCode = AArch64CC::LS; break; case ISD::SETONE: - CondCode = ARM64CC::MI; - CondCode2 = ARM64CC::GT; + CondCode = AArch64CC::MI; + CondCode2 = AArch64CC::GT; break; case ISD::SETO: - CondCode = ARM64CC::VC; + CondCode = AArch64CC::VC; break; case ISD::SETUO: - CondCode = ARM64CC::VS; + CondCode = AArch64CC::VS; break; case ISD::SETUEQ: - CondCode = ARM64CC::EQ; - CondCode2 = ARM64CC::VS; + CondCode = AArch64CC::EQ; + CondCode2 = AArch64CC::VS; break; case ISD::SETUGT: - CondCode = ARM64CC::HI; + CondCode = AArch64CC::HI; break; case ISD::SETUGE: - CondCode = ARM64CC::PL; + CondCode = AArch64CC::PL; break; case ISD::SETLT: case ISD::SETULT: - CondCode = ARM64CC::LT; + CondCode = AArch64CC::LT; break; case ISD::SETLE: case ISD::SETULE: - CondCode = ARM64CC::LE; + CondCode = AArch64CC::LE; break; case ISD::SETNE: case ISD::SETUNE: - CondCode = ARM64CC::NE; + CondCode = AArch64CC::NE; break; } } -/// changeVectorFPCCToARM64CC - Convert a DAG fp condition code to an ARM64 CC -/// usable with the vector instructions. 
Fewer operations are available without -/// a real NZCV register, so we have to use less efficient combinations to get -/// the same effect. -static void changeVectorFPCCToARM64CC(ISD::CondCode CC, - ARM64CC::CondCode &CondCode, - ARM64CC::CondCode &CondCode2, - bool &Invert) { +/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 +/// CC usable with the vector instructions. Fewer operations are available +/// without a real NZCV register, so we have to use less efficient combinations +/// to get the same effect. +static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, + AArch64CC::CondCode &CondCode, + AArch64CC::CondCode &CondCode2, + bool &Invert) { Invert = false; switch (CC) { default: // Mostly the scalar mappings work fine. - changeFPCCToARM64CC(CC, CondCode, CondCode2); + changeFPCCToAArch64CC(CC, CondCode, CondCode2); break; case ISD::SETUO: Invert = true; // Fallthrough case ISD::SETO: - CondCode = ARM64CC::MI; - CondCode2 = ARM64CC::GE; + CondCode = AArch64CC::MI; + CondCode2 = AArch64CC::GE; break; case ISD::SETUEQ: case ISD::SETULT: @@ -961,13 +963,13 @@ static void changeVectorFPCCToARM64CC(ISD::CondCode CC, // All of the compare-mask comparisons are ordered, but we can switch // between the two by a double inversion. E.g. ULE == !OGT. Invert = true; - changeFPCCToARM64CC(getSetCCInverse(CC, false), CondCode, CondCode2); + changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2); break; } } static bool isLegalArithImmed(uint64_t C) { - // Matches ARM64DAGToDAGISel::SelectArithImmed(). + // Matches AArch64DAGToDAGISel::SelectArithImmed(). return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); } @@ -976,13 +978,13 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, EVT VT = LHS.getValueType(); if (VT.isFloatingPoint()) - return DAG.getNode(ARM64ISD::FCMP, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS); // The CMP instruction is just an alias for SUBS, and representing it as // SUBS means that it's possible to get CSE with subtract operations. // A later phase can perform the optimization of setting the destination // register to WZR/XZR if it ends up being unused. - unsigned Opcode = ARM64ISD::SUBS; + unsigned Opcode = AArch64ISD::SUBS; if (RHS.getOpcode() == ISD::SUB && isa(RHS.getOperand(0)) && cast(RHS.getOperand(0))->getZExtValue() == 0 && @@ -997,7 +999,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // So, finally, the only LLVM-native comparisons that don't mention C and V // are SETEQ and SETNE. They're the only ones we can safely use CMN for in // the absence of information about op2. - Opcode = ARM64ISD::ADDS; + Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); } else if (LHS.getOpcode() == ISD::AND && isa(RHS) && cast(RHS)->getZExtValue() == 0 && @@ -1005,7 +1007,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // Similarly, (CMP (and X, Y), 0) can be implemented with a TST // (a.k.a. ANDS) except that the flags are only guaranteed to work for one // of the signed comparisons. 
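As a point of reference, the test in isLegalArithImmed above accepts exactly the constants an AArch64 ADD/SUB/CMP immediate can encode: a 12-bit value, optionally shifted left by 12. A minimal standalone restatement in plain C++ (fitsArithImmed is an illustrative name, not part of the patch):

#include <cstdint>
#include <cstdio>

// Which constants an AArch64 arithmetic immediate can encode: a 12-bit
// value, optionally shifted left by 12 bits.
static bool fitsArithImmed(uint64_t C) {
  return (C >> 12) == 0 ||                         // imm12, no shift
         ((C & 0xFFFULL) == 0 && (C >> 24) == 0);  // imm12, LSL #12
}

int main() {
  std::printf("%d %d %d\n",
              fitsArithImmed(4095) ? 1 : 0,     // 1: fits unshifted
              fitsArithImmed(0x7FF000) ? 1 : 0, // 1: fits with LSL #12
              fitsArithImmed(0x1001) ? 1 : 0);  // 0: needs bits from both halves
}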
- Opcode = ARM64ISD::ANDS; + Opcode = AArch64ISD::ANDS; RHS = LHS.getOperand(1); LHS = LHS.getOperand(0); } @@ -1014,8 +1016,8 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, .getValue(1); } -static SDValue getARM64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARM64cc, SelectionDAG &DAG, SDLoc dl) { +static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { EVT VT = RHS.getValueType(); uint64_t C = RHSC->getZExtValue(); @@ -1072,13 +1074,13 @@ static SDValue getARM64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - ARM64cc = DAG.getConstant(ARM64CC, MVT::i32); + AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); + AArch64cc = DAG.getConstant(AArch64CC, MVT::i32); return Cmp; } static std::pair -getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { +getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) && "Unsupported value type"); SDValue Value, Overflow; @@ -1090,25 +1092,25 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { default: llvm_unreachable("Unknown overflow instruction!"); case ISD::SADDO: - Opc = ARM64ISD::ADDS; - CC = ARM64CC::VS; + Opc = AArch64ISD::ADDS; + CC = AArch64CC::VS; break; case ISD::UADDO: - Opc = ARM64ISD::ADDS; - CC = ARM64CC::HS; + Opc = AArch64ISD::ADDS; + CC = AArch64CC::HS; break; case ISD::SSUBO: - Opc = ARM64ISD::SUBS; - CC = ARM64CC::VS; + Opc = AArch64ISD::SUBS; + CC = AArch64CC::VS; break; case ISD::USUBO: - Opc = ARM64ISD::SUBS; - CC = ARM64CC::LO; + Opc = AArch64ISD::SUBS; + CC = AArch64CC::LO; break; // Multiply needs a little bit extra work. case ISD::SMULO: case ISD::UMULO: { - CC = ARM64CC::NE; + CC = AArch64CC::NE; bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false; if (Op.getValueType() == MVT::i32) { unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -1121,7 +1123,7 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, DAG.getConstant(0, MVT::i64)); - // On ARM64 the upper 32 bits are always zero extended for a 32 bit + // On AArch64 the upper 32 bits are always zero extended for a 32 bit // operation. We need to clear out the upper 32 bits, because we used a // widening multiply that wrote all 64 bits. In the end this should be a // noop. @@ -1140,19 +1142,19 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); - Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits) + Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); } else { // The overflow check for unsigned multiply is easy. We only need to // check if any of the upper 32 bits are set. This can be done with a // CMP (shifted register). 
For that we need to generate the following // pattern: - // (i64 ARM64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) + // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = - DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), + DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), UpperBits).getValue(1); } break; @@ -1167,13 +1169,13 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits) + Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); } else { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = - DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), + DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), UpperBits).getValue(1); } break; @@ -1183,15 +1185,15 @@ getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { if (Opc) { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); - // Emit the ARM64 operation with overflow check. + // Emit the AArch64 operation with overflow check. Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); Overflow = Value.getValue(1); } return std::make_pair(Value, Overflow); } -SDValue ARM64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const { +SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { SmallVector Ops; for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) Ops.push_back(Op.getOperand(i)); @@ -1246,13 +1248,13 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { // If the constants line up, perform the transform! if (CTVal->isNullValue() && CFVal->isAllOnesValue()) { SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); + SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); FVal = Other; TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, DAG.getConstant(-1ULL, Other.getValueType())); - return DAG.getNode(ARM64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, + return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, CCVal, Cmp); } @@ -1274,17 +1276,17 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { default: assert(0 && "Invalid code"); case ISD::ADDC: - Opc = ARM64ISD::ADDS; + Opc = AArch64ISD::ADDS; break; case ISD::SUBC: - Opc = ARM64ISD::SUBS; + Opc = AArch64ISD::SUBS; break; case ISD::ADDE: - Opc = ARM64ISD::ADCS; + Opc = AArch64ISD::ADCS; ExtraOp = true; break; case ISD::SUBE: - Opc = ARM64ISD::SBCS; + Opc = AArch64ISD::SBCS; ExtraOp = true; break; } @@ -1300,10 +1302,10 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); - ARM64CC::CondCode CC; + AArch64CC::CondCode CC; // The actual operation that sets the overflow or carry flag. SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(CC, Op, DAG); + std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG); // We use 0 and 1 as false and true values. 
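The SMULO/UMULO handling above never materialises a 65-bit result; it performs the multiply in a wider type and then asks whether the upper half carries any information the lower half does not. A minimal sketch of the two 32-bit overflow tests, in plain C++ with illustrative helper names:

#include <cstdint>
#include <cstdio>

static bool umul32Overflows(uint32_t a, uint32_t b) {
  uint64_t p = (uint64_t)a * b;            // widening multiply
  return (p >> 32) != 0;                   // overflow iff any upper bit is set
}
static bool smul32Overflows(int32_t a, int32_t b) {
  int64_t p = (int64_t)a * b;              // widening multiply
  return p < INT32_MIN || p > INT32_MAX;   // overflow iff the product needs more than 32 bits
}

int main() {
  std::printf("%d %d\n",
              umul32Overflows(0x10000u, 0x10000u) ? 1 : 0, // 1: product is 1 << 32
              smul32Overflows(46341, 46341) ? 1 : 0);      // 1: just past INT32_MAX
}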
SDValue TVal = DAG.getConstant(1, MVT::i32); @@ -1313,8 +1315,8 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32); - Overflow = DAG.getNode(ARM64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, CCVal, - Overflow); + Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, + CCVal, Overflow); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); @@ -1347,12 +1349,12 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { unsigned PrfOp = (IsWrite << 4) | // Load/Store bit (Locality << 1) | // Cache level bits (unsigned)IsStream; // Stream bit - return DAG.getNode(ARM64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), + return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1)); } -SDValue ARM64TargetLowering::LowerFP_EXTEND(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); RTLIB::Libcall LC; @@ -1361,8 +1363,8 @@ SDValue ARM64TargetLowering::LowerFP_EXTEND(SDValue Op, return LowerF128Call(Op, DAG, LC); } -SDValue ARM64TargetLowering::LowerFP_ROUND(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, + SelectionDAG &DAG) const { if (Op.getOperand(0).getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; @@ -1380,7 +1382,7 @@ SDValue ARM64TargetLowering::LowerFP_ROUND(SDValue Op, } static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { - // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp. + // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); @@ -1406,8 +1408,8 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -SDValue ARM64TargetLowering::LowerFP_TO_INT(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, + SelectionDAG &DAG) const { if (Op.getOperand(0).getValueType().isVector()) return LowerVectorFP_TO_INT(Op, DAG); @@ -1431,7 +1433,7 @@ SDValue ARM64TargetLowering::LowerFP_TO_INT(SDValue Op, } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp. + // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. 
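LowerPREFETCH above packs the three ISD::PREFETCH arguments into the single 5-bit immediate that the prefetch instruction takes. A standalone restatement of that packing (packPrfOp is an illustrative name):

#include <cstdio>

// Bit 4 is the load/store flag, bits 2:1 the cache level, bit 0 the
// streaming hint, matching the expression in LowerPREFETCH.
static unsigned packPrfOp(unsigned isWrite, unsigned locality, unsigned isStream) {
  return (isWrite << 4) | (locality << 1) | isStream;
}

int main() {
  // e.g. a streaming read prefetch with locality 3:
  std::printf("%#x\n", packPrfOp(0, 3, 1)); // 0x7
}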
EVT VT = Op.getValueType(); @@ -1467,7 +1469,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps); } -SDValue ARM64TargetLowering::LowerINT_TO_FP(SDValue Op, +SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); @@ -1490,7 +1492,8 @@ SDValue ARM64TargetLowering::LowerINT_TO_FP(SDValue Op, return LowerF128Call(Op, DAG, LC); } -SDValue ARM64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, + SelectionDAG &DAG) const { // For iOS, we want to call an alternative entry point: __sincos_stret, // which returns the values in two S / D registers. SDLoc dl(Op); @@ -1520,8 +1523,8 @@ SDValue ARM64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { return CallResult.first; } -SDValue ARM64TargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("unimplemented operand"); @@ -1621,7 +1624,7 @@ SDValue ARM64TargetLowering::LowerOperation(SDValue Op, } /// getFunctionAlignment - Return the Log2 alignment of this function. -unsigned ARM64TargetLowering::getFunctionAlignment(const Function *F) const { +unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const { return 2; } @@ -1629,26 +1632,26 @@ unsigned ARM64TargetLowering::getFunctionAlignment(const Function *F) const { // Calling Convention Implementation //===----------------------------------------------------------------------===// -#include "ARM64GenCallingConv.inc" +#include "AArch64GenCallingConv.inc" /// Selects the correct CCAssignFn for a the given CallingConvention /// value. -CCAssignFn *ARM64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, - bool IsVarArg) const { +CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, + bool IsVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention."); case CallingConv::WebKit_JS: - return CC_ARM64_WebKit_JS; + return CC_AArch64_WebKit_JS; case CallingConv::C: case CallingConv::Fast: if (!Subtarget->isTargetDarwin()) - return CC_ARM64_AAPCS; - return IsVarArg ? CC_ARM64_DarwinPCS_VarArg : CC_ARM64_DarwinPCS; + return CC_AArch64_AAPCS; + return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; } } -SDValue ARM64TargetLowering::LowerFormalArguments( +SDValue AArch64TargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -1662,7 +1665,7 @@ SDValue ARM64TargetLowering::LowerFormalArguments( // At this point, Ins[].VT may already be promoted to i32. To correctly // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT. + // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here // we use a special version of AnalyzeFormalArguments to pass in ValVT and // LocVT. 
@@ -1718,15 +1721,15 @@ SDValue ARM64TargetLowering::LowerFormalArguments( const TargetRegisterClass *RC; if (RegVT == MVT::i32) - RC = &ARM64::GPR32RegClass; + RC = &AArch64::GPR32RegClass; else if (RegVT == MVT::i64) - RC = &ARM64::GPR64RegClass; + RC = &AArch64::GPR64RegClass; else if (RegVT == MVT::f32) - RC = &ARM64::FPR32RegClass; + RC = &AArch64::FPR32RegClass; else if (RegVT == MVT::f64 || RegVT.is64BitVector()) - RC = &ARM64::FPR64RegClass; + RC = &AArch64::FPR64RegClass; else if (RegVT == MVT::f128 || RegVT.is128BitVector()) - RC = &ARM64::FPR128RegClass; + RC = &AArch64::FPR128RegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); @@ -1802,7 +1805,7 @@ SDValue ARM64TargetLowering::LowerFormalArguments( saveVarArgRegisters(CCInfo, DAG, DL, Chain); } - ARM64FunctionInfo *AFI = MF.getInfo(); + AArch64FunctionInfo *AFI = MF.getInfo(); // This will point to the next argument passed via stack. unsigned StackOffset = CCInfo.getNextStackOffset(); // We currently pass all varargs at 8-byte alignment. @@ -1810,7 +1813,7 @@ SDValue ARM64TargetLowering::LowerFormalArguments( AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true)); } - ARM64FunctionInfo *FuncInfo = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); unsigned StackArgSize = CCInfo.getNextStackOffset(); bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { @@ -1834,18 +1837,18 @@ SDValue ARM64TargetLowering::LowerFormalArguments( return Chain; } -void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, - SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const { +void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, + SelectionDAG &DAG, SDLoc DL, + SDValue &Chain) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - ARM64FunctionInfo *FuncInfo = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); SmallVector MemOps; - static const MCPhysReg GPRArgRegs[] = { ARM64::X0, ARM64::X1, ARM64::X2, - ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7 }; + static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7 }; static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs); @@ -1858,7 +1861,7 @@ void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &ARM64::GPR64RegClass); + unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, @@ -1872,9 +1875,9 @@ void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, FuncInfo->setVarArgsGPRSize(GPRSaveSize); if (Subtarget->hasFPARMv8()) { - static const MCPhysReg FPRArgRegs[] = { ARM64::Q0, ARM64::Q1, ARM64::Q2, - ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7 }; + static const MCPhysReg FPRArgRegs[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, + AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs); unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs); @@ -1887,7 +1890,7 @@ void 
ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &ARM64::FPR128RegClass); + unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); SDValue Store = @@ -1909,13 +1912,14 @@ void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. -SDValue ARM64TargetLowering::LowerCallResult( +SDValue AArch64TargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), @@ -1956,7 +1960,7 @@ SDValue ARM64TargetLowering::LowerCallResult( return Chain; } -bool ARM64TargetLowering::isEligibleForTailCallOptimization( +bool AArch64TargetLowering::isEligibleForTailCallOptimization( SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl &Outs, @@ -2054,17 +2058,17 @@ bool ARM64TargetLowering::isEligibleForTailCallOptimization( CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); - const ARM64FunctionInfo *FuncInfo = MF.getInfo(); + const AArch64FunctionInfo *FuncInfo = MF.getInfo(); // If the stack arguments for this call would fit into our own save area then // the call can be made tail. return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); } -SDValue ARM64TargetLowering::addTokenForArgument(SDValue Chain, - SelectionDAG &DAG, - MachineFrameInfo *MFI, - int ClobberedFI) const { +SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, + SelectionDAG &DAG, + MachineFrameInfo *MFI, + int ClobberedFI) const { SmallVector ArgChains; int64_t FirstByte = MFI->getObjectOffset(ClobberedFI); int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1; @@ -2094,19 +2098,20 @@ SDValue ARM64TargetLowering::addTokenForArgument(SDValue Chain, return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); } -bool ARM64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, - bool TailCallOpt) const { +bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, + bool TailCallOpt) const { return CallCC == CallingConv::Fast && TailCallOpt; } -bool ARM64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { +bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { return CallCC == CallingConv::Fast; } /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, /// and add input and output parameter nodes. 
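saveVarArgRegisters above spills every still-unallocated X0-X7 and Q0-Q7 register so that va_arg can later find them on the stack. A sketch of the save-area sizes this implies, assuming the usual 8 bytes per X register and 16 bytes per Q register (helper names are illustrative):

#include <cstdio>

static unsigned gprSaveBytes(unsigned firstUnallocatedGPR) {
  return 8 * (8 - firstUnallocatedGPR);   // X registers left in X0-X7
}
static unsigned fprSaveBytes(unsigned firstUnallocatedFPR) {
  return 16 * (8 - firstUnallocatedFPR);  // Q registers left in Q0-Q7
}

int main() {
  // A variadic callee whose named arguments consumed X0-X1 and Q0:
  std::printf("%u %u\n", gprSaveBytes(2), fprSaveBytes(1)); // 48 112
}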
-SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { +SDValue +AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; SmallVector &Outs = CLI.Outs; @@ -2122,7 +2127,7 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool IsThisReturn = false; - ARM64FunctionInfo *FuncInfo = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; bool IsSibCall = false; @@ -2166,7 +2171,7 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, } else { // At this point, Outs[].VT may already be promoted to i32. To correctly // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT. + // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here // we use a special version of AnalyzeCallOperands to pass in ValVT and // LocVT. @@ -2234,7 +2239,7 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL); - SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ARM64::SP, getPointerTy()); + SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy()); SmallVector, 8> RegsToPass; SmallVector MemOpChains; @@ -2367,15 +2372,15 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); else { Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, - ARM64II::MO_GOT); - Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee); + AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { const char *Sym = S->getSymbol(); Callee = - DAG.getTargetExternalSymbol(Sym, getPointerTy(), ARM64II::MO_GOT); - Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee); + DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); } } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); @@ -2415,7 +2420,8 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. const uint32_t *Mask; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast(TRI); + const AArch64RegisterInfo *ARI = + static_cast(TRI); if (IsThisReturn) { // For 'this' returns, use the X0-preserving mask if applicable Mask = ARI->getThisReturnPreservedMask(CallConv); @@ -2437,10 +2443,10 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, // If we're doing a tall call, use a TC_RETURN here rather than an // actual call instruction. if (IsTailCall) - return DAG.getNode(ARM64ISD::TC_RETURN, DL, NodeTys, Ops); + return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. 
- Chain = DAG.getNode(ARM64ISD::CALL, DL, NodeTys, Ops); + Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); InFlag = Chain.getValue(1); uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt) @@ -2460,24 +2466,26 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, IsThisReturn ? OutVals[0] : SDValue()); } -bool ARM64TargetLowering::CanLowerReturn( +bool AArch64TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC); } SDValue -ARM64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - SDLoc DL, SelectionDAG &DAG) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; +AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + SDLoc DL, SelectionDAG &DAG) const { + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), RVLocs, *DAG.getContext()); @@ -2513,15 +2521,15 @@ ARM64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(ARM64ISD::RET_FLAG, DL, MVT::Other, RetOps); + return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps); } //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// -SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); SDLoc DL(Op); const GlobalValue *GV = cast(Op)->getGlobal(); @@ -2532,31 +2540,31 @@ SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op, "unexpected offset in global node"); // This also catched the large code model case for Darwin. - if ((OpFlags & ARM64II::MO_GOT) != 0) { + if ((OpFlags & AArch64II::MO_GOT) != 0) { SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes instead of using a wrapper node. 
- return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); + return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); } if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = ARM64II::MO_NC; + const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); } else { // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and // the only correct model on Darwin. SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | ARM64II::MO_PAGE); - unsigned char LoFlags = OpFlags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC; + OpFlags | AArch64II::MO_PAGE); + unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } @@ -2589,8 +2597,8 @@ SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op, /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for /// a slight efficiency gain. SDValue -ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { +AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); SDLoc DL(Op); @@ -2598,8 +2606,8 @@ ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, const GlobalValue *GV = cast(Op)->getGlobal(); SDValue TLVPAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - SDValue DescAddr = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TLVPAddr); + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); + SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr); // The first entry in the descriptor is a function pointer that we must call // to obtain the address of the variable. @@ -2616,17 +2624,19 @@ ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast(TRI); + const AArch64RegisterInfo *ARI = + static_cast(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(); // Finally, we can make the call. This is just a degenerate version of a - // normal ARM64 call node: x0 takes the address of the descriptor, and returns - // the address of the variable in this thread. 
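In the small-code-model branch above, the address of a global is rebuilt from two relocations: ADRP (MO_PAGE) materialises the symbol's 4 KiB page and ADDlow (MO_PAGEOFF) adds the low 12 bits back. A minimal arithmetic sketch with an illustrative address:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t sym     = 0x12345ABCULL;      // illustrative symbol address
  uint64_t page    = sym & ~0xFFFULL;    // what ADRP (MO_PAGE) produces
  uint64_t pageoff = sym & 0xFFFULL;     // the low-12-bits immediate (MO_PAGEOFF)
  std::printf("%d\n", (page + pageoff) == sym ? 1 : 0); // 1: the pair rebuilds the address
}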
- Chain = DAG.getCopyToReg(Chain, DL, ARM64::X0, DescAddr, SDValue()); - Chain = DAG.getNode(ARM64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), - Chain, FuncTLVGet, DAG.getRegister(ARM64::X0, MVT::i64), - DAG.getRegisterMask(Mask), Chain.getValue(1)); - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Chain.getValue(1)); + // normal AArch64 call node: x0 takes the address of the descriptor, and + // returns the address of the variable in this thread. + Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue()); + Chain = + DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), + Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64), + DAG.getRegisterMask(Mask), Chain.getValue(1)); + return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1)); } /// When accessing thread-local variables under either the general-dynamic or @@ -2651,26 +2661,27 @@ ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, /// /// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this /// is harmless. -SDValue ARM64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, - SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, + SDValue DescAddr, SDLoc DL, + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); // The function we need to call is simply the first entry in the GOT for this // descriptor, load it in preparation. - SDValue Func = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, SymAddr); + SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr); // TLS calls preserve all registers except those that absolutely must be // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast(TRI); + const AArch64RegisterInfo *ARI = + static_cast(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(); // The function takes only one argument: the address of the descriptor itself // in X0. 
SDValue Glue, Chain; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM64::X0, DescAddr, Glue); + Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); Glue = Chain.getValue(1); // We're now ready to populate the argument list, as with a normal call: @@ -2678,19 +2689,20 @@ SDValue ARM64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, Ops.push_back(Chain); Ops.push_back(Func); Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(ARM64::X0, PtrVT)); + Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); Ops.push_back(DAG.getRegisterMask(Mask)); Ops.push_back(Glue); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(ARM64ISD::TLSDESC_CALL, DL, NodeTys, Ops); + Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops); Glue = Chain.getValue(1); - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Glue); + return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); } -SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "This function expects an ELF target"); assert(getTargetMachine().getCodeModel() == CodeModel::Small && "ELF TLS only supported in small memory model"); @@ -2703,23 +2715,24 @@ SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, SDLoc DL(Op); const GlobalValue *GV = GA->getGlobal(); - SDValue ThreadBase = DAG.getNode(ARM64ISD::THREAD_POINTER, DL, PtrVT); + SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); if (Model == TLSModel::LocalExec) { SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G1); + GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1); SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); + GV, DL, PtrVT, 0, + AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, DAG.getTargetConstant(16, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, TPOff, LoVar, + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar, DAG.getTargetConstant(0, MVT::i32)), 0); } else if (Model == TLSModel::InitialExec) { - TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - TPOff = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TPOff); + TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); + TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff); } else if (Model == TLSModel::LocalDynamic) { // Local-dynamic accesses proceed in two phases. A general-dynamic TLS // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate @@ -2727,28 +2740,28 @@ SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, // calculation. // These accesses will need deduplicating if there's more than one. - ARM64FunctionInfo *MFI = - DAG.getMachineFunction().getInfo(); + AArch64FunctionInfo *MFI = + DAG.getMachineFunction().getInfo(); MFI->incNumLocalDynamicTLSAccesses(); // Accesses used in this sequence go via the TLS descriptor which lives in // the GOT. Prepare an address we can use to handle this. 
SDValue HiDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, ARM64II::MO_TLS | ARM64II::MO_PAGE); + "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE); SDValue LoDesc = DAG.getTargetExternalSymbol( "_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); // First argument to the descriptor call is the address of the descriptor // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); + SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); + DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS); + AArch64II::MO_TLS); // Now we can calculate the offset from TPIDR_EL0 to this module's // thread-local area. @@ -2757,38 +2770,40 @@ SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, // Now use :dtprel_whatever: operations to calculate this variable's offset // in its thread-storage area. SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G1); + GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1); SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); + GV, DL, MVT::i64, 0, + AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); SDValue DTPOff = - SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, + SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, DAG.getTargetConstant(16, MVT::i32)), 0); - DTPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, DTPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); + DTPOff = + SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff); } else if (Model == TLSModel::GeneralDynamic) { // Accesses used in this sequence go via the TLS descriptor which lives in // the GOT. Prepare an address we can use to handle this. SDValue HiDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_PAGE); + GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE); SDValue LoDesc = DAG.getTargetGlobalAddress( GV, DL, PtrVT, 0, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); // First argument to the descriptor call is the address of the descriptor // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); + SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); + DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. SDValue SymAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); // Finally we can make a call to calculate the offset from tpidr_el0. 
TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); @@ -2798,8 +2813,8 @@ SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); } -SDValue ARM64TargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { if (Subtarget->isTargetDarwin()) return LowerDarwinGlobalTLSAddress(Op, DAG); else if (Subtarget->isTargetELF()) @@ -2807,7 +2822,7 @@ SDValue ARM64TargetLowering::LowerGlobalTLSAddress(SDValue Op, llvm_unreachable("Unexpected platform trying to use TLS"); } -SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -2843,15 +2858,15 @@ SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // The actual operation with overflow check. - ARM64CC::CondCode OFCC; + AArch64CC::CondCode OFCC; SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, LHS.getValue(0), DAG); + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG); if (CC == ISD::SETNE) OFCC = getInvertedCondCode(OFCC); SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - return DAG.getNode(ARM64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, + return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, CCVal, Overflow); } @@ -2878,11 +2893,11 @@ SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (Test.getValueType() == MVT::i32) Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - return DAG.getNode(ARM64ISD::TBZ, dl, MVT::Other, Chain, Test, + return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test, DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); } - return DAG.getNode(ARM64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); + return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); } else if (CC == ISD::SETNE) { // See if we can use a TBZ to fold in an AND as well. // TBZ has a smaller branch displacement than CBZ. If the offset is @@ -2898,41 +2913,41 @@ SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (Test.getValueType() == MVT::i32) Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - return DAG.getNode(ARM64ISD::TBNZ, dl, MVT::Other, Chain, Test, + return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test, DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); } - return DAG.getNode(ARM64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); + return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); } } SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, + SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Cmp); } assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two branches to implement. 
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); SDValue BR1 = - DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); - if (CC2 != ARM64CC::AL) { + DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); + if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, Cmp); } return BR1; } -SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); @@ -2959,9 +2974,9 @@ SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op, EltMask = DAG.getConstant(0x80000000ULL, EltVT); if (!VT.isVector()) { - VecVal1 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT, + VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, DAG.getUNDEF(VecVT), In1); - VecVal2 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT, + VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, DAG.getUNDEF(VecVT), In2); } else { VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); @@ -2977,9 +2992,9 @@ SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op, EltMask = DAG.getConstant(0, EltVT); if (!VT.isVector()) { - VecVal1 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT, + VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, DAG.getUNDEF(VecVT), In1); - VecVal2 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT, + VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, DAG.getUNDEF(VecVT), In2); } else { VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); @@ -3004,17 +3019,17 @@ SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op, } SDValue Sel = - DAG.getNode(ARM64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); + DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); if (VT == MVT::f32) - return DAG.getTargetExtractSubreg(ARM64::ssub, DL, VT, Sel); + return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel); else if (VT == MVT::f64) - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, VT, Sel); + return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel); else return DAG.getNode(ISD::BITCAST, DL, VT, Sel); } -SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( AttributeSet::FunctionIndex, Attribute::NoImplicitFloat)) return SDValue(); @@ -3035,8 +3050,8 @@ SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { SDValue VecVal; if (VT == MVT::i32) { VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); - VecVal = - DAG.getTargetInsertSubreg(ARM64::ssub, DL, MVT::v8i8, ZeroVec, VecVal); + VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec, + VecVal); } else { VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); } @@ -3044,14 +3059,14 @@ SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal); SDValue UaddLV = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - DAG.getConstant(Intrinsic::arm64_neon_uaddlv, MVT::i32), 
CtPop); + DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop); if (VT == MVT::i64) UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); return UaddLV; } -SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG); @@ -3082,12 +3097,12 @@ SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (LHS.getValueType().isInteger()) { SDValue CCVal; SDValue Cmp = - getARM64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); + getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); } // Now we know we're dealing with FP values. @@ -3097,28 +3112,29 @@ SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // and do the comparison. SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - if (CC2 == ARM64CC::AL) { - changeFPCCToARM64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); + if (CC2 == AArch64CC::AL) { + changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); } else { - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two CSELs to implement. As is in this case, - // we emit the first CSEL and then emit a second using the output of the - // first as the RHS. We're effectively OR'ing the two CC's together. + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't + // totally clean. Some of them require two CSELs to implement. As is in + // this case, we emit the first CSEL and then emit a second using the output + // of the first as the RHS. We're effectively OR'ing the two CC's together. // FIXME: It would be nice if we could match the two CSELs to two CSINCs. 
SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); + SDValue CS1 = + DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } } @@ -3147,7 +3163,8 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; } -SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, + SelectionDAG &DAG) const { SDValue CC = Op->getOperand(0); SDValue TVal = Op->getOperand(1); SDValue FVal = Op->getOperand(2); @@ -3163,13 +3180,13 @@ SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0))) return SDValue(); - ARM64CC::CondCode OFCC; + AArch64CC::CondCode OFCC; SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, CC.getValue(0), DAG); + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG); SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal, - Overflow); + return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, + CCVal, Overflow); } if (CC.getOpcode() == ISD::SETCC) @@ -3180,8 +3197,8 @@ SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { FVal, ISD::SETNE); } -SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -3207,7 +3224,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, assert((LHS.getValueType() == RHS.getValueType()) && (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); - unsigned Opcode = ARM64ISD::CSEL; + unsigned Opcode = AArch64ISD::CSEL; // If both the TVal and the FVal are constants, see if we can swap them in // order to for a CSINV or CSINC out of them. @@ -3251,9 +3268,9 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC // instead of a CSEL in that case. if (TrueVal == ~FalseVal) { - Opcode = ARM64ISD::CSINV; + Opcode = AArch64ISD::CSINV; } else if (TrueVal == -FalseVal) { - Opcode = ARM64ISD::CSNEG; + Opcode = AArch64ISD::CSNEG; } else if (TVal.getValueType() == MVT::i32) { // If our operands are only 32-bit wide, make sure we use 32-bit // arithmetic for the check whether we can use CSINC. This ensures that @@ -3264,7 +3281,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, const uint32_t FalseVal32 = CFVal->getZExtValue(); if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) { - Opcode = ARM64ISD::CSINC; + Opcode = AArch64ISD::CSINC; if (TrueVal32 > FalseVal32) { Swap = true; @@ -3272,7 +3289,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, } // 64-bit check whether we can use CSINC. 
} else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) { - Opcode = ARM64ISD::CSINC; + Opcode = AArch64ISD::CSINC; if (TrueVal > FalseVal) { Swap = true; @@ -3286,7 +3303,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, CC = ISD::getSetCCInverse(CC, true); } - if (Opcode != ARM64ISD::CSEL) { + if (Opcode != AArch64ISD::CSEL) { // Drop FVal since we can get its value by simply inverting/negating // TVal. FVal = TVal; @@ -3294,7 +3311,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, } SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); + SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); EVT VT = Op.getValueType(); return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); @@ -3328,7 +3345,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, case ISD::SETUGE: case ISD::SETOGT: case ISD::SETOGE: - return DAG.getNode(ARM64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS); + return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS); break; case ISD::SETLT: case ISD::SETLE: @@ -3336,7 +3353,7 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, case ISD::SETULE: case ISD::SETOLT: case ISD::SETOLE: - return DAG.getNode(ARM64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS); + return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS); break; } } @@ -3346,26 +3363,26 @@ SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op, // and do the comparison. SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two CSELs to implement. - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); + SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); // If we need a second CSEL, emit it, using the output of the first as the // RHS. We're effectively OR'ing the two CC's together. - if (CC2 != ARM64CC::AL) { + if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } // Otherwise, return the output of the first CSEL. return CS1; } -SDValue ARM64TargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. 
JumpTableSDNode *JT = cast(Op); @@ -3374,24 +3391,26 @@ SDValue ARM64TargetLowering::LowerJumpTable(SDValue Op, if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = ARM64II::MO_NC; + const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_G2 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_G1 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_G0 | MO_NC)); + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + AArch64II::MO_G0 | MO_NC)); } - SDValue Hi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_PAGE); + SDValue Hi = + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE); SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } -SDValue ARM64TargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); EVT PtrVT = getPointerTy(); SDLoc DL(Op); @@ -3401,63 +3420,63 @@ SDValue ARM64TargetLowering::LowerConstantPool(SDValue Op, if (Subtarget->isTargetMachO()) { SDValue GotAddr = DAG.getTargetConstantPool( CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_GOT); - return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); + AArch64II::MO_GOT); + return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); } - const unsigned char MO_NC = ARM64II::MO_NC; + const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, + AArch64ISD::WrapperLarge, DL, PtrVT, DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G3), + CP->getOffset(), AArch64II::MO_G3), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G2 | MO_NC), + CP->getOffset(), AArch64II::MO_G2 | MO_NC), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G1 | MO_NC), + CP->getOffset(), AArch64II::MO_G1 | MO_NC), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G0 | MO_NC)); + CP->getOffset(), AArch64II::MO_G0 | MO_NC)); } else { // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on // ELF, the only valid one on Darwin. 
SDValue Hi = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_PAGE); + CP->getOffset(), AArch64II::MO_PAGE); SDValue Lo = DAG.getTargetConstantPool( CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } -SDValue ARM64TargetLowering::LowerBlockAddress(SDValue Op, +SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast(Op)->getBlockAddress(); EVT PtrVT = getPointerTy(); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = ARM64II::MO_NC; + const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE); + SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF | + AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } -SDValue ARM64TargetLowering::LowerDarwin_VASTART(SDValue Op, +SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const { - ARM64FunctionInfo *FuncInfo = - DAG.getMachineFunction().getInfo(); + AArch64FunctionInfo *FuncInfo = + DAG.getMachineFunction().getInfo(); SDLoc DL(Op); SDValue FR = @@ -3467,12 +3486,12 @@ SDValue ARM64TargetLowering::LowerDarwin_VASTART(SDValue Op, MachinePointerInfo(SV), false, false, 0); } -SDValue ARM64TargetLowering::LowerAAPCS_VASTART(SDValue Op, +SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call // Standard, section B.3. MachineFunction &MF = DAG.getMachineFunction(); - ARM64FunctionInfo *FuncInfo = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); @@ -3534,12 +3553,14 @@ SDValue ARM64TargetLowering::LowerAAPCS_VASTART(SDValue Op, return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } -SDValue ARM64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { return Subtarget->isTargetDarwin() ? 
LowerDarwin_VASTART(Op, DAG) : LowerAAPCS_VASTART(Op, DAG); } -SDValue ARM64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, + SelectionDAG &DAG) const { // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32; @@ -3552,7 +3573,7 @@ SDValue ARM64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SrcSV)); } -SDValue ARM64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && "automatic va_arg instruction only works on Darwin"); @@ -3614,15 +3635,16 @@ SDValue ARM64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { false, false, 0); } -SDValue ARM64TargetLowering::LowerFRAMEADDR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, ARM64::FP, VT); + SDValue FrameAddr = + DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, MachinePointerInfo(), false, false, false, 0); @@ -3631,18 +3653,18 @@ SDValue ARM64TargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned ARM64TargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { unsigned Reg = StringSwitch(RegName) - .Case("sp", ARM64::SP) + .Case("sp", AArch64::SP) .Default(0); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } -SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); @@ -3659,14 +3681,14 @@ SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op, } // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM64::LR, &ARM64::GPR64RegClass); + unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. 
-SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -3688,14 +3710,14 @@ SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32); + SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32); SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue Lo = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); - // ARM64 shifts larger than the register width are wrapped rather than + // AArch64 shifts larger than the register width are wrapped rather than // clamped, so we can't just emit "hi >> x". SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue TrueValHi = Opc == ISD::SRA @@ -3703,7 +3725,7 @@ SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, DAG.getConstant(VTBits - 1, MVT::i64)) : DAG.getConstant(0, VT); SDValue Hi = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -3711,7 +3733,7 @@ SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue ARM64TargetLowering::LowerShiftLeftParts(SDValue Op, +SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); @@ -3735,45 +3757,46 @@ SDValue ARM64TargetLowering::LowerShiftLeftParts(SDValue Op, SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32); - SDValue Hi = DAG.getNode(ARM64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); + SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32); + SDValue Hi = + DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); - // ARM64 shifts of larger than register sizes are wrapped rather than clamped, - // so we can't just emit "lo << a" if a is too big. + // AArch64 shifts of larger than register sizes are wrapped rather than + // clamped, so we can't just emit "lo << a" if a is too big. SDValue TrueValLo = DAG.getConstant(0, VT); SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Lo = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } -bool -ARM64TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // The ARM64 target doesn't support folding offsets into global addresses. +bool AArch64TargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // The AArch64 target doesn't support folding offsets into global addresses. 
return false; } -bool ARM64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { +bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. // FIXME: We should be able to handle f128 as well with a clever lowering. if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) return true; if (VT == MVT::f64) - return ARM64_AM::getFP64Imm(Imm) != -1; + return AArch64_AM::getFP64Imm(Imm) != -1; else if (VT == MVT::f32) - return ARM64_AM::getFP32Imm(Imm) != -1; + return AArch64_AM::getFP32Imm(Imm) != -1; return false; } //===----------------------------------------------------------------------===// -// ARM64 Optimization Hooks +// AArch64 Optimization Hooks //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// ARM64 Inline Assembly Support +// AArch64 Inline Assembly Support //===----------------------------------------------------------------------===// // Table of Constraints @@ -3802,8 +3825,8 @@ bool ARM64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. -ARM64TargetLowering::ConstraintType -ARM64TargetLowering::getConstraintType(const std::string &Constraint) const { +AArch64TargetLowering::ConstraintType +AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: @@ -3826,7 +3849,7 @@ ARM64TargetLowering::getConstraintType(const std::string &Constraint) const { /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight -ARM64TargetLowering::getSingleConstraintMatchWeight( +AArch64TargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; @@ -3853,32 +3876,32 @@ ARM64TargetLowering::getSingleConstraintMatchWeight( } std::pair -ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { +AArch64TargetLowering::getRegForInlineAsmConstraint( + const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::GPR64commonRegClass); - return std::make_pair(0U, &ARM64::GPR32commonRegClass); + return std::make_pair(0U, &AArch64::GPR64commonRegClass); + return std::make_pair(0U, &AArch64::GPR32commonRegClass); case 'w': if (VT == MVT::f32) - return std::make_pair(0U, &ARM64::FPR32RegClass); + return std::make_pair(0U, &AArch64::FPR32RegClass); if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::FPR64RegClass); + return std::make_pair(0U, &AArch64::FPR64RegClass); if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128RegClass); + return std::make_pair(0U, &AArch64::FPR128RegClass); break; // The instructions that this constraint is designed for can // only take 128-bit registers so just use that regclass. 
case 'x': if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128_loRegClass); + return std::make_pair(0U, &AArch64::FPR128_loRegClass); break; } } if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(unsigned(ARM64::NZCV), &ARM64::CCRRegClass); + return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. @@ -3897,8 +3920,8 @@ ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // v0 - v31 are aliases of q0 - q31. // By default we'll emit v0-v31 for this unless there's a modifier where // we'll emit the correct register as well. - Res.first = ARM64::FPR128RegClass.getRegister(RegNo); - Res.second = &ARM64::FPR128RegClass; + Res.first = AArch64::FPR128RegClass.getRegister(RegNo); + Res.second = &AArch64::FPR128RegClass; } } } @@ -3908,7 +3931,7 @@ ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. -void ARM64TargetLowering::LowerAsmOperandForConstraint( +void AArch64TargetLowering::LowerAsmOperandForConstraint( SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { SDValue Result; @@ -3931,9 +3954,9 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( return; if (Op.getValueType() == MVT::i64) - Result = DAG.getRegister(ARM64::XZR, MVT::i64); + Result = DAG.getRegister(AArch64::XZR, MVT::i64); else - Result = DAG.getRegister(ARM64::WZR, MVT::i32); + Result = DAG.getRegister(AArch64::WZR, MVT::i32); break; } @@ -3974,11 +3997,11 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice // versa. case 'K': - if (ARM64_AM::isLogicalImmediate(CVal, 32)) + if (AArch64_AM::isLogicalImmediate(CVal, 32)) break; return; case 'L': - if (ARM64_AM::isLogicalImmediate(CVal, 64)) + if (AArch64_AM::isLogicalImmediate(CVal, 64)) break; return; // The M and N constraints are a superset of K and L respectively, for use @@ -3990,7 +4013,7 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( case 'M': { if (!isUInt<32>(CVal)) return; - if (ARM64_AM::isLogicalImmediate(CVal, 32)) + if (AArch64_AM::isLogicalImmediate(CVal, 32)) break; if ((CVal & 0xFFFF) == CVal) break; @@ -4004,7 +4027,7 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( return; } case 'N': { - if (ARM64_AM::isLogicalImmediate(CVal, 64)) + if (AArch64_AM::isLogicalImmediate(CVal, 64)) break; if ((CVal & 0xFFFFULL) == CVal) break; @@ -4043,7 +4066,7 @@ void ARM64TargetLowering::LowerAsmOperandForConstraint( } //===----------------------------------------------------------------------===// -// ARM64 Advanced SIMD Support +// AArch64 Advanced SIMD Support //===----------------------------------------------------------------------===// /// WidenVector - Given a value in the V64 register class, produce the @@ -4075,13 +4098,13 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); SDLoc DL(V128Reg); - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, NarrowTy, V128Reg); + return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg); } // Gather data to see if the operation can be modelled as a // shuffle in combination with VEXTs. 
-SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4186,7 +4209,7 @@ SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op, DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], DAG.getIntPtrConstant(NumElts)); unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); - ShuffleSrcs[i] = DAG.getNode(ARM64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, + ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); } } @@ -4542,13 +4565,13 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, // VREV divides the vector in half and swaps within the half. if (VT.getVectorElementType() == MVT::i32 || VT.getVectorElementType() == MVT::f32) - return DAG.getNode(ARM64ISD::REV64, dl, VT, OpLHS); + return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS); // vrev <4 x i16> -> REV32 if (VT.getVectorElementType() == MVT::i16) - return DAG.getNode(ARM64ISD::REV32, dl, VT, OpLHS); + return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS); // vrev <4 x i8> -> REV16 assert(VT.getVectorElementType() == MVT::i8); - return DAG.getNode(ARM64ISD::REV16, dl, VT, OpLHS); + return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS); case OP_VDUP0: case OP_VDUP1: case OP_VDUP2: @@ -4556,13 +4579,13 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, EVT EltTy = VT.getVectorElementType(); unsigned Opcode; if (EltTy == MVT::i8) - Opcode = ARM64ISD::DUPLANE8; + Opcode = AArch64ISD::DUPLANE8; else if (EltTy == MVT::i16) - Opcode = ARM64ISD::DUPLANE16; + Opcode = AArch64ISD::DUPLANE16; else if (EltTy == MVT::i32 || EltTy == MVT::f32) - Opcode = ARM64ISD::DUPLANE32; + Opcode = AArch64ISD::DUPLANE32; else if (EltTy == MVT::i64 || EltTy == MVT::f64) - Opcode = ARM64ISD::DUPLANE64; + Opcode = AArch64ISD::DUPLANE64; else llvm_unreachable("Invalid vector element type?"); @@ -4575,21 +4598,27 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, case OP_VEXT2: case OP_VEXT3: { unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); - return DAG.getNode(ARM64ISD::EXT, dl, VT, OpLHS, OpRHS, + return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS, DAG.getConstant(Imm, MVT::i32)); } case OP_VUZPL: - return DAG.getNode(ARM64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VUZPR: - return DAG.getNode(ARM64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VZIPL: - return DAG.getNode(ARM64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VZIPR: - return DAG.getNode(ARM64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VTRNL: - return DAG.getNode(ARM64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); case OP_VTRNR: - return DAG.getNode(ARM64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); + return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); } } @@ -4627,7 +4656,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, V1Cst = 
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } else { @@ -4635,19 +4664,19 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } else { // FIXME: We cannot, for the moment, emit a TBL2 instruction because we // cannot currently represent the register constraints on the input // table registers. - // Shuffle = DAG.getNode(ARM64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst, + // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst, // DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, // &TBLMask[0], IndexLen)); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl2, MVT::i32), V1Cst, V2Cst, + DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst, DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, makeArrayRef(TBLMask.data(), IndexLen))); } @@ -4657,19 +4686,19 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, static unsigned getDUPLANEOp(EVT EltType) { if (EltType == MVT::i8) - return ARM64ISD::DUPLANE8; + return AArch64ISD::DUPLANE8; if (EltType == MVT::i16) - return ARM64ISD::DUPLANE16; + return AArch64ISD::DUPLANE16; if (EltType == MVT::i32 || EltType == MVT::f32) - return ARM64ISD::DUPLANE32; + return AArch64ISD::DUPLANE32; if (EltType == MVT::i64 || EltType == MVT::f64) - return ARM64ISD::DUPLANE64; + return AArch64ISD::DUPLANE64; llvm_unreachable("Invalid vector element type?"); } -SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); @@ -4692,13 +4721,13 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, Lane = 0; if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ARM64ISD::DUP, dl, V1.getValueType(), + return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(), V1.getOperand(0)); // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non- // constant. If so, we can just reference the lane's definition directly. if (V1.getOpcode() == ISD::BUILD_VECTOR && !isa(V1.getOperand(Lane))) - return DAG.getNode(ARM64ISD::DUP, dl, VT, V1.getOperand(Lane)); + return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane)); // Otherwise, duplicate from the lane of the input vector. 
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType()); @@ -4720,11 +4749,11 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, } if (isREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARM64ISD::REV64, dl, V1.getValueType(), V1, V2); + return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2); if (isREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARM64ISD::REV32, dl, V1.getValueType(), V1, V2); + return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2); if (isREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARM64ISD::REV16, dl, V1.getValueType(), V1, V2); + return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2); bool ReverseEXT = false; unsigned Imm; @@ -4732,39 +4761,39 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (ReverseEXT) std::swap(V1, V2); Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V2, + return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2, DAG.getConstant(Imm, MVT::i32)); } else if (V2->getOpcode() == ISD::UNDEF && isSingletonEXTMask(ShuffleMask, VT, Imm)) { Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V1, + return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1, DAG.getConstant(Imm, MVT::i32)); } unsigned WhichResult; if (isZIPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isUZPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isTRNMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; + unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; + unsigned Opc = (WhichResult == 0) ? 
AArch64ISD::TRN1 : AArch64ISD::TRN2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } @@ -4844,8 +4873,8 @@ static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, return false; } -SDValue ARM64TargetLowering::LowerVectorAND(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, + SelectionDAG &DAG) const { BuildVectorSDNode *BVN = dyn_cast(Op.getOperand(1).getNode()); SDValue LHS = Op.getOperand(0); @@ -4870,55 +4899,55 @@ SDValue ARM64TargetLowering::LowerVectorAND(SDValue Op, CnstBits = CnstBits.zextOrTrunc(64); uint64_t CnstVal = CnstBits.getZExtValue(); - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); @@ -4990,12 +5019,12 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { // Is the second op an shl or lshr? SDValue Shift = N->getOperand(1); - // This will have been turned into: ARM64ISD::VSHL vector, #shift - // or ARM64ISD::VLSHR vector, #shift + // This will have been turned into: AArch64ISD::VSHL vector, #shift + // or AArch64ISD::VLSHR vector, #shift unsigned ShiftOpc = Shift.getOpcode(); - if ((ShiftOpc != ARM64ISD::VSHL && ShiftOpc != ARM64ISD::VLSHR)) + if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR)) return SDValue(); - bool IsShiftRight = ShiftOpc == ARM64ISD::VLSHR; + bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR; // Is the shift amount constant? ConstantSDNode *C2node = dyn_cast(Shift.getOperand(1)); @@ -5021,12 +5050,12 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { SDValue Y = Shift.getOperand(0); unsigned Intrin = - IsShiftRight ? Intrinsic::arm64_neon_vsri : Intrinsic::arm64_neon_vsli; + IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli; SDValue ResultSLI = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1)); - DEBUG(dbgs() << "arm64-lower: transformed: \n"); + DEBUG(dbgs() << "aarch64-lower: transformed: \n"); DEBUG(N->dump(&DAG)); DEBUG(dbgs() << "into: \n"); DEBUG(ResultSLI->dump(&DAG)); @@ -5035,10 +5064,10 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { return ResultSLI; } -SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, + SelectionDAG &DAG) const { // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) - if (EnableARM64SlrGeneration) { + if (EnableAArch64SlrGeneration) { SDValue Res = tryLowerToSLI(Op.getNode(), DAG); if (Res.getNode()) return Res; @@ -5070,55 +5099,55 @@ SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op, CnstBits = CnstBits.zextOrTrunc(64); uint64_t CnstVal = CnstBits.getZExtValue(); - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); @@ -5137,8 +5166,8 @@ FailedModImm: return Op; } -SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { BuildVectorSDNode *BVN = cast(Op.getNode()); SDLoc dl(Op); EVT VT = Op.getValueType(); @@ -5163,186 +5192,186 @@ SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return Op; // The many faces of MOVI... - if (ARM64_AM::isAdvSIMDModImmType10(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType10(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal); if (VT.getSizeInBits() == 128) { - SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::v2i64, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } // Support the V64 version via subregister insertion. 
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::f64, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(264, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(272, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType9(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType9(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; - SDValue Mov = DAG.getNode(ARM64ISD::MOVI, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } // The few faces of FMOV... - if (ARM64_AM::isAdvSIMDModImmType11(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType11(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32; - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType12(CnstVal) && + if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) && VT.getSizeInBits() == 128) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType12(CnstVal); - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MVT::v2f64, + CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal); + SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64, DAG.getConstant(CnstVal, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } // The many faces of MVNI... CnstVal = ~CnstVal; - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(16, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(24, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(0, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(8, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(264, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); + if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy, + SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, DAG.getConstant(CnstVal, MVT::i32), DAG.getConstant(272, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Mov); @@ -5411,7 +5440,7 @@ FailedModImm: if (!isConstant) { if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Value.getValueType() != VT) - return DAG.getNode(ARM64ISD::DUP, dl, VT, Value); + return DAG.getNode(AArch64ISD::DUP, dl, VT, Value); // This is actually a DUPLANExx operation, which keeps everything vectory. @@ -5444,7 +5473,7 @@ FailedModImm: // is better than the default, which will perform a separate initialization // for each lane. if (NumConstantLanes > 0 && usesOnlyOneConstantValue) { - SDValue Val = DAG.getNode(ARM64ISD::DUP, dl, VT, ConstantValue); + SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue); // Now insert the non-constant lanes. for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); @@ -5487,7 +5516,7 @@ FailedModImm: // b) Allow the register coalescer to fold away the copy if the // value is already in an S or D register. if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) { - unsigned SubIdx = ElemSize == 32 ? ARM64::ssub : ARM64::dsub; + unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub; MachineSDNode *N = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, DAG.getTargetConstant(SubIdx, MVT::i32)); @@ -5508,8 +5537,8 @@ FailedModImm: return SDValue(); } -SDValue ARM64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); // Check for non-constant lane. @@ -5539,8 +5568,9 @@ SDValue ARM64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, return NarrowVector(Node, DAG); } -SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { +SDValue +AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); // Check for non-constant lane. @@ -5573,8 +5603,8 @@ SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, Op.getOperand(1)); } -SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getOperand(0).getValueType(); SDLoc dl(Op); // Just in case... 
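The BIC/ORR/MOVI/MVNI cases above all route a splatted constant through the AArch64_AM::isAdvSIMDModImmType*/encodeAdvSIMDModImmType* helpers and then pick a lane type plus a shift (LSL #0/8/16/24 for 32-bit lanes, LSL #0/8 for 16-bit lanes, and the MSL forms encoded as 264/272). As a rough, self-contained sketch of what the 32-bit LSL classes accept, the classifier below reports the 8-bit payload and shift for a lane constant with exactly one non-zero byte; the names and structure are illustrative, not the real AArch64_AM helpers.

    #include <cstdint>
    #include <cstdio>
    #include <optional>

    // Simplified stand-in for the isAdvSIMDModImmType1..4 checks: a 32-bit lane
    // constant can be materialized as "MOVI/ORR Vd.4s, #imm8, LSL #s"
    // (s in {0, 8, 16, 24}) when exactly one of its four bytes is non-zero.
    // Illustrative only; not the LLVM implementation.
    struct ShiftedImm8 { unsigned Imm8; unsigned Shift; };

    static std::optional<ShiftedImm8> classifyShiftedImm32(uint32_t Lane) {
      for (unsigned Shift = 0; Shift < 32; Shift += 8) {
        uint32_t Mask = 0xFFu << Shift;
        if (Lane != 0 && (Lane & ~Mask) == 0)
          return ShiftedImm8{(Lane >> Shift) & 0xFFu, Shift};
      }
      return std::nullopt;
    }

    int main() {
      for (uint32_t Lane : {0x0000004Du, 0x00AB0000u, 0x00123400u}) {
        if (auto SI = classifyShiftedImm32(Lane))
          std::printf("0x%08X -> #0x%02X, LSL #%u\n", Lane, SI->Imm8, SI->Shift);
        else
          std::printf("0x%08X -> not a shifted 8-bit immediate\n", Lane);
      }
      return 0;
    }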
@@ -5590,16 +5620,16 @@ SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, if (Val == 0) { switch (Size) { case 8: - return DAG.getTargetExtractSubreg(ARM64::bsub, dl, Op.getValueType(), + return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(), Op.getOperand(0)); case 16: - return DAG.getTargetExtractSubreg(ARM64::hsub, dl, Op.getValueType(), + return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(), Op.getOperand(0)); case 32: - return DAG.getTargetExtractSubreg(ARM64::ssub, dl, Op.getValueType(), + return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(), Op.getOperand(0)); case 64: - return DAG.getTargetExtractSubreg(ARM64::dsub, dl, Op.getValueType(), + return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(), Op.getOperand(0)); default: llvm_unreachable("Unexpected vector type in extract_subvector!"); @@ -5613,8 +5643,8 @@ SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, return SDValue(); } -bool ARM64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, - EVT VT) const { +bool AArch64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, + EVT VT) const { if (VT.getVectorNumElements() == 4 && (VT.is128BitVector() || VT.is64BitVector())) { unsigned PFIndexes[4]; @@ -5700,8 +5730,8 @@ static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits)); } -SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); int64_t Cnt; @@ -5716,10 +5746,10 @@ SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, case ISD::SHL: if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) - return DAG.getNode(ARM64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), + return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), DAG.getConstant(Cnt, MVT::i32)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::arm64_neon_ushl, MVT::i32), + DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32), Op.getOperand(0), Op.getOperand(1)); case ISD::SRA: case ISD::SRL: @@ -5727,7 +5757,7 @@ SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) && Cnt < EltSize) { unsigned Opc = - (Op.getOpcode() == ISD::SRA) ? ARM64ISD::VASHR : ARM64ISD::VLSHR; + (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0), DAG.getConstant(Cnt, MVT::i32)); } @@ -5735,10 +5765,10 @@ SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, // Right shift register. Note, there is not a shift right register // instruction, but the shift left register instruction takes a signed // value, where negative numbers specify a right shift. - unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::arm64_neon_sshl - : Intrinsic::arm64_neon_ushl; + unsigned Opc = (Op.getOpcode() == ISD::SRA) ? 
Intrinsic::aarch64_neon_sshl + : Intrinsic::aarch64_neon_ushl; // negate the shift amount - SDValue NegShift = DAG.getNode(ARM64ISD::NEG, DL, VT, Op.getOperand(1)); + SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1)); SDValue NegShiftLeft = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift); @@ -5749,7 +5779,7 @@ SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, } static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, - ARM64CC::CondCode CC, bool NoNans, EVT VT, + AArch64CC::CondCode CC, bool NoNans, EVT VT, SDLoc dl, SelectionDAG &DAG) { EVT SrcVT = LHS.getValueType(); @@ -5763,85 +5793,86 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, switch (CC) { default: return SDValue(); - case ARM64CC::NE: { + case AArch64CC::NE: { SDValue Fcmeq; if (IsZero) - Fcmeq = DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS); + Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS); else - Fcmeq = DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Fcmeq); + Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq); } - case ARM64CC::EQ: + case AArch64CC::EQ: if (IsZero) - return DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: + return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS); + case AArch64CC::GE: if (IsZero) - return DAG.getNode(ARM64ISD::FCMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: + return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS); + case AArch64CC::GT: if (IsZero) - return DAG.getNode(ARM64ISD::FCMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, LHS, RHS); - case ARM64CC::LS: + return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS); + case AArch64CC::LS: if (IsZero) - return DAG.getNode(ARM64ISD::FCMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGE, dl, VT, RHS, LHS); - case ARM64CC::LT: + return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS); + case AArch64CC::LT: if (!NoNans) return SDValue(); // If we ignore NaNs then we can use to the MI implementation. // Fallthrough. 
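Backing up to the tail of LowerVectorSRA_SRL_SHL just above: NEON has no vector shift-right-by-register, so the lowering negates the amount and feeds it to the signed/unsigned shift-left intrinsic, which treats negative amounts as right shifts. A one-lane scalar model of that convention (no saturation or rounding; the helper names are invented for this sketch):

    #include <cassert>
    #include <cstdint>

    // One lane of NEON SSHL/USHL as the comment above describes it: positive
    // amounts shift left, negative amounts shift right (arithmetic for the
    // signed form, logical for the unsigned form). Illustrative model only;
    // assumes >> on signed values is an arithmetic shift, as AArch64
    // compilers implement it.
    static int64_t sshlLane(int64_t X, int64_t Amt) {
      return Amt >= 0 ? X << Amt : X >> -Amt;
    }
    static uint64_t ushlLane(uint64_t X, int64_t Amt) {
      return Amt >= 0 ? X << Amt : X >> -Amt;
    }

    int main() {
      // An SRA/SRL by 3 is emitted as a shift-left by the negated amount.
      assert(sshlLane(-64, -3) == -8);
      assert(ushlLane(0x80u, -3) == 0x10u);
      assert(ushlLane(1, 5) == 32);
      return 0;
    }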
- case ARM64CC::MI: + case AArch64CC::MI: if (IsZero) - return DAG.getNode(ARM64ISD::FCMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, RHS, LHS); + return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS); } } switch (CC) { default: return SDValue(); - case ARM64CC::NE: { + case AArch64CC::NE: { SDValue Cmeq; if (IsZero) - Cmeq = DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); + Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS); else - Cmeq = DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Cmeq); + Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq); } - case ARM64CC::EQ: + case AArch64CC::EQ: if (IsZero) - return DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: + return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS); + case AArch64CC::GE: if (IsZero) - return DAG.getNode(ARM64ISD::CMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: + return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS); + case AArch64CC::GT: if (IsZero) - return DAG.getNode(ARM64ISD::CMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, LHS, RHS); - case ARM64CC::LE: + return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS); + case AArch64CC::LE: if (IsZero) - return DAG.getNode(ARM64ISD::CMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, RHS, LHS); - case ARM64CC::LS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, RHS, LHS); - case ARM64CC::LO: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, RHS, LHS); - case ARM64CC::LT: + return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS); + case AArch64CC::LS: + return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS); + case AArch64CC::LO: + return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS); + case AArch64CC::LT: if (IsZero) - return DAG.getNode(ARM64ISD::CMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, RHS, LHS); - case ARM64CC::HI: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, LHS, RHS); - case ARM64CC::HS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS); + case AArch64CC::HI: + return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS); + case AArch64CC::HS: + return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS); } } -SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, + SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -5849,19 +5880,19 @@ SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (LHS.getValueType().getVectorElementType().isInteger()) { assert(LHS.getValueType() == RHS.getValueType()); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - return EmitVectorComparison(LHS, RHS, ARM64CC, false, Op.getValueType(), dl, - DAG); + AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); + return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(), + dl, DAG); } 
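The integer cases of EmitVectorComparison above have no dedicated unsigned "lower" or "lower-or-same" node: LS and LO are emitted as CMHS and CMHI with the operands swapped (and LE/LT similarly reuse CMGE/CMGT). A one-lane scalar check of the unsigned identities being relied on; the helper names are invented for this sketch:

    #include <cassert>
    #include <cstdint>

    // Scalar stand-ins for one lane of CMHS ("higher or same") and CMHI
    // ("higher"), the unsigned comparisons the lowering above swaps into.
    static bool cmhs(uint32_t L, uint32_t R) { return L >= R; }
    static bool cmhi(uint32_t L, uint32_t R) { return L > R; }

    int main() {
      for (uint32_t A : {0u, 1u, 7u, 0xFFFFFFFFu})
        for (uint32_t B : {0u, 1u, 7u, 0xFFFFFFFFu}) {
          assert((A <= B) == cmhs(B, A)); // LS lowered as CMHS(RHS, LHS)
          assert((A < B) == cmhi(B, A));  // LO lowered as CMHI(RHS, LHS)
        }
      return 0;
    }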
assert(LHS.getValueType().getVectorElementType() == MVT::f32 || LHS.getValueType().getVectorElementType() == MVT::f64); - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two branches to implement. - ARM64CC::CondCode CC1, CC2; + AArch64CC::CondCode CC1, CC2; bool ShouldInvert; - changeVectorFPCCToARM64CC(CC, CC1, CC2, ShouldInvert); + changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert); bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath; SDValue Cmp = @@ -5869,7 +5900,7 @@ SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (!Cmp.getNode()) return SDValue(); - if (CC2 != ARM64CC::AL) { + if (CC2 != AArch64CC::AL) { SDValue Cmp2 = EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG); if (!Cmp2.getNode()) @@ -5887,22 +5918,22 @@ SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. -bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - unsigned Intrinsic) const { +bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const { switch (Intrinsic) { - case Intrinsic::arm64_neon_ld2: - case Intrinsic::arm64_neon_ld3: - case Intrinsic::arm64_neon_ld4: - case Intrinsic::arm64_neon_ld1x2: - case Intrinsic::arm64_neon_ld1x3: - case Intrinsic::arm64_neon_ld1x4: - case Intrinsic::arm64_neon_ld2lane: - case Intrinsic::arm64_neon_ld3lane: - case Intrinsic::arm64_neon_ld4lane: - case Intrinsic::arm64_neon_ld2r: - case Intrinsic::arm64_neon_ld3r: - case Intrinsic::arm64_neon_ld4r: { + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + case Intrinsic::aarch64_neon_ld1x2: + case Intrinsic::aarch64_neon_ld1x3: + case Intrinsic::aarch64_neon_ld1x4: + case Intrinsic::aarch64_neon_ld2lane: + case Intrinsic::aarch64_neon_ld3lane: + case Intrinsic::aarch64_neon_ld4lane: + case Intrinsic::aarch64_neon_ld2r: + case Intrinsic::aarch64_neon_ld3r: + case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; @@ -5915,15 +5946,15 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } - case Intrinsic::arm64_neon_st2: - case Intrinsic::arm64_neon_st3: - case Intrinsic::arm64_neon_st4: - case Intrinsic::arm64_neon_st1x2: - case Intrinsic::arm64_neon_st1x3: - case Intrinsic::arm64_neon_st1x4: - case Intrinsic::arm64_neon_st2lane: - case Intrinsic::arm64_neon_st3lane: - case Intrinsic::arm64_neon_st4lane: { + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: + case Intrinsic::aarch64_neon_st1x2: + case Intrinsic::aarch64_neon_st1x3: + case Intrinsic::aarch64_neon_st1x4: + case Intrinsic::aarch64_neon_st2lane: + case Intrinsic::aarch64_neon_st3lane: + case Intrinsic::aarch64_neon_st4lane: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. 
unsigned NumElts = 0; @@ -5942,8 +5973,8 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } - case Intrinsic::arm64_ldaxr: - case Intrinsic::arm64_ldxr: { + case Intrinsic::aarch64_ldaxr: + case Intrinsic::aarch64_ldxr: { PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); @@ -5955,8 +5986,8 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } - case Intrinsic::arm64_stlxr: - case Intrinsic::arm64_stxr: { + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: { PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); @@ -5968,8 +5999,8 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } - case Intrinsic::arm64_ldaxp: - case Intrinsic::arm64_ldxp: { + case Intrinsic::aarch64_ldaxp: + case Intrinsic::aarch64_ldxp: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(0); @@ -5980,8 +6011,8 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } - case Intrinsic::arm64_stlxp: - case Intrinsic::arm64_stxp: { + case Intrinsic::aarch64_stlxp: + case Intrinsic::aarch64_stxp: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(2); @@ -6000,7 +6031,7 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } // Truncations from 64-bit GPR to 32-bit GPR is free. -bool ARM64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { +bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); @@ -6009,7 +6040,7 @@ bool ARM64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return false; return true; } -bool ARM64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { +bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); @@ -6021,7 +6052,7 @@ bool ARM64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { // All 32-bit GPR operations implicitly zero the high-half of the corresponding // 64-bit GPR. 
-bool ARM64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { +bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); @@ -6030,7 +6061,7 @@ bool ARM64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { return true; return false; } -bool ARM64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { +bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); @@ -6040,7 +6071,7 @@ bool ARM64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { return false; } -bool ARM64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { +bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { EVT VT1 = Val.getValueType(); if (isZExtFree(VT1, VT2)) { return true; @@ -6054,8 +6085,8 @@ bool ARM64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { VT2.isInteger() && VT1.getSizeInBits() <= 32); } -bool ARM64TargetLowering::hasPairedLoad(Type *LoadedType, - unsigned &RequiredAligment) const { +bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, + unsigned &RequiredAligment) const { if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) return false; // Cyclone supports unaligned accesses. @@ -6064,8 +6095,8 @@ bool ARM64TargetLowering::hasPairedLoad(Type *LoadedType, return NumBits == 32 || NumBits == 64; } -bool ARM64TargetLowering::hasPairedLoad(EVT LoadedType, - unsigned &RequiredAligment) const { +bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType, + unsigned &RequiredAligment) const { if (!LoadedType.isSimple() || (!LoadedType.isInteger() && !LoadedType.isFloatingPoint())) return false; @@ -6081,10 +6112,11 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, (DstAlign == 0 || DstAlign % AlignCheck == 0)); } -EVT ARM64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const { +EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsMemset, + bool ZeroMemset, + bool MemcpyStrSrc, + MachineFunction &MF) const { // Don't use AdvSIMD to implement 16-byte memset. It would have taken one // instruction to materialize the v2i64 zero and one store (with restrictive // addressing mode). Just do two i64 store of zero-registers. @@ -6101,7 +6133,7 @@ EVT ARM64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, } // 12-bit optionally shifted immediates are legal for adds. -bool ARM64TargetLowering::isLegalAddImmediate(int64_t Immed) const { +bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)) return true; return false; @@ -6109,7 +6141,7 @@ bool ARM64TargetLowering::isLegalAddImmediate(int64_t Immed) const { // Integer comparisons are implemented with ADDS/SUBS, so the range of valid // immediates is the same as for an add or a sub. 
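The isLegalAddImmediate hunk a little further up encodes the usual A64 rule, a 12-bit unsigned immediate optionally shifted left by 12, and isLegalICmpImmediate (right below) reuses it after flipping the sign, since compares are ADDS/SUBS underneath. A standalone restatement of the same predicate with a few sample values; the function names here are stand-ins, not the LLVM entry points:

    #include <cstdint>
    #include <cstdio>

    // Same predicate as the lowering: legal ADD immediates are 0..4095,
    // optionally shifted left by 12 bits. Stand-in names, not LLVM APIs.
    static bool isLegalAddImm(int64_t Immed) {
      return (Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && (Immed >> 24) == 0);
    }

    // Compares become ADDS/SUBS, so a negative immediate is fine whenever its
    // magnitude would be a legal add immediate.
    static bool isLegalICmpImm(int64_t Immed) {
      if (Immed < 0)
        Immed = -Immed;
      return isLegalAddImm(Immed);
    }

    int main() {
      for (long long V : {4095LL, 4096LL, 0x7FF000LL, 0x1000000LL, -42LL})
        std::printf("%lld: add=%d icmp=%d\n", V, int(isLegalAddImm(V)),
                    int(isLegalICmpImm(V)));
      return 0;
    }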
-bool ARM64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { +bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { if (Immed < 0) Immed *= -1; return isLegalAddImmediate(Immed); @@ -6117,9 +6149,9 @@ bool ARM64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool ARM64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - // ARM64 has five basic addressing modes: +bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // AArch64 has five basic addressing modes: // reg // reg + 9-bit signed offset // reg + SIZE_IN_BYTES * 12-bit unsigned offset @@ -6168,8 +6200,8 @@ bool ARM64TargetLowering::isLegalAddressingMode(const AddrMode &AM, return false; } -int ARM64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { +int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, + Type *Ty) const { // Scaling factors are not free at all. // Operands | Rt Latency // ------------------------------------------- @@ -6184,7 +6216,7 @@ int ARM64TargetLowering::getScalingFactorCost(const AddrMode &AM, return -1; } -bool ARM64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { +bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) @@ -6202,17 +6234,18 @@ bool ARM64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { } const MCPhysReg * -ARM64TargetLowering::getScratchRegisters(CallingConv::ID) const { +AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. static const MCPhysReg ScratchRegs[] = { - ARM64::X16, ARM64::X17, ARM64::LR, 0 + AArch64::X16, AArch64::X17, AArch64::LR, 0 }; return ScratchRegs; } -bool ARM64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { +bool +AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { EVT VT = N->getValueType(0); // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine // it with shift to let it be lowered to UBFX. @@ -6227,8 +6260,8 @@ bool ARM64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { return true; } -bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const { +bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -6236,7 +6269,7 @@ bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return false; int64_t Val = Imm.getSExtValue(); - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) + if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize)) return true; if ((int64_t)Val < 0) @@ -6269,10 +6302,10 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { N0.getOperand(0)); // Generate SUBS & CSEL. 
SDValue Cmp = - DAG.getNode(ARM64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), + DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), N0.getOperand(0), DAG.getConstant(0, VT)); - return DAG.getNode(ARM64ISD::CSEL, DL, VT, N0.getOperand(0), Neg, - DAG.getConstant(ARM64CC::PL, MVT::i32), + return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg, + DAG.getConstant(AArch64CC::PL, MVT::i32), SDValue(Cmp.getNode(), 1)); } return SDValue(); @@ -6281,7 +6314,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { // performXorCombine - Attempts to handle integer ABS. static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -6290,7 +6323,7 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -6350,7 +6383,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1)); unsigned Opcode = - (N->getOpcode() == ISD::SINT_TO_FP) ? ARM64ISD::SITOF : ARM64ISD::UITOF; + (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF; return DAG.getNode(Opcode, SDLoc(N), VT, Load); } @@ -6417,7 +6450,7 @@ static SDValue tryCombineToEXTR(SDNode *N, std::swap(ShiftLHS, ShiftRHS); } - return DAG.getNode(ARM64ISD::EXTR, DL, VT, LHS, RHS, + return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS, DAG.getConstant(ShiftRHS, MVT::i64)); } @@ -6461,7 +6494,7 @@ static SDValue tryCombineToBSL(SDNode *N, } if (FoundMatch) - return DAG.getNode(ARM64ISD::BSL, DL, VT, SDValue(BVN0, 0), + return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0), N0->getOperand(1 - i), N1->getOperand(1 - j)); } @@ -6469,9 +6502,9 @@ static SDValue tryCombineToBSL(SDNode *N, } static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) - if (!EnableARM64ExtrGeneration) + if (!EnableAArch64ExtrGeneration) return SDValue(); SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); @@ -6517,14 +6550,14 @@ static SDValue performBitcastCombine(SDNode *N, SDValue Op0 = N->getOperand(0); if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR && !(Op0->isMachineOpcode() && - Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG)) + Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG)) return SDValue(); uint64_t idx = cast(Op0->getOperand(1))->getZExtValue(); if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) { if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0) return SDValue(); - } else if (Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG) { - if (idx != ARM64::dsub) + } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) { + if (idx != AArch64::dsub) return SDValue(); // The dsub reference is equivalent to a lane zero subvector reference. 
idx = 0; @@ -6539,7 +6572,7 @@ static SDValue performBitcastCombine(SDNode *N, if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2) return SDValue(); - DEBUG(dbgs() << "arm64-lower: bitcast extract_subvector simplification\n"); + DEBUG(dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n"); // Create the simplified form to just extract the low or high half of the // vector directly rather than bothering with the bitcasts. @@ -6549,7 +6582,7 @@ static SDValue performBitcastCombine(SDNode *N, SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx); } else { - SDValue SubReg = DAG.getTargetConstant(ARM64::dsub, MVT::i32); + SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32); return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT, Source, SubReg), 0); @@ -6572,7 +6605,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, // canonicalise to that. if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) { assert(VT.getVectorElementType().getSizeInBits() == 64); - return DAG.getNode(ARM64ISD::DUPLANE64, dl, VT, + return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N->getOperand(0), DAG), DAG.getConstant(0, MVT::i64)); } @@ -6595,7 +6628,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, if (!RHSTy.isVector()) return SDValue(); - DEBUG(dbgs() << "arm64-lower: concat_vectors bitcast simplification\n"); + DEBUG(dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n"); MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(), RHSTy.getVectorNumElements() * 2); @@ -6670,13 +6703,13 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { // operand saying *which* lane, so we need to know. bool IsDUPLANE; switch (N.getOpcode()) { - case ARM64ISD::DUP: + case AArch64ISD::DUP: IsDUPLANE = false; break; - case ARM64ISD::DUPLANE8: - case ARM64ISD::DUPLANE16: - case ARM64ISD::DUPLANE32: - case ARM64ISD::DUPLANE64: + case AArch64ISD::DUPLANE8: + case AArch64ISD::DUPLANE16: + case AArch64ISD::DUPLANE32: + case AArch64ISD::DUPLANE64: IsDUPLANE = true; break; default: @@ -6696,7 +6729,7 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0), N.getOperand(1)); else - NewDUP = DAG.getNode(ARM64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0)); + NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy, NewDUP, DAG.getConstant(NumElems, MVT::i64)); @@ -6717,29 +6750,29 @@ struct GenericSetCCInfo { ISD::CondCode CC; }; -/// \brief Helper structure to keep track of a SET_CC lowered into ARM64 code. -struct ARM64SetCCInfo { +/// \brief Helper structure to keep track of a SET_CC lowered into AArch64 code. +struct AArch64SetCCInfo { const SDValue *Cmp; - ARM64CC::CondCode CC; + AArch64CC::CondCode CC; }; /// \brief Helper structure to keep track of SetCC information. union SetCCInfo { GenericSetCCInfo Generic; - ARM64SetCCInfo ARM64; + AArch64SetCCInfo AArch64; }; -/// \brief Helper structure to be able to read SetCC information. -/// If set to true, IsARM64 field, Info is a ARM64SetCCInfo, otherwise Info is -/// a GenericSetCCInfo. +/// \brief Helper structure to be able to read SetCC information. If set to +/// true, IsAArch64 field, Info is a AArch64SetCCInfo, otherwise Info is a +/// GenericSetCCInfo. 
struct SetCCInfoAndKind { SetCCInfo Info; - bool IsARM64; + bool IsAArch64; }; /// \brief Check whether or not \p Op is a SET_CC operation, either a generic or /// an -/// ARM64 lowered one. +/// AArch64 lowered one. /// \p SetCCInfo is filled accordingly. /// \post SetCCInfo is meanginfull only when this function returns true. /// \return True when Op is a kind of SET_CC operation. @@ -6749,20 +6782,20 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0); SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1); SetCCInfo.Info.Generic.CC = cast(Op.getOperand(2))->get(); - SetCCInfo.IsARM64 = false; + SetCCInfo.IsAArch64 = false; return true; } // Otherwise, check if this is a matching csel instruction. // In other words: // - csel 1, 0, cc // - csel 0, 1, !cc - if (Op.getOpcode() != ARM64ISD::CSEL) + if (Op.getOpcode() != AArch64ISD::CSEL) return false; // Set the information about the operands. // TODO: we want the operands of the Cmp not the csel - SetCCInfo.Info.ARM64.Cmp = &Op.getOperand(3); - SetCCInfo.IsARM64 = true; - SetCCInfo.Info.ARM64.CC = static_cast( + SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3); + SetCCInfo.IsAArch64 = true; + SetCCInfo.Info.AArch64.CC = static_cast( cast(Op.getOperand(2))->getZExtValue()); // Check that the operands matches the constraints: @@ -6779,8 +6812,8 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { if (!TValue->isOne()) { // Update the comparison when we are interested in !cc. std::swap(TValue, FValue); - SetCCInfo.Info.ARM64.CC = - ARM64CC::getInvertedCondCode(SetCCInfo.Info.ARM64.CC); + SetCCInfo.Info.AArch64.CC = + AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC); } return TValue->isOne() && FValue->isNullValue(); } @@ -6813,8 +6846,8 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { } // FIXME: This could be generatized to work for FP comparisons. - EVT CmpVT = InfoAndKind.IsARM64 - ? InfoAndKind.Info.ARM64.Cmp->getOperand(0).getValueType() + EVT CmpVT = InfoAndKind.IsAArch64 + ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType() : InfoAndKind.Info.Generic.Opnd0->getValueType(); if (CmpVT != MVT::i32 && CmpVT != MVT::i64) return SDValue(); @@ -6822,19 +6855,19 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { SDValue CCVal; SDValue Cmp; SDLoc dl(Op); - if (InfoAndKind.IsARM64) { + if (InfoAndKind.IsAArch64) { CCVal = DAG.getConstant( - ARM64CC::getInvertedCondCode(InfoAndKind.Info.ARM64.CC), MVT::i32); - Cmp = *InfoAndKind.Info.ARM64.Cmp; + AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32); + Cmp = *InfoAndKind.Info.AArch64.Cmp; } else - Cmp = getARM64Cmp(*InfoAndKind.Info.Generic.Opnd0, + Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1, ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true), CCVal, DAG, dl); EVT VT = Op->getValueType(0); LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT)); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); } // The basic add/sub long vector instructions have variants with "2" on the end @@ -6893,8 +6926,8 @@ static SDValue performAddSubLongCombine(SDNode *N, // Massage DAGs which we can use the high-half "long" operations on into // something isel will recognize better. E.g. 
// -// (arm64_neon_umull (extract_high vec) (dupv64 scalar)) --> -// (arm64_neon_umull (extract_high (v2i64 vec))) +// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) --> +// (aarch64_neon_umull (extract_high (v2i64 vec))) // (extract_high (v2i64 (dup128 scalar))))) // static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, @@ -6951,24 +6984,24 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { switch (IID) { default: llvm_unreachable("Unknown shift intrinsic"); - case Intrinsic::arm64_neon_sqshl: - Opcode = ARM64ISD::SQSHL_I; + case Intrinsic::aarch64_neon_sqshl: + Opcode = AArch64ISD::SQSHL_I; IsRightShift = false; break; - case Intrinsic::arm64_neon_uqshl: - Opcode = ARM64ISD::UQSHL_I; + case Intrinsic::aarch64_neon_uqshl: + Opcode = AArch64ISD::UQSHL_I; IsRightShift = false; break; - case Intrinsic::arm64_neon_srshl: - Opcode = ARM64ISD::SRSHR_I; + case Intrinsic::aarch64_neon_srshl: + Opcode = AArch64ISD::SRSHR_I; IsRightShift = true; break; - case Intrinsic::arm64_neon_urshl: - Opcode = ARM64ISD::URSHR_I; + case Intrinsic::aarch64_neon_urshl: + Opcode = AArch64ISD::URSHR_I; IsRightShift = true; break; - case Intrinsic::arm64_neon_sqshlu: - Opcode = ARM64ISD::SQSHLU_I; + case Intrinsic::aarch64_neon_sqshlu: + Opcode = AArch64ISD::SQSHLU_I; IsRightShift = false; break; } @@ -7001,38 +7034,38 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; unsigned IID = getIntrinsicID(N); switch (IID) { default: break; - case Intrinsic::arm64_neon_vcvtfxs2fp: - case Intrinsic::arm64_neon_vcvtfxu2fp: + case Intrinsic::aarch64_neon_vcvtfxs2fp: + case Intrinsic::aarch64_neon_vcvtfxu2fp: return tryCombineFixedPointConvert(N, DCI, DAG); break; - case Intrinsic::arm64_neon_fmax: - return DAG.getNode(ARM64ISD::FMAX, SDLoc(N), N->getValueType(0), + case Intrinsic::aarch64_neon_fmax: + return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm64_neon_fmin: - return DAG.getNode(ARM64ISD::FMIN, SDLoc(N), N->getValueType(0), + case Intrinsic::aarch64_neon_fmin: + return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: - case Intrinsic::arm64_neon_pmull: - case Intrinsic::arm64_neon_sqdmull: + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: + case Intrinsic::aarch64_neon_pmull: + case Intrinsic::aarch64_neon_sqdmull: return tryCombineLongOpWithDup(IID, N, DCI, DAG); - case Intrinsic::arm64_neon_sqshl: - case Intrinsic::arm64_neon_uqshl: - case Intrinsic::arm64_neon_sqshlu: - case Intrinsic::arm64_neon_srshl: - case Intrinsic::arm64_neon_urshl: + case Intrinsic::aarch64_neon_sqshl: + case Intrinsic::aarch64_neon_uqshl: + case Intrinsic::aarch64_neon_sqshlu: + case Intrinsic::aarch64_neon_srshl: + case Intrinsic::aarch64_neon_urshl: return tryCombineShiftImm(IID, N, DAG); - case Intrinsic::arm64_crc32b: - case Intrinsic::arm64_crc32cb: + case Intrinsic::aarch64_crc32b: + case Intrinsic::aarch64_crc32cb: return tryCombineCRC32(0xff, N, DAG); - case Intrinsic::arm64_crc32h: - case Intrinsic::arm64_crc32ch: + case Intrinsic::aarch64_crc32h: + case Intrinsic::aarch64_crc32ch: return tryCombineCRC32(0xffff, N, DAG); } return SDValue(); @@ 
-7049,8 +7082,8 @@ static SDValue performExtendCombine(SDNode *N, N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) { SDNode *ABDNode = N->getOperand(0).getNode(); unsigned IID = getIntrinsicID(ABDNode); - if (IID == Intrinsic::arm64_neon_sabd || - IID == Intrinsic::arm64_neon_uabd) { + if (IID == Intrinsic::aarch64_neon_sabd || + IID == Intrinsic::aarch64_neon_uabd) { SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG); if (!NewABD.getNode()) return SDValue(); @@ -7060,7 +7093,7 @@ static SDValue performExtendCombine(SDNode *N, } } - // This is effectively a custom type legalization for ARM64. + // This is effectively a custom type legalization for AArch64. // // Type legalization will split an extend of a small, legal, type to a larger // illegal type by first splitting the destination type, often creating @@ -7074,7 +7107,7 @@ static SDValue performExtendCombine(SDNode *N, // %hi = v4i32 sext v4i8 %hisrc // Things go rapidly downhill from there. // - // For ARM64, the [sz]ext vector instructions can only go up one element + // For AArch64, the [sz]ext vector instructions can only go up one element // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32 // take two instructions. // @@ -7199,7 +7232,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, - const ARM64Subtarget *Subtarget) { + const AArch64Subtarget *Subtarget) { if (!DCI.isBeforeLegalize()) return SDValue(); @@ -7322,7 +7355,7 @@ static SDValue performPostLD1Combine(SDNode *N, unsigned NumBytes = VT.getScalarSizeInBits() / 8; if (IncVal != NumBytes) continue; - Inc = DAG.getRegister(ARM64::XZR, MVT::i64); + Inc = DAG.getRegister(AArch64::XZR, MVT::i64); } SmallVector Ops; @@ -7336,7 +7369,7 @@ static SDValue performPostLD1Combine(SDNode *N, EVT Tys[3] = { VT, MVT::i64, MVT::Other }; SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, 3)); - unsigned NewOp = IsLaneOp ? ARM64ISD::LD1LANEpost : ARM64ISD::LD1DUPpost; + unsigned NewOp = IsLaneOp ? 
AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost; SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, MemVT, LoadSDN->getMemOperand()); @@ -7387,47 +7420,47 @@ static SDValue performNEONPostLDSTCombine(SDNode *N, unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { default: llvm_unreachable("unexpected intrinsic for Neon base update"); - case Intrinsic::arm64_neon_ld2: NewOpc = ARM64ISD::LD2post; + case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post; NumVecs = 2; break; - case Intrinsic::arm64_neon_ld3: NewOpc = ARM64ISD::LD3post; + case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post; NumVecs = 3; break; - case Intrinsic::arm64_neon_ld4: NewOpc = ARM64ISD::LD4post; + case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post; NumVecs = 4; break; - case Intrinsic::arm64_neon_st2: NewOpc = ARM64ISD::ST2post; + case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post; NumVecs = 2; IsStore = true; break; - case Intrinsic::arm64_neon_st3: NewOpc = ARM64ISD::ST3post; + case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post; NumVecs = 3; IsStore = true; break; - case Intrinsic::arm64_neon_st4: NewOpc = ARM64ISD::ST4post; + case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post; NumVecs = 4; IsStore = true; break; - case Intrinsic::arm64_neon_ld1x2: NewOpc = ARM64ISD::LD1x2post; + case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post; NumVecs = 2; break; - case Intrinsic::arm64_neon_ld1x3: NewOpc = ARM64ISD::LD1x3post; + case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post; NumVecs = 3; break; - case Intrinsic::arm64_neon_ld1x4: NewOpc = ARM64ISD::LD1x4post; + case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post; NumVecs = 4; break; - case Intrinsic::arm64_neon_st1x2: NewOpc = ARM64ISD::ST1x2post; + case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post; NumVecs = 2; IsStore = true; break; - case Intrinsic::arm64_neon_st1x3: NewOpc = ARM64ISD::ST1x3post; + case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post; NumVecs = 3; IsStore = true; break; - case Intrinsic::arm64_neon_st1x4: NewOpc = ARM64ISD::ST1x4post; + case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post; NumVecs = 4; IsStore = true; break; - case Intrinsic::arm64_neon_ld2r: NewOpc = ARM64ISD::LD2DUPpost; + case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost; NumVecs = 2; IsDupOp = true; break; - case Intrinsic::arm64_neon_ld3r: NewOpc = ARM64ISD::LD3DUPpost; + case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost; NumVecs = 3; IsDupOp = true; break; - case Intrinsic::arm64_neon_ld4r: NewOpc = ARM64ISD::LD4DUPpost; + case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost; NumVecs = 4; IsDupOp = true; break; - case Intrinsic::arm64_neon_ld2lane: NewOpc = ARM64ISD::LD2LANEpost; + case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost; NumVecs = 2; IsLaneOp = true; break; - case Intrinsic::arm64_neon_ld3lane: NewOpc = ARM64ISD::LD3LANEpost; + case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost; NumVecs = 3; IsLaneOp = true; break; - case Intrinsic::arm64_neon_ld4lane: NewOpc = ARM64ISD::LD4LANEpost; + case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost; NumVecs = 4; IsLaneOp = true; break; - case Intrinsic::arm64_neon_st2lane: NewOpc = ARM64ISD::ST2LANEpost; + case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost; NumVecs = 2; IsStore = true; IsLaneOp = true; break; - 
case Intrinsic::arm64_neon_st3lane: NewOpc = ARM64ISD::ST3LANEpost; + case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost; NumVecs = 3; IsStore = true; IsLaneOp = true; break; - case Intrinsic::arm64_neon_st4lane: NewOpc = ARM64ISD::ST4LANEpost; + case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost; NumVecs = 4; IsStore = true; IsLaneOp = true; break; } @@ -7446,7 +7479,7 @@ static SDValue performNEONPostLDSTCombine(SDNode *N, NumBytes /= VecTy.getVectorNumElements(); if (IncVal != NumBytes) continue; - Inc = DAG.getRegister(ARM64::XZR, MVT::i64); + Inc = DAG.getRegister(AArch64::XZR, MVT::i64); } SmallVector Ops; Ops.push_back(N->getOperand(0)); // Incoming chain @@ -7497,11 +7530,11 @@ static SDValue performBRCONDCombine(SDNode *N, assert(isa(CCVal) && "Expected a ConstantSDNode here!"); unsigned CC = cast(CCVal)->getZExtValue(); - if (CC != ARM64CC::EQ && CC != ARM64CC::NE) + if (CC != AArch64CC::EQ && CC != AArch64CC::NE) return SDValue(); unsigned CmpOpc = Cmp.getOpcode(); - if (CmpOpc != ARM64ISD::ADDS && CmpOpc != ARM64ISD::SUBS) + if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS) return SDValue(); // Only attempt folding if there is only one use of the flag and no use of the @@ -7529,10 +7562,10 @@ static SDValue performBRCONDCombine(SDNode *N, // Fold the compare into the branch instruction. SDValue BR; - if (CC == ARM64CC::EQ) - BR = DAG.getNode(ARM64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); + if (CC == AArch64CC::EQ) + BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); else - BR = DAG.getNode(ARM64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); + BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); // Do not add new nodes to DAG combiner worklist. 
DCI.CombineTo(N, BR, false); @@ -7608,8 +7641,8 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2)); } -SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { +SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: @@ -7642,36 +7675,36 @@ SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N, return performVSelectCombine(N, DCI.DAG); case ISD::STORE: return performSTORECombine(N, DCI, DAG, Subtarget); - case ARM64ISD::BRCOND: + case AArch64ISD::BRCOND: return performBRCONDCombine(N, DCI, DAG); - case ARM64ISD::DUP: + case AArch64ISD::DUP: return performPostLD1Combine(N, DCI, false); case ISD::INSERT_VECTOR_ELT: return performPostLD1Combine(N, DCI, true); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { - case Intrinsic::arm64_neon_ld2: - case Intrinsic::arm64_neon_ld3: - case Intrinsic::arm64_neon_ld4: - case Intrinsic::arm64_neon_ld1x2: - case Intrinsic::arm64_neon_ld1x3: - case Intrinsic::arm64_neon_ld1x4: - case Intrinsic::arm64_neon_ld2lane: - case Intrinsic::arm64_neon_ld3lane: - case Intrinsic::arm64_neon_ld4lane: - case Intrinsic::arm64_neon_ld2r: - case Intrinsic::arm64_neon_ld3r: - case Intrinsic::arm64_neon_ld4r: - case Intrinsic::arm64_neon_st2: - case Intrinsic::arm64_neon_st3: - case Intrinsic::arm64_neon_st4: - case Intrinsic::arm64_neon_st1x2: - case Intrinsic::arm64_neon_st1x3: - case Intrinsic::arm64_neon_st1x4: - case Intrinsic::arm64_neon_st2lane: - case Intrinsic::arm64_neon_st3lane: - case Intrinsic::arm64_neon_st4lane: + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + case Intrinsic::aarch64_neon_ld1x2: + case Intrinsic::aarch64_neon_ld1x3: + case Intrinsic::aarch64_neon_ld1x4: + case Intrinsic::aarch64_neon_ld2lane: + case Intrinsic::aarch64_neon_ld3lane: + case Intrinsic::aarch64_neon_ld4lane: + case Intrinsic::aarch64_neon_ld2r: + case Intrinsic::aarch64_neon_ld3r: + case Intrinsic::aarch64_neon_ld4r: + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: + case Intrinsic::aarch64_neon_st1x2: + case Intrinsic::aarch64_neon_st1x3: + case Intrinsic::aarch64_neon_st1x4: + case Intrinsic::aarch64_neon_st2lane: + case Intrinsic::aarch64_neon_st3lane: + case Intrinsic::aarch64_neon_st4lane: return performNEONPostLDSTCombine(N, DCI, DAG); default: break; @@ -7684,7 +7717,8 @@ SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N, // we can't perform a tail-call. In particular, we need to check for // target ISD nodes that are returns and any other "odd" constructs // that the generic analysis code won't necessarily catch. -bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { +bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N, + SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) @@ -7704,7 +7738,7 @@ bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { bool HasRet = false; for (SDNode *Node : Copy->uses()) { - if (Node->getOpcode() != ARM64ISD::RET_FLAG) + if (Node->getOpcode() != AArch64ISD::RET_FLAG) return false; HasRet = true; } @@ -7720,18 +7754,18 @@ bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { // call. 
This will cause the optimizers to attempt to move, or duplicate, // return instructions to help enable tail call optimizations for this // instruction. -bool ARM64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { if (!CI->isTailCall()) return false; return true; } -bool ARM64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - bool &IsInc, - SelectionDAG &DAG) const { +bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + bool &IsInc, + SelectionDAG &DAG) const { if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) return false; @@ -7749,10 +7783,10 @@ bool ARM64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, return false; } -bool ARM64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const { +bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { EVT VT; SDValue Ptr; if (LoadSDNode *LD = dyn_cast(N)) { @@ -7771,11 +7805,9 @@ bool ARM64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, return true; } -bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const { +bool AArch64TargetLowering::getPostIndexedAddressParts( + SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; if (LoadSDNode *LD = dyn_cast(N)) { @@ -7798,9 +7830,8 @@ bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARM64TargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl &Results, - SelectionDAG &DAG) const { +void AArch64TargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this"); @@ -7812,7 +7843,7 @@ void ARM64TargetLowering::ReplaceNodeResults(SDNode *N, } } -bool ARM64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { +bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { // Loads and stores less than 128-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong: @@ -7825,8 +7856,8 @@ bool ARM64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { return Inst->getType()->getPrimitiveSizeInBits() <= 128; } -Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, - AtomicOrdering Ord) const { +Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast(Addr->getType())->getElementType(); bool IsAcquire = @@ -7837,7 +7868,7 @@ Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, // single i128 here. if (ValTy->getPrimitiveSizeInBits() == 128) { Intrinsic::ID Int = - IsAcquire ? Intrinsic::arm64_ldaxp : Intrinsic::arm64_ldxp; + IsAcquire ? 
Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp; Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); @@ -7853,7 +7884,7 @@ Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = - IsAcquire ? Intrinsic::arm64_ldaxr : Intrinsic::arm64_ldxr; + IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr; Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( @@ -7861,9 +7892,9 @@ Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, cast(Addr->getType())->getElementType()); } -Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, - Value *Val, Value *Addr, - AtomicOrdering Ord) const { +Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, + Value *Val, Value *Addr, + AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); bool IsRelease = Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; @@ -7873,7 +7904,7 @@ Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, // before the call. if (Val->getType()->getPrimitiveSizeInBits() == 128) { Intrinsic::ID Int = - IsRelease ? Intrinsic::arm64_stlxp : Intrinsic::arm64_stxp; + IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp; Function *Stxr = Intrinsic::getDeclaration(M, Int); Type *Int64Ty = Type::getInt64Ty(M->getContext()); @@ -7884,7 +7915,7 @@ Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, } Intrinsic::ID Int = - IsRelease ? Intrinsic::arm64_stlxr : Intrinsic::arm64_stxr; + IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr; Type *Tys[] = { Addr->getType() }; Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys); diff --git a/lib/Target/ARM64/ARM64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h similarity index 96% rename from lib/Target/ARM64/ARM64ISelLowering.h rename to lib/Target/AArch64/AArch64ISelLowering.h index b2402c9791c..de16c4d9d4b 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -1,4 +1,4 @@ -//==-- ARM64ISelLowering.h - ARM64 DAG Lowering Interface --------*- C++ -*-==// +//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// // -// This file defines the interfaces that ARM64 uses to lower LLVM code into a +// This file defines the interfaces that AArch64 uses to lower LLVM code into a // selection DAG. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64_ISELLOWERING_H -#define LLVM_TARGET_ARM64_ISELLOWERING_H +#ifndef LLVM_TARGET_AArch64_ISELLOWERING_H +#define LLVM_TARGET_AArch64_ISELLOWERING_H #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -22,7 +22,7 @@ namespace llvm { -namespace ARM64ISD { +namespace AArch64ISD { enum { FIRST_NUMBER = ISD::BUILTIN_OP_END, @@ -188,16 +188,16 @@ enum { ST4LANEpost }; -} // end namespace ARM64ISD +} // end namespace AArch64ISD -class ARM64Subtarget; -class ARM64TargetMachine; +class AArch64Subtarget; +class AArch64TargetMachine; -class ARM64TargetLowering : public TargetLowering { +class AArch64TargetLowering : public TargetLowering { bool RequireStrictAlign; public: - explicit ARM64TargetLowering(ARM64TargetMachine &TM); + explicit AArch64TargetLowering(AArch64TargetMachine &TM); /// Selects the correct CCAssignFn for a the given CallingConvention /// value. @@ -325,9 +325,9 @@ public: bool shouldExpandAtomicInIR(Instruction *Inst) const override; private: - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); @@ -454,11 +454,11 @@ private: SelectionDAG &DAG) const override; }; -namespace ARM64 { +namespace AArch64 { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); -} // end namespace ARM64 +} // end namespace AArch64 } // end namespace llvm -#endif // LLVM_TARGET_ARM64_ISELLOWERING_H +#endif // LLVM_TARGET_AArch64_ISELLOWERING_H diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td similarity index 92% rename from lib/Target/ARM64/ARM64InstrAtomics.td rename to lib/Target/AArch64/AArch64InstrAtomics.td index 1d1483ac126..3b9e3c63059 100644 --- a/lib/Target/ARM64/ARM64InstrAtomics.td +++ b/lib/Target/AArch64/AArch64InstrAtomics.td @@ -1,4 +1,4 @@ -//===- ARM64InstrAtomics.td - ARM64 Atomic codegen support -*- tablegen -*-===// +//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// ARM64 Atomic operand code-gen constructs. +// AArch64 Atomic operand code-gen constructs. // //===----------------------------------------------------------------------===// @@ -117,7 +117,7 @@ class releasing_store return Ordering == Release || Ordering == SequentiallyConsistent; }]>; -// An atomic store operation that doesn't actually need to be atomic on ARM64. +// An atomic store operation that doesn't actually need to be atomic on AArch64. class relaxed_store : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ AtomicOrdering Ordering = cast(N)->getOrdering(); @@ -202,19 +202,19 @@ def : Pat<(relaxed_store // Load-exclusives. 
-def ldxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ +def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; -def ldxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ +def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; }]>; -def ldxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ +def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; -def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ +def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; @@ -235,19 +235,19 @@ def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff), // Load-exclusives. -def ldaxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ +def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; -def ldaxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ +def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; }]>; -def ldaxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ +def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; -def ldaxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ +def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; @@ -269,22 +269,22 @@ def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff), // Store-exclusives. def stxr_1 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ + (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; def stxr_2 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ + (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; }]>; def stxr_4 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ + (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; def stxr_8 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ + (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; @@ -315,22 +315,22 @@ def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), // Store-release-exclusives. def stlxr_1 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stlxr node:$val, node:$ptr), [{ + (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; def stlxr_2 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stlxr node:$val, node:$ptr), [{ + (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; }]>; def stlxr_4 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stlxr node:$val, node:$ptr), [{ + (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stlxr node:$val, node:$ptr), [{ + (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; }]>; @@ -361,4 +361,4 @@ def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), // And clear exclusive. 
-def : Pat<(int_arm64_clrex), (CLREX 0xf)>; +def : Pat<(int_aarch64_clrex), (CLREX 0xf)>; diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td similarity index 98% rename from lib/Target/ARM64/ARM64InstrFormats.td rename to lib/Target/AArch64/AArch64InstrFormats.td index ea45b3d4fb2..d455d7e45e0 100644 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1,4 +1,4 @@ -//===- ARM64InstrFormats.td - ARM64 Instruction Formats ------*- tblgen -*-===// +//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===// // // The LLVM Compiler Infrastructure // @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Describe ARM64 instructions format here +// Describe AArch64 instructions format here // // Format specifies the encoding used by the instruction. This is part of the @@ -21,8 +21,8 @@ class Format val> { def PseudoFrm : Format<0>; def NormalFrm : Format<1>; // Do we need any others? -// ARM64 Instruction Format -class ARM64Inst : Instruction { +// AArch64 Instruction Format +class AArch64Inst : Instruction { field bits<32> Inst; // Instruction encoding. // Mask of bits that cause an encoding to be UNPREDICTABLE. // If a bit is set, then if the corresponding bit in the @@ -32,7 +32,7 @@ class ARM64Inst : Instruction { // SoftFail is the generic name for this field, but we alias it so // as to make it more obvious what it means in ARM-land. field bits<32> SoftFail = Unpredictable; - let Namespace = "ARM64"; + let Namespace = "AArch64"; Format F = f; bits<2> Form = F.Value; let Pattern = []; @@ -41,7 +41,7 @@ class ARM64Inst : Instruction { // Pseudo instructions (don't have encoding information) class Pseudo pattern, string cstr = ""> - : ARM64Inst { + : AArch64Inst { dag OutOperandList = oops; dag InOperandList = iops; let Pattern = pattern; @@ -49,7 +49,7 @@ class Pseudo pattern, string cstr = ""> } // Real instructions (have encoding information) -class EncodedI pattern> : ARM64Inst { +class EncodedI pattern> : AArch64Inst { let Pattern = pattern; let Size = 4; } @@ -440,11 +440,11 @@ def vecshiftL64 : Operand, ImmLeafgetZExtValue(), 32); + uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 32); return CurDAG->getTargetConstant(enc, MVT::i32); }]>; def logical_imm64_XFORM : SDNodeXFormgetZExtValue(), 64); + uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 64); return CurDAG->getTargetConstant(enc, MVT::i32); }]>; @@ -457,13 +457,13 @@ def LogicalImm64Operand : AsmOperandClass { let DiagnosticType = "LogicalSecondSource"; } def logical_imm32 : Operand, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 32); + return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32); }], logical_imm32_XFORM> { let PrintMethod = "printLogicalImm32"; let ParserMatchClass = LogicalImm32Operand; } def logical_imm64 : Operand, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 64); + return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 64); }], logical_imm64_XFORM> { let PrintMethod = "printLogicalImm64"; let ParserMatchClass = LogicalImm64Operand; @@ -661,10 +661,10 @@ class arith_extended_reg32to64 : Operand, // Floating-point immediate. 
def fpimm32 : Operand, PatLeaf<(f32 fpimm), [{ - return ARM64_AM::getFP32Imm(N->getValueAPF()) != -1; + return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1; }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::getFP32Imm(InVal); + uint32_t enc = AArch64_AM::getFP32Imm(InVal); return CurDAG->getTargetConstant(enc, MVT::i32); }]>> { let ParserMatchClass = FPImmOperand; @@ -672,10 +672,10 @@ def fpimm32 : Operand, } def fpimm64 : Operand, PatLeaf<(f64 fpimm), [{ - return ARM64_AM::getFP64Imm(N->getValueAPF()) != -1; + return AArch64_AM::getFP64Imm(N->getValueAPF()) != -1; }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::getFP64Imm(InVal); + uint32_t enc = AArch64_AM::getFP64Imm(InVal); return CurDAG->getTargetConstant(enc, MVT::i32); }]>> { let ParserMatchClass = FPImmOperand; @@ -743,12 +743,12 @@ def VectorIndexD : Operand, ImmLeaf, PatLeaf<(f64 fpimm), [{ - return ARM64_AM::isAdvSIMDModImmType10(N->getValueAPF() + return AArch64_AM::isAdvSIMDModImmType10(N->getValueAPF() .bitcastToAPInt() .getZExtValue()); }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() + uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() .bitcastToAPInt() .getZExtValue()); return CurDAG->getTargetConstant(enc, MVT::i32); @@ -982,7 +982,7 @@ def am_brcond : Operand { class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), "b", ".$cond\t$target", "", - [(ARM64brcond bb:$target, imm:$cond, NZCV)]>, + [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> { let isBranch = 1; let isTerminator = 1; @@ -1759,7 +1759,7 @@ multiclass AddSubS { //--- def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisPtrTy<3>]>; -def ARM64Extr : SDNode<"ARM64ISD::EXTR", SDTA64EXTR>; +def AArch64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; class BaseExtractImm patterns> @@ -1782,7 +1782,7 @@ class BaseExtractImm { def Wrri : BaseExtractImm { + (AArch64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> { let Inst{31} = 0; let Inst{22} = 0; // imm<5> must be zero. 
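For context on the logical_imm32/logical_imm64 operands and their XFORMs in the hunks above: they delegate to the AArch64_AM helpers that this commit renames along with the rest of the target. Below is a minimal sketch of the encode/decode round trip those predicates assume, written against the in-tree MCTargetDesc/AArch64AddressingModes.h header as renamed here; the roundTripLogicalImm helper is illustrative only and not part of this patch.

    // Illustrative sketch, not part of the patch: round-trip a bitmask immediate
    // through the renamed AArch64_AM helpers used by logical_imm32/logical_imm64.
    #include "MCTargetDesc/AArch64AddressingModes.h" // in-tree header, as renamed in this commit
    #include <cassert>
    #include <cstdint>
    using namespace llvm;

    static uint64_t roundTripLogicalImm(uint64_t Val, unsigned RegSize) {
      // The PatLeaf predicates only match values the encoder can represent.
      assert(AArch64_AM::isLogicalImmediate(Val, RegSize) &&
             "not a representable bitmask immediate");
      uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Val, RegSize);
      // decodeLogicalImmediate() is the inverse used by analyzeCompare() later in
      // this patch; for any valid immediate the round trip returns Val unchanged.
      return AArch64_AM::decodeLogicalImmediate(Enc, RegSize);
    }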
@@ -1790,7 +1790,7 @@ multiclass ExtractImm { } def Xrri : BaseExtractImm { + (AArch64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> { let Inst{31} = 1; let Inst{22} = 1; @@ -2081,7 +2081,7 @@ class BaseCondSelect op2, RegisterClass regtype, string asm> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), asm, "\t$Rd, $Rn, $Rm, $cond", "", [(set regtype:$Rd, - (ARM64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>, + (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; @@ -2113,7 +2113,7 @@ class BaseCondSelectOp op2, RegisterClass regtype, string asm, : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), asm, "\t$Rd, $Rn, $Rm, $cond", "", [(set regtype:$Rd, - (ARM64csel regtype:$Rn, (frag regtype:$Rm), + (AArch64csel regtype:$Rn, (frag regtype:$Rm), (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; @@ -2133,8 +2133,8 @@ class BaseCondSelectOp op2, RegisterClass regtype, string asm, } def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); - return CurDAG->getTargetConstant(ARM64CC::getInvertedCondCode(CC), MVT::i32); + AArch64CC::CondCode CC = static_cast(N->getZExtValue()); + return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), MVT::i32); }]>; multiclass CondSelectOp op2, string asm, PatFrag frag> { @@ -2145,11 +2145,11 @@ multiclass CondSelectOp op2, string asm, PatFrag frag> { let Inst{31} = 1; } - def : Pat<(ARM64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV), + def : Pat<(AArch64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV), (!cast(NAME # Wr) GPR32:$Rn, GPR32:$Rm, (inv_cond_XFORM imm:$cond))>; - def : Pat<(ARM64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV), + def : Pat<(AArch64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV), (!cast(NAME # Xr) GPR64:$Rn, GPR64:$Rm, (inv_cond_XFORM imm:$cond))>; } @@ -2194,7 +2194,7 @@ class uimm12_scaled : Operand { let ParserMatchClass = !cast("UImm12OffsetScale" # Scale # "Operand"); let EncoderMethod - = "getLdStUImm12OpValue"; + = "getLdStUImm12OpValue"; let PrintMethod = "printUImm12Offset<" # Scale # ">"; } @@ -2782,7 +2782,7 @@ class BasePrefetchRO sz, bit V, bits<2> opc, dag outs, dag ins, multiclass PrefetchRO sz, bit V, bits<2> opc, string asm> { def roW : BasePrefetchRO { let Inst{13} = 0b0; @@ -2790,7 +2790,7 @@ multiclass PrefetchRO sz, bit V, bits<2> opc, string asm> { def roX : BasePrefetchRO { let Inst{13} = 0b1; @@ -3912,7 +3912,7 @@ class BaseFPCondSelect : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), asm, "\t$Rd, $Rn, $Rm, $cond", "", [(set regtype:$Rd, - (ARM64csel (vt regtype:$Rn), regtype:$Rm, + (AArch64csel (vt regtype:$Rn), regtype:$Rm, (i32 imm:$cond), NZCV))]>, Sched<[WriteF]> { bits<5> Rd; @@ -5074,28 +5074,28 @@ multiclass SIMDLongThreeVectorSQDMLXTiedHS opc, string asm, asm, ".4s", ".4h", ".4h", [(set (v4i32 V128:$dst), (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull (v4i16 V64:$Rn), + (v4i32 (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), (v4i16 V64:$Rm)))))]>; def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; } @@ -5140,7 +5140,7 @@ class BaseSIMDBitwiseExtract, + (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, Sched<[WriteV]> { bits<5> Rd; bits<5> Rn; @@ -5409,7 +5409,7 @@ class BaseSIMDCmpTwoScalar size, bits<5> opcode, class SIMDInexactCvtTwoScalar opcode, string asm> : I<(outs FPR32:$Rd), (ins 
FPR64:$Rn), asm, "\t$Rd, $Rn", "", - [(set (f32 FPR32:$Rd), (int_arm64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, + [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, Sched<[WriteV]> { bits<5> Rd; bits<5> Rn; @@ -5627,7 +5627,7 @@ class SIMDDupFromMain imm5, string size, ValueType vectype, : BaseSIMDInsDup { + [(set (vectype vecreg:$Rd), (AArch64dup regtype:$Rn))]> { let Inst{20-16} = imm5; let Inst{14-11} = 0b0001; } @@ -5646,7 +5646,7 @@ class SIMDDupFromElement { + VectorIndexD, i64, AArch64duplane64> { bits<1> idx; let Inst{20} = idx; let Inst{19-16} = 0b1000; @@ -5655,7 +5655,7 @@ class SIMDDup64FromElement class SIMDDup32FromElement : SIMDDupFromElement { + VectorIndexS, i64, AArch64duplane32> { bits<2> idx; let Inst{20-19} = idx; let Inst{18-16} = 0b100; @@ -5664,7 +5664,7 @@ class SIMDDup32FromElement : SIMDDupFromElement { + VectorIndexH, i64, AArch64duplane16> { bits<3> idx; let Inst{20-18} = idx; let Inst{17-16} = 0b10; @@ -5673,7 +5673,7 @@ class SIMDDup16FromElement : SIMDDupFromElement { + VectorIndexB, i64, AArch64duplane8> { bits<4> idx; let Inst{20-17} = idx; let Inst{16} = 1; @@ -6312,7 +6312,7 @@ multiclass SIMDFPIndexedSD opc, string asm, asm, ".2s", ".2s", ".2s", ".s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6324,7 +6324,7 @@ multiclass SIMDFPIndexedSD opc, string asm, asm, ".4s", ".4s", ".4s", ".s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + (v4f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6336,7 +6336,7 @@ multiclass SIMDFPIndexedSD opc, string asm, asm, ".2d", ".2d", ".2d", ".d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), - (v2f64 (ARM64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { + (v2f64 (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { bits<1> idx; let Inst{11} = idx{0}; let Inst{21} = 0; @@ -6370,35 +6370,35 @@ multiclass SIMDFPIndexedSD opc, string asm, multiclass SIMDFPIndexedSDTiedPatterns { // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), + (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))), (!cast(INST # v2i32_indexed) V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), + (AArch64dup (f32 FPR32Op:$Rm)))), (!cast(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), + (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))), (!cast(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), + (AArch64dup (f32 FPR32Op:$Rm)))), (!cast(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. 
def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 V128:$Rm), + (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))), (!cast(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 FPR64Op:$Rm)))), + (AArch64dup (f64 FPR64Op:$Rm)))), (!cast(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; @@ -6471,7 +6471,7 @@ multiclass SIMDIndexedHS opc, string asm, asm, ".4h", ".4h", ".4h", ".h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6484,7 +6484,7 @@ multiclass SIMDIndexedHS opc, string asm, asm, ".8h", ".8h", ".8h", ".h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6497,7 +6497,7 @@ multiclass SIMDIndexedHS opc, string asm, asm, ".2s", ".2s", ".2s", ".s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6509,7 +6509,7 @@ multiclass SIMDIndexedHS opc, string asm, asm, ".4s", ".4s", ".4s", ".s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6545,7 +6545,7 @@ multiclass SIMDVectorIndexedHS opc, string asm, asm, ".4h", ".4h", ".4h", ".h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6558,7 +6558,7 @@ multiclass SIMDVectorIndexedHS opc, string asm, asm, ".8h", ".8h", ".8h", ".h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6571,7 +6571,7 @@ multiclass SIMDVectorIndexedHS opc, string asm, asm, ".2s", ".2s", ".2s", ".s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6583,7 +6583,7 @@ multiclass SIMDVectorIndexedHS opc, string asm, asm, ".4s", ".4s", ".4s", ".s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6597,7 +6597,7 @@ multiclass SIMDVectorIndexedHSTied opc, string asm, asm, ".4h", ".4h", ".4h", ".h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 
V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6610,7 +6610,7 @@ multiclass SIMDVectorIndexedHSTied opc, string asm, asm, ".8h", ".8h", ".8h", ".h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6623,7 +6623,7 @@ multiclass SIMDVectorIndexedHSTied opc, string asm, asm, ".2s", ".2s", ".2s", ".s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6635,7 +6635,7 @@ multiclass SIMDVectorIndexedHSTied opc, string asm, asm, ".4s", ".4s", ".4s", ".s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6650,7 +6650,7 @@ multiclass SIMDIndexedLongSD opc, string asm, asm, ".4s", ".4s", ".4h", ".h", [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6663,7 +6663,7 @@ multiclass SIMDIndexedLongSD opc, string asm, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; @@ -6678,7 +6678,7 @@ multiclass SIMDIndexedLongSD opc, string asm, asm, ".2d", ".2d", ".2s", ".s", [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6690,7 +6690,7 @@ multiclass SIMDIndexedLongSD opc, string asm, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; @@ -6723,9 +6723,9 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm, ".4s", ".4s", ".4h", ".h", [(set (v4i32 V128:$dst), (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull + (v4i32 (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))))]> { bits<3> idx; let Inst{11} = idx{2}; @@ -6737,8 +6737,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, // intermediate EXTRACT_SUBREG would be untyped. 
def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), (i32 (vector_extract (v4i32 - (int_arm64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))), (i64 0))))), (EXTRACT_SUBREG @@ -6753,10 +6753,10 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$dst), (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull + (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn), (extract_high_v8i16 - (ARM64duplane16 (v8i16 V128_lo:$Rm), + (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))))]> { bits<3> idx; let Inst{11} = idx{2}; @@ -6770,9 +6770,9 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm, ".2d", ".2d", ".2s", ".s", [(set (v2i64 V128:$dst), (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull + (v2i64 (int_aarch64_neon_sqdmull (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))))]> { bits<2> idx; let Inst{11} = idx{1}; @@ -6785,10 +6785,10 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$dst), (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull + (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn), (extract_high_v4i32 - (ARM64duplane32 (v4i32 V128:$Rm), + (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))))]> { bits<2> idx; let Inst{11} = idx{1}; @@ -6810,7 +6810,7 @@ multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, asm, ".s", "", "", ".s", [(set (i64 FPR64Op:$dst), (Accum (i64 FPR64Op:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32Op:$Rn), (i32 (vector_extract (v4i32 V128:$Rm), VectorIndexS:$idx))))))]> { @@ -6830,7 +6830,7 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, asm, ".4s", ".4s", ".4h", ".h", [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6843,7 +6843,7 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, asm#"2", ".4s", ".4s", ".8h", ".h", [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; @@ -6858,7 +6858,7 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, asm, ".2d", ".2d", ".2s", ".s", [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6870,7 +6870,7 @@ multiclass SIMDVectorIndexedLongSD opc, string asm, asm#"2", ".2d", ".2d", ".4s", ".s", [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; @@ -6888,7 +6888,7 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, asm, ".4s", ".4s", ".4h", ".h", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + (v4i16 (AArch64duplane16 (v8i16 
V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; let Inst{21} = idx{1}; @@ -6902,7 +6902,7 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { bits<3> idx; let Inst{11} = idx{2}; @@ -6916,7 +6916,7 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, asm, ".2d", ".2d", ".2s", ".s", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; let Inst{21} = idx{0}; @@ -6929,7 +6929,7 @@ multiclass SIMDVectorIndexedLongSDTied opc, string asm, [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { bits<2> idx; let Inst{11} = idx{1}; diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp similarity index 55% rename from lib/Target/ARM64/ARM64InstrInfo.cpp rename to lib/Target/AArch64/AArch64InstrInfo.cpp index fbbddd56660..52e3b333eb0 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ARM64InstrInfo.cpp - ARM64 Instruction Information -----------------===// +//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of the TargetInstrInfo class. +// This file contains the AArch64 implementation of the TargetInstrInfo class. // //===----------------------------------------------------------------------===// -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -26,15 +26,15 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR -#include "ARM64GenInstrInfo.inc" +#include "AArch64GenInstrInfo.inc" -ARM64InstrInfo::ARM64InstrInfo(const ARM64Subtarget &STI) - : ARM64GenInstrInfo(ARM64::ADJCALLSTACKDOWN, ARM64::ADJCALLSTACKUP), +AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), RI(this, &STI), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. 
-unsigned ARM64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { +unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const MCInstrDesc &Desc = MI->getDesc(); switch (Desc.getOpcode()) { @@ -57,23 +57,23 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, switch (LastInst->getOpcode()) { default: llvm_unreachable("Unknown branch instruction?"); - case ARM64::Bcc: + case AArch64::Bcc: Target = LastInst->getOperand(1).getMBB(); Cond.push_back(LastInst->getOperand(0)); break; - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: Target = LastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(-1)); Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); Cond.push_back(LastInst->getOperand(0)); break; - case ARM64::TBZW: - case ARM64::TBZX: - case ARM64::TBNZW: - case ARM64::TBNZX: + case AArch64::TBZW: + case AArch64::TBZX: + case AArch64::TBNZW: + case AArch64::TBNZX: Target = LastInst->getOperand(2).getMBB(); Cond.push_back(MachineOperand::CreateImm(-1)); Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); @@ -83,7 +83,7 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, } // Branch analysis. -bool ARM64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, @@ -175,40 +175,40 @@ bool ARM64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; } -bool ARM64InstrInfo::ReverseBranchCondition( +bool AArch64InstrInfo::ReverseBranchCondition( SmallVectorImpl &Cond) const { if (Cond[0].getImm() != -1) { // Regular Bcc - ARM64CC::CondCode CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); - Cond[0].setImm(ARM64CC::getInvertedCondCode(CC)); + AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); + Cond[0].setImm(AArch64CC::getInvertedCondCode(CC)); } else { // Folded compare-and-branch switch (Cond[1].getImm()) { default: llvm_unreachable("Unknown conditional branch!"); - case ARM64::CBZW: - Cond[1].setImm(ARM64::CBNZW); + case AArch64::CBZW: + Cond[1].setImm(AArch64::CBNZW); break; - case ARM64::CBNZW: - Cond[1].setImm(ARM64::CBZW); + case AArch64::CBNZW: + Cond[1].setImm(AArch64::CBZW); break; - case ARM64::CBZX: - Cond[1].setImm(ARM64::CBNZX); + case AArch64::CBZX: + Cond[1].setImm(AArch64::CBNZX); break; - case ARM64::CBNZX: - Cond[1].setImm(ARM64::CBZX); + case AArch64::CBNZX: + Cond[1].setImm(AArch64::CBZX); break; - case ARM64::TBZW: - Cond[1].setImm(ARM64::TBNZW); + case AArch64::TBZW: + Cond[1].setImm(AArch64::TBNZW); break; - case ARM64::TBNZW: - Cond[1].setImm(ARM64::TBZW); + case AArch64::TBNZW: + Cond[1].setImm(AArch64::TBZW); break; - case ARM64::TBZX: - Cond[1].setImm(ARM64::TBNZX); + case AArch64::TBZX: + Cond[1].setImm(AArch64::TBNZX); break; - case ARM64::TBNZX: - Cond[1].setImm(ARM64::TBZX); + case AArch64::TBNZX: + Cond[1].setImm(AArch64::TBZX); break; } } @@ -216,7 +216,7 @@ bool ARM64InstrInfo::ReverseBranchCondition( return false; } -unsigned ARM64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { +unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; @@ -246,12 +246,12 @@ unsigned ARM64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -void 
ARM64InstrInfo::instantiateCondBranch( +void AArch64InstrInfo::instantiateCondBranch( MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB, const SmallVectorImpl &Cond) const { if (Cond[0].getImm() != -1) { // Regular Bcc - BuildMI(&MBB, DL, get(ARM64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); + BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); } else { // Folded compare-and-branch const MachineInstrBuilder MIB = @@ -262,7 +262,7 @@ void ARM64InstrInfo::instantiateCondBranch( } } -unsigned ARM64InstrInfo::InsertBranch( +unsigned AArch64InstrInfo::InsertBranch( MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const { // Shouldn't be a fall through. @@ -270,7 +270,7 @@ unsigned ARM64InstrInfo::InsertBranch( if (!FBB) { if (Cond.empty()) // Unconditional branch? - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(TBB); + BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); else instantiateCondBranch(MBB, DL, TBB, Cond); return 1; @@ -278,7 +278,7 @@ unsigned ARM64InstrInfo::InsertBranch( // Two-way conditional branch. instantiateCondBranch(MBB, DL, TBB, Cond); - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(FBB); + BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); return 2; } @@ -302,52 +302,52 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, if (!TargetRegisterInfo::isVirtualRegister(VReg)) return 0; - bool Is64Bit = ARM64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); + bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); const MachineInstr *DefMI = MRI.getVRegDef(VReg); unsigned Opc = 0; unsigned SrcOpNum = 0; switch (DefMI->getOpcode()) { - case ARM64::ADDSXri: - case ARM64::ADDSWri: + case AArch64::ADDSXri: + case AArch64::ADDSWri: // if NZCV is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::NZCV, true) == -1) + if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) return 0; // fall-through to ADDXri and ADDWri. - case ARM64::ADDXri: - case ARM64::ADDWri: + case AArch64::ADDXri: + case AArch64::ADDWri: // add x, 1 -> csinc. if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || DefMI->getOperand(3).getImm() != 0) return 0; SrcOpNum = 1; - Opc = Is64Bit ? ARM64::CSINCXr : ARM64::CSINCWr; + Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; break; - case ARM64::ORNXrr: - case ARM64::ORNWrr: { + case AArch64::ORNXrr: + case AArch64::ORNWrr: { // not x -> csinv, represented as orn dst, xzr, src. unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) + if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) return 0; SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSINVXr : ARM64::CSINVWr; + Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; break; } - case ARM64::SUBSXrr: - case ARM64::SUBSWrr: + case AArch64::SUBSXrr: + case AArch64::SUBSWrr: // if NZCV is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::NZCV, true) == -1) + if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) return 0; // fall-through to SUBXrr and SUBWrr. - case ARM64::SUBXrr: - case ARM64::SUBWrr: { + case AArch64::SUBXrr: + case AArch64::SUBWrr: { // neg x -> csneg, represented as sub dst, xzr, src. unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) + if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) return 0; SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSNEGXr : ARM64::CSNEGWr; + Opc = Is64Bit ? 
AArch64::CSNEGXr : AArch64::CSNEGWr; break; } default: @@ -360,7 +360,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, return Opc; } -bool ARM64InstrInfo::canInsertSelect( +bool AArch64InstrInfo::canInsertSelect( const MachineBasicBlock &MBB, const SmallVectorImpl &Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { @@ -376,8 +376,8 @@ bool ARM64InstrInfo::canInsertSelect( // GPRs are handled by csel. // FIXME: Fold in x+1, -x, and ~x when applicable. - if (ARM64::GPR64allRegClass.hasSubClassEq(RC) || - ARM64::GPR32allRegClass.hasSubClassEq(RC)) { + if (AArch64::GPR64allRegClass.hasSubClassEq(RC) || + AArch64::GPR32allRegClass.hasSubClassEq(RC)) { // Single-cycle csel, csinc, csinv, and csneg. CondCycles = 1 + ExtraCondLat; TrueCycles = FalseCycles = 1; @@ -390,8 +390,8 @@ bool ARM64InstrInfo::canInsertSelect( // Scalar floating point is handled by fcsel. // FIXME: Form fabs, fmin, and fmax when applicable. - if (ARM64::FPR64RegClass.hasSubClassEq(RC) || - ARM64::FPR32RegClass.hasSubClassEq(RC)) { + if (AArch64::FPR64RegClass.hasSubClassEq(RC) || + AArch64::FPR32RegClass.hasSubClassEq(RC)) { CondCycles = 5 + ExtraCondLat; TrueCycles = FalseCycles = 2; return true; @@ -401,20 +401,20 @@ bool ARM64InstrInfo::canInsertSelect( return false; } -void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, - const SmallVectorImpl &Cond, - unsigned TrueReg, unsigned FalseReg) const { +void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // Parse the condition code, see parseCondBranch() above. - ARM64CC::CondCode CC; + AArch64CC::CondCode CC; switch (Cond.size()) { default: llvm_unreachable("Unknown condition opcode in Cond"); case 1: // b.cc - CC = ARM64CC::CondCode(Cond[0].getImm()); + CC = AArch64CC::CondCode(Cond[0].getImm()); break; case 3: { // cbz/cbnz // We must insert a compare against 0. @@ -422,34 +422,34 @@ void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, switch (Cond[1].getImm()) { default: llvm_unreachable("Unknown branch opcode in Cond"); - case ARM64::CBZW: + case AArch64::CBZW: Is64Bit = 0; - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; break; - case ARM64::CBZX: + case AArch64::CBZX: Is64Bit = 1; - CC = ARM64CC::EQ; + CC = AArch64CC::EQ; break; - case ARM64::CBNZW: + case AArch64::CBNZW: Is64Bit = 0; - CC = ARM64CC::NE; + CC = AArch64CC::NE; break; - case ARM64::CBNZX: + case AArch64::CBNZX: Is64Bit = 1; - CC = ARM64CC::NE; + CC = AArch64CC::NE; break; } unsigned SrcReg = Cond[2].getReg(); if (Is64Bit) { // cmp reg, #0 is actually subs xzr, reg, #0. 
- MRI.constrainRegClass(SrcReg, &ARM64::GPR64spRegClass); - BuildMI(MBB, I, DL, get(ARM64::SUBSXri), ARM64::XZR) + MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); + BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR) .addReg(SrcReg) .addImm(0) .addImm(0); } else { - MRI.constrainRegClass(SrcReg, &ARM64::GPR32spRegClass); - BuildMI(MBB, I, DL, get(ARM64::SUBSWri), ARM64::WZR) + MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass); + BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR) .addReg(SrcReg) .addImm(0) .addImm(0); @@ -461,24 +461,26 @@ void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, switch (Cond[1].getImm()) { default: llvm_unreachable("Unknown branch opcode in Cond"); - case ARM64::TBZW: - case ARM64::TBZX: - CC = ARM64CC::EQ; + case AArch64::TBZW: + case AArch64::TBZX: + CC = AArch64CC::EQ; break; - case ARM64::TBNZW: - case ARM64::TBNZX: - CC = ARM64CC::NE; + case AArch64::TBNZW: + case AArch64::TBNZX: + CC = AArch64CC::NE; break; } // cmp reg, #foo is actually ands xzr, reg, #1< 64 bit extension case, these instructions can do // much more. if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) @@ -548,7 +550,7 @@ bool ARM64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, // This is a signed or unsigned 32 -> 64 bit extension. SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); - SubIdx = ARM64::sub_32; + SubIdx = AArch64::sub_32; return true; } } @@ -556,49 +558,49 @@ bool ARM64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. /// Return true if the comparison instruction can be analyzed. -bool ARM64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - unsigned &SrcReg2, int &CmpMask, - int &CmpValue) const { +bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const { switch (MI->getOpcode()) { default: break; - case ARM64::SUBSWrr: - case ARM64::SUBSWrs: - case ARM64::SUBSWrx: - case ARM64::SUBSXrr: - case ARM64::SUBSXrs: - case ARM64::SUBSXrx: - case ARM64::ADDSWrr: - case ARM64::ADDSWrs: - case ARM64::ADDSWrx: - case ARM64::ADDSXrr: - case ARM64::ADDSXrs: - case ARM64::ADDSXrx: + case AArch64::SUBSWrr: + case AArch64::SUBSWrs: + case AArch64::SUBSWrx: + case AArch64::SUBSXrr: + case AArch64::SUBSXrs: + case AArch64::SUBSXrx: + case AArch64::ADDSWrr: + case AArch64::ADDSWrs: + case AArch64::ADDSWrx: + case AArch64::ADDSXrr: + case AArch64::ADDSXrs: + case AArch64::ADDSXrx: // Replace SUBSWrr with SUBWrr if NZCV is not used. SrcReg = MI->getOperand(1).getReg(); SrcReg2 = MI->getOperand(2).getReg(); CmpMask = ~0; CmpValue = 0; return true; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: SrcReg = MI->getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; CmpValue = MI->getOperand(2).getImm(); return true; - case ARM64::ANDSWri: - case ARM64::ANDSXri: + case AArch64::ANDSWri: + case AArch64::ANDSXri: // ANDS does not use the same encoding scheme as the others xxxS // instructions. SrcReg = MI->getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - CmpValue = ARM64_AM::decodeLogicalImmediate( + CmpValue = AArch64_AM::decodeLogicalImmediate( MI->getOperand(2).getImm(), - MI->getOpcode() == ARM64::ANDSWri ? 
32 : 64); + MI->getOpcode() == AArch64::ANDSWri ? 32 : 64); return true; } @@ -646,33 +648,33 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) { /// optimizeCompareInstr - Convert the instruction supplying the argument to the /// comparison into one that sets the zero bit in the flags register. -bool ARM64InstrInfo::optimizeCompareInstr( +bool AArch64InstrInfo::optimizeCompareInstr( MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const { // Replace SUBSWrr with SUBWrr if NZCV is not used. - int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(ARM64::NZCV, true); + int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true); if (Cmp_NZCV != -1) { unsigned NewOpc; switch (CmpInstr->getOpcode()) { default: return false; - case ARM64::ADDSWrr: NewOpc = ARM64::ADDWrr; break; - case ARM64::ADDSWri: NewOpc = ARM64::ADDWri; break; - case ARM64::ADDSWrs: NewOpc = ARM64::ADDWrs; break; - case ARM64::ADDSWrx: NewOpc = ARM64::ADDWrx; break; - case ARM64::ADDSXrr: NewOpc = ARM64::ADDXrr; break; - case ARM64::ADDSXri: NewOpc = ARM64::ADDXri; break; - case ARM64::ADDSXrs: NewOpc = ARM64::ADDXrs; break; - case ARM64::ADDSXrx: NewOpc = ARM64::ADDXrx; break; - case ARM64::SUBSWrr: NewOpc = ARM64::SUBWrr; break; - case ARM64::SUBSWri: NewOpc = ARM64::SUBWri; break; - case ARM64::SUBSWrs: NewOpc = ARM64::SUBWrs; break; - case ARM64::SUBSWrx: NewOpc = ARM64::SUBWrx; break; - case ARM64::SUBSXrr: NewOpc = ARM64::SUBXrr; break; - case ARM64::SUBSXri: NewOpc = ARM64::SUBXri; break; - case ARM64::SUBSXrs: NewOpc = ARM64::SUBXrs; break; - case ARM64::SUBSXrx: NewOpc = ARM64::SUBXrx; break; + case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break; + case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break; + case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break; + case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break; + case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break; + case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break; + case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break; + case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break; + case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break; + case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break; + case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break; + case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break; + case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break; + case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break; + case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break; + case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break; } const MCInstrDesc &MCID = get(NewOpc); @@ -718,8 +720,8 @@ bool ARM64InstrInfo::optimizeCompareInstr( for (--I; I != E; --I) { const MachineInstr &Instr = *I; - if (Instr.modifiesRegister(ARM64::NZCV, TRI) || - Instr.readsRegister(ARM64::NZCV, TRI)) + if (Instr.modifiesRegister(AArch64::NZCV, TRI) || + Instr.readsRegister(AArch64::NZCV, TRI)) // This instruction modifies or uses NZCV after the one we want to // change. We can't do this transformation. 
return false; @@ -732,29 +734,29 @@ bool ARM64InstrInfo::optimizeCompareInstr( switch (MI->getOpcode()) { default: return false; - case ARM64::ADDSWrr: - case ARM64::ADDSWri: - case ARM64::ADDSXrr: - case ARM64::ADDSXri: - case ARM64::SUBSWrr: - case ARM64::SUBSWri: - case ARM64::SUBSXrr: - case ARM64::SUBSXri: - break; - case ARM64::ADDWrr: NewOpc = ARM64::ADDSWrr; break; - case ARM64::ADDWri: NewOpc = ARM64::ADDSWri; break; - case ARM64::ADDXrr: NewOpc = ARM64::ADDSXrr; break; - case ARM64::ADDXri: NewOpc = ARM64::ADDSXri; break; - case ARM64::ADCWr: NewOpc = ARM64::ADCSWr; break; - case ARM64::ADCXr: NewOpc = ARM64::ADCSXr; break; - case ARM64::SUBWrr: NewOpc = ARM64::SUBSWrr; break; - case ARM64::SUBWri: NewOpc = ARM64::SUBSWri; break; - case ARM64::SUBXrr: NewOpc = ARM64::SUBSXrr; break; - case ARM64::SUBXri: NewOpc = ARM64::SUBSXri; break; - case ARM64::SBCWr: NewOpc = ARM64::SBCSWr; break; - case ARM64::SBCXr: NewOpc = ARM64::SBCSXr; break; - case ARM64::ANDWri: NewOpc = ARM64::ANDSWri; break; - case ARM64::ANDXri: NewOpc = ARM64::ANDSXri; break; + case AArch64::ADDSWrr: + case AArch64::ADDSWri: + case AArch64::ADDSXrr: + case AArch64::ADDSXri: + case AArch64::SUBSWrr: + case AArch64::SUBSWri: + case AArch64::SUBSXrr: + case AArch64::SUBSXri: + break; + case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break; + case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break; + case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break; + case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break; + case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break; + case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break; + case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break; + case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break; + case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break; + case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break; + case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break; + case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break; + case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break; + case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break; } // Scan forward for the use of NZCV. @@ -771,11 +773,11 @@ bool ARM64InstrInfo::optimizeCompareInstr( for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO; ++IO) { const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM64::NZCV)) { + if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) { IsSafe = true; break; } - if (!MO.isReg() || MO.getReg() != ARM64::NZCV) + if (!MO.isReg() || MO.getReg() != AArch64::NZCV) continue; if (MO.isDef()) { IsSafe = true; @@ -784,24 +786,24 @@ bool ARM64InstrInfo::optimizeCompareInstr( // Decode the condition code. 
unsigned Opc = Instr.getOpcode(); - ARM64CC::CondCode CC; + AArch64CC::CondCode CC; switch (Opc) { default: return false; - case ARM64::Bcc: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 2).getImm(); + case AArch64::Bcc: + CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm(); break; - case ARM64::CSINVWr: - case ARM64::CSINVXr: - case ARM64::CSINCWr: - case ARM64::CSINCXr: - case ARM64::CSELWr: - case ARM64::CSELXr: - case ARM64::CSNEGWr: - case ARM64::CSNEGXr: - case ARM64::FCSELSrrr: - case ARM64::FCSELDrrr: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 1).getImm(); + case AArch64::CSINVWr: + case AArch64::CSINVXr: + case AArch64::CSINCWr: + case AArch64::CSINCXr: + case AArch64::CSELWr: + case AArch64::CSELXr: + case AArch64::CSNEGWr: + case AArch64::CSNEGXr: + case AArch64::FCSELSrrr: + case AArch64::FCSELDrrr: + CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm(); break; } @@ -810,12 +812,12 @@ bool ARM64InstrInfo::optimizeCompareInstr( default: // NZCV can be used multiple times, we should continue. break; - case ARM64CC::VS: - case ARM64CC::VC: - case ARM64CC::GE: - case ARM64CC::LT: - case ARM64CC::GT: - case ARM64CC::LE: + case AArch64CC::VS: + case AArch64CC::VC: + case AArch64CC::GE: + case AArch64CC::LT: + case AArch64CC::GT: + case AArch64CC::LE: return false; } } @@ -826,7 +828,7 @@ bool ARM64InstrInfo::optimizeCompareInstr( if (!IsSafe) { MachineBasicBlock *ParentBlock = CmpInstr->getParent(); for (auto *MBB : ParentBlock->successors()) - if (MBB->isLiveIn(ARM64::NZCV)) + if (MBB->isLiveIn(AArch64::NZCV)) return false; } @@ -836,47 +838,47 @@ bool ARM64InstrInfo::optimizeCompareInstr( bool succeeded = UpdateOperandRegClass(MI); (void)succeeded; assert(succeeded && "Some operands reg class are incompatible!"); - MI->addRegisterDefined(ARM64::NZCV, TRI); + MI->addRegisterDefined(AArch64::NZCV, TRI); return true; } /// Return true if this is this instruction has a non-zero immediate -bool ARM64InstrInfo::hasShiftedReg(const MachineInstr *MI) const { +bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::ADDSWrs: - case ARM64::ADDSXrs: - case ARM64::ADDWrs: - case ARM64::ADDXrs: - case ARM64::ANDSWrs: - case ARM64::ANDSXrs: - case ARM64::ANDWrs: - case ARM64::ANDXrs: - case ARM64::BICSWrs: - case ARM64::BICSXrs: - case ARM64::BICWrs: - case ARM64::BICXrs: - case ARM64::CRC32Brr: - case ARM64::CRC32CBrr: - case ARM64::CRC32CHrr: - case ARM64::CRC32CWrr: - case ARM64::CRC32CXrr: - case ARM64::CRC32Hrr: - case ARM64::CRC32Wrr: - case ARM64::CRC32Xrr: - case ARM64::EONWrs: - case ARM64::EONXrs: - case ARM64::EORWrs: - case ARM64::EORXrs: - case ARM64::ORNWrs: - case ARM64::ORNXrs: - case ARM64::ORRWrs: - case ARM64::ORRXrs: - case ARM64::SUBSWrs: - case ARM64::SUBSXrs: - case ARM64::SUBWrs: - case ARM64::SUBXrs: + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: + case AArch64::ADDWrs: + case AArch64::ADDXrs: + case AArch64::ANDSWrs: + case AArch64::ANDSXrs: + case AArch64::ANDWrs: + case AArch64::ANDXrs: + case AArch64::BICSWrs: + case AArch64::BICSXrs: + case AArch64::BICWrs: + case AArch64::BICXrs: + case AArch64::CRC32Brr: + case AArch64::CRC32CBrr: + case AArch64::CRC32CHrr: + case AArch64::CRC32CWrr: + case AArch64::CRC32CXrr: + case AArch64::CRC32Hrr: + case AArch64::CRC32Wrr: + case AArch64::CRC32Xrr: + case AArch64::EONWrs: + case AArch64::EONXrs: + case AArch64::EORWrs: + case AArch64::EORXrs: + case AArch64::ORNWrs: + case AArch64::ORNXrs: + case AArch64::ORRWrs: + case 
AArch64::ORRXrs: + case AArch64::SUBSWrs: + case AArch64::SUBSXrs: + case AArch64::SUBWrs: + case AArch64::SUBXrs: if (MI->getOperand(3).isImm()) { unsigned val = MI->getOperand(3).getImm(); return (val != 0); @@ -887,22 +889,22 @@ bool ARM64InstrInfo::hasShiftedReg(const MachineInstr *MI) const { } /// Return true if this is this instruction has a non-zero immediate -bool ARM64InstrInfo::hasExtendedReg(const MachineInstr *MI) const { +bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::ADDSWrx: - case ARM64::ADDSXrx: - case ARM64::ADDSXrx64: - case ARM64::ADDWrx: - case ARM64::ADDXrx: - case ARM64::ADDXrx64: - case ARM64::SUBSWrx: - case ARM64::SUBSXrx: - case ARM64::SUBSXrx64: - case ARM64::SUBWrx: - case ARM64::SUBXrx: - case ARM64::SUBXrx64: + case AArch64::ADDSWrx: + case AArch64::ADDSXrx: + case AArch64::ADDSXrx64: + case AArch64::ADDWrx: + case AArch64::ADDXrx: + case AArch64::ADDXrx64: + case AArch64::SUBSWrx: + case AArch64::SUBSXrx: + case AArch64::SUBSXrx64: + case AArch64::SUBWrx: + case AArch64::SUBXrx: + case AArch64::SUBXrx64: if (MI->getOperand(3).isImm()) { unsigned val = MI->getOperand(3).getImm(); return (val != 0); @@ -915,47 +917,47 @@ bool ARM64InstrInfo::hasExtendedReg(const MachineInstr *MI) const { // Return true if this instruction simply sets its single destination register // to zero. This is equivalent to a register rename of the zero-register. -bool ARM64InstrInfo::isGPRZero(const MachineInstr *MI) const { +bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::MOVZWi: - case ARM64::MOVZXi: // movz Rd, #0 (LSL #0) + case AArch64::MOVZWi: + case AArch64::MOVZXi: // movz Rd, #0 (LSL #0) if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) { assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands"); return true; } break; - case ARM64::ANDWri: // and Rd, Rzr, #imm - return MI->getOperand(1).getReg() == ARM64::WZR; - case ARM64::ANDXri: - return MI->getOperand(1).getReg() == ARM64::XZR; + case AArch64::ANDWri: // and Rd, Rzr, #imm + return MI->getOperand(1).getReg() == AArch64::WZR; + case AArch64::ANDXri: + return MI->getOperand(1).getReg() == AArch64::XZR; case TargetOpcode::COPY: - return MI->getOperand(1).getReg() == ARM64::WZR; + return MI->getOperand(1).getReg() == AArch64::WZR; } return false; } // Return true if this instruction simply renames a general register without // modifying bits. 
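// (This covers plain COPYs whose destination is a GPR32 or GPR64, an
// "orr Xd, xzr, Xm" with a zero shift amount, and an "add Xd, Xn, #0" with a
// zero shift, all of which simply forward the source register's value.)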
-bool ARM64InstrInfo::isGPRCopy(const MachineInstr *MI) const { +bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; case TargetOpcode::COPY: { // GPR32 copies will by lowered to ORRXrs unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::GPR32RegClass.contains(DstReg) || - ARM64::GPR64RegClass.contains(DstReg)); + return (AArch64::GPR32RegClass.contains(DstReg) || + AArch64::GPR64RegClass.contains(DstReg)); } - case ARM64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) - if (MI->getOperand(1).getReg() == ARM64::XZR) { + case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) + if (MI->getOperand(1).getReg() == AArch64::XZR) { assert(MI->getDesc().getNumOperands() == 4 && MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands"); return true; } - case ARM64::ADDXri: // add Xd, Xn, #0 (LSL #0) + case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0) if (MI->getOperand(2).getImm() == 0) { assert(MI->getDesc().getNumOperands() == 4 && MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands"); @@ -967,17 +969,17 @@ bool ARM64InstrInfo::isGPRCopy(const MachineInstr *MI) const { // Return true if this instruction simply renames a general register without // modifying bits. -bool ARM64InstrInfo::isFPRCopy(const MachineInstr *MI) const { +bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; case TargetOpcode::COPY: { // FPR64 copies will by lowered to ORR.16b unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::FPR64RegClass.contains(DstReg) || - ARM64::FPR128RegClass.contains(DstReg)); + return (AArch64::FPR64RegClass.contains(DstReg) || + AArch64::FPR128RegClass.contains(DstReg)); } - case ARM64::ORRv16i8: + case AArch64::ORRv16i8: if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() && "invalid ORRv16i8 operands"); @@ -987,18 +989,18 @@ bool ARM64InstrInfo::isFPRCopy(const MachineInstr *MI) const { return false; } -unsigned ARM64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { switch (MI->getOpcode()) { default: break; - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: + case AArch64::LDRWui: + case AArch64::LDRXui: + case AArch64::LDRBui: + case AArch64::LDRHui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -1010,18 +1012,18 @@ unsigned ARM64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } -unsigned ARM64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { switch (MI->getOpcode()) { default: break; - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: + case AArch64::STRWui: + case AArch64::STRXui: + case AArch64::STRBui: + case AArch64::STRHui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && 
MI->getOperand(2).getImm() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -1035,66 +1037,66 @@ unsigned ARM64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, /// Return true if this is load/store scales or extends its register offset. /// This refers to scaling a dynamic index as opposed to scaled immediates. /// MI should be a memory op that allows scaled addressing. -bool ARM64InstrInfo::isScaledAddr(const MachineInstr *MI) const { +bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case ARM64::LDRBBroW: - case ARM64::LDRBroW: - case ARM64::LDRDroW: - case ARM64::LDRHHroW: - case ARM64::LDRHroW: - case ARM64::LDRQroW: - case ARM64::LDRSBWroW: - case ARM64::LDRSBXroW: - case ARM64::LDRSHWroW: - case ARM64::LDRSHXroW: - case ARM64::LDRSWroW: - case ARM64::LDRSroW: - case ARM64::LDRWroW: - case ARM64::LDRXroW: - case ARM64::STRBBroW: - case ARM64::STRBroW: - case ARM64::STRDroW: - case ARM64::STRHHroW: - case ARM64::STRHroW: - case ARM64::STRQroW: - case ARM64::STRSroW: - case ARM64::STRWroW: - case ARM64::STRXroW: - case ARM64::LDRBBroX: - case ARM64::LDRBroX: - case ARM64::LDRDroX: - case ARM64::LDRHHroX: - case ARM64::LDRHroX: - case ARM64::LDRQroX: - case ARM64::LDRSBWroX: - case ARM64::LDRSBXroX: - case ARM64::LDRSHWroX: - case ARM64::LDRSHXroX: - case ARM64::LDRSWroX: - case ARM64::LDRSroX: - case ARM64::LDRWroX: - case ARM64::LDRXroX: - case ARM64::STRBBroX: - case ARM64::STRBroX: - case ARM64::STRDroX: - case ARM64::STRHHroX: - case ARM64::STRHroX: - case ARM64::STRQroX: - case ARM64::STRSroX: - case ARM64::STRWroX: - case ARM64::STRXroX: + case AArch64::LDRBBroW: + case AArch64::LDRBroW: + case AArch64::LDRDroW: + case AArch64::LDRHHroW: + case AArch64::LDRHroW: + case AArch64::LDRQroW: + case AArch64::LDRSBWroW: + case AArch64::LDRSBXroW: + case AArch64::LDRSHWroW: + case AArch64::LDRSHXroW: + case AArch64::LDRSWroW: + case AArch64::LDRSroW: + case AArch64::LDRWroW: + case AArch64::LDRXroW: + case AArch64::STRBBroW: + case AArch64::STRBroW: + case AArch64::STRDroW: + case AArch64::STRHHroW: + case AArch64::STRHroW: + case AArch64::STRQroW: + case AArch64::STRSroW: + case AArch64::STRWroW: + case AArch64::STRXroW: + case AArch64::LDRBBroX: + case AArch64::LDRBroX: + case AArch64::LDRDroX: + case AArch64::LDRHHroX: + case AArch64::LDRHroX: + case AArch64::LDRQroX: + case AArch64::LDRSBWroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSHWroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSWroX: + case AArch64::LDRSroX: + case AArch64::LDRWroX: + case AArch64::LDRXroX: + case AArch64::STRBBroX: + case AArch64::STRBroX: + case AArch64::STRDroX: + case AArch64::STRHHroX: + case AArch64::STRHroX: + case AArch64::STRQroX: + case AArch64::STRSroX: + case AArch64::STRWroX: + case AArch64::STRXroX: unsigned Val = MI->getOperand(3).getImm(); - ARM64_AM::ShiftExtendType ExtType = ARM64_AM::getMemExtendType(Val); - return (ExtType != ARM64_AM::UXTX) || ARM64_AM::getMemDoShift(Val); + AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val); + return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val); } return false; } /// Check all MachineMemOperands for a hint to suppress pairing. 
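/// The hint is a target-specific MachineMemOperand flag (MOSuppressPair,
/// shifted into the MOTargetStartBit range) that suppressLdStPair() below sets
/// on the first memory operand; it is presumably consulted when deciding
/// whether to merge this access into an ldp/stp pair.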
-bool ARM64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { +bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && "Too many target MO flags"); for (auto *MM : MI->memoperands()) { @@ -1107,7 +1109,7 @@ bool ARM64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { } /// Set a flag on the first MachineMemOperand to suppress pairing. -void ARM64InstrInfo::suppressLdStPair(MachineInstr *MI) const { +void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const { if (MI->memoperands_empty()) return; @@ -1117,22 +1119,23 @@ void ARM64InstrInfo::suppressLdStPair(MachineInstr *MI) const { ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit); } -bool ARM64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, - unsigned &Offset, - const TargetRegisterInfo *TRI) const { +bool +AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, + unsigned &Offset, + const TargetRegisterInfo *TRI) const { switch (LdSt->getOpcode()) { default: return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) return false; BaseReg = LdSt->getOperand(1).getReg(); @@ -1146,9 +1149,9 @@ bool ARM64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, /// Detect opportunities for ldp/stp formation. /// /// Only called for LdSt for which getLdStBaseRegImmOfs returns true. -bool ARM64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, - MachineInstr *SecondLdSt, - unsigned NumLoads) const { +bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, + MachineInstr *SecondLdSt, + unsigned NumLoads) const { // Only cluster up to a single pair. if (NumLoads > 1) return false; @@ -1164,33 +1167,33 @@ bool ARM64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, return Ofs1 + 1 == Ofs2; } -bool ARM64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, - MachineInstr *Second) const { +bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, + MachineInstr *Second) const { // Cyclone can fuse CMN, CMP followed by Bcc. // FIXME: B0 can also fuse: // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. 
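// As written, the check below only recognizes the compare-and-branch idiom: a
// flag-setting SUBS/ADDS/ANDS with an immediate operand (the usual encodings
// behind cmp, cmn and tst) immediately followed by a conditional branch, so
// the scheduler keeps the pair adjacent for macro-fusion.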
- if (Second->getOpcode() != ARM64::Bcc) + if (Second->getOpcode() != AArch64::Bcc) return false; switch (First->getOpcode()) { default: return false; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::ANDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: - case ARM64::ANDSXri: + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::ANDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: + case AArch64::ANDSXri: return true; } } -MachineInstr *ARM64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM64::DBG_VALUE)) +MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) .addFrameIndex(FrameIx) .addImm(0) .addImm(Offset) @@ -1217,12 +1220,10 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, return ((DestReg - SrcReg) & 0x1f) < NumRegs; } -void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, - unsigned SrcReg, bool KillSrc, - unsigned Opcode, - llvm::ArrayRef Indices) const { +void AArch64InstrInfo::copyPhysRegTuple( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, + llvm::ArrayRef Indices) const { assert(getSubTarget().hasNEON() && "Unexpected register copy without NEON"); const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -1245,258 +1246,263 @@ void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, } } -void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - if (ARM64::GPR32spRegClass.contains(DestReg) && - (ARM64::GPR32spRegClass.contains(SrcReg) || SrcReg == ARM64::WZR)) { +void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (AArch64::GPR32spRegClass.contains(DestReg) && + (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { const TargetRegisterInfo *TRI = &getRegisterInfo(); - if (DestReg == ARM64::WSP || SrcReg == ARM64::WSP) { + if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { // If either operand is WSP, expand to ADD #0. if (Subtarget.hasZeroCycleRegMove()) { // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); + unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegX, but a proper // value from SrcReg. 
- BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestRegX) + BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) .addReg(SrcRegX, RegState::Undef) .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else { - BuildMI(MBB, I, DL, get(ARM64::ADDWri), DestReg) + BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } - } else if (SrcReg == ARM64::WZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZWi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); + } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) { + BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { if (Subtarget.hasZeroCycleRegMove()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); + unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegX, but a proper // value from SrcReg. - BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestRegX) - .addReg(ARM64::XZR) + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) + .addReg(AArch64::XZR) .addReg(SrcRegX, RegState::Undef) .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else { // Otherwise, expand to ORR WZR. - BuildMI(MBB, I, DL, get(ARM64::ORRWrr), DestReg) - .addReg(ARM64::WZR) + BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) + .addReg(AArch64::WZR) .addReg(SrcReg, getKillRegState(KillSrc)); } } return; } - if (ARM64::GPR64spRegClass.contains(DestReg) && - (ARM64::GPR64spRegClass.contains(SrcReg) || SrcReg == ARM64::XZR)) { - if (DestReg == ARM64::SP || SrcReg == ARM64::SP) { + if (AArch64::GPR64spRegClass.contains(DestReg) && + (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { + if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { // If either operand is SP, expand to ADD #0. - BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestReg) + BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else if (SrcReg == ARM64::XZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZXi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { // Otherwise, expand to ORR XZR. 
- BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestReg) - .addReg(ARM64::XZR) + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) + .addReg(AArch64::XZR) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } // Copy a DDDD register quad by copying the individual sub-registers. - if (ARM64::DDDDRegClass.contains(DestReg) && - ARM64::DDDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, + if (AArch64::DDDDRegClass.contains(DestReg) && + AArch64::DDDDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; } // Copy a DDD register triple by copying the individual sub-registers. - if (ARM64::DDDRegClass.contains(DestReg) && - ARM64::DDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, + if (AArch64::DDDRegClass.contains(DestReg) && + AArch64::DDDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; } // Copy a DD register pair by copying the individual sub-registers. - if (ARM64::DDRegClass.contains(DestReg) && - ARM64::DDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, + if (AArch64::DDRegClass.contains(DestReg) && + AArch64::DDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; } // Copy a QQQQ register quad by copying the individual sub-registers. - if (ARM64::QQQQRegClass.contains(DestReg) && - ARM64::QQQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, + if (AArch64::QQQQRegClass.contains(DestReg) && + AArch64::QQQQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; } // Copy a QQQ register triple by copying the individual sub-registers. - if (ARM64::QQQRegClass.contains(DestReg) && - ARM64::QQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, + if (AArch64::QQQRegClass.contains(DestReg) && + AArch64::QQQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; } // Copy a QQ register pair by copying the individual sub-registers. 
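// (As with the larger tuples above, the pair is expanded into one ORRv16i8
// per qsub sub-register by copyPhysRegTuple; forwardCopyWillClobberTuple
// above suggests that the per-register copy order is chosen so overlapping
// source and destination tuples are copied safely.)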
- if (ARM64::QQRegClass.contains(DestReg) && - ARM64::QQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, + if (AArch64::QQRegClass.contains(DestReg) && + AArch64::QQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; } - if (ARM64::FPR128RegClass.contains(DestReg) && - ARM64::FPR128RegClass.contains(SrcReg)) { + if (AArch64::FPR128RegClass.contains(DestReg) && + AArch64::FPR128RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - BuildMI(MBB, I, DL, get(ARM64::STRQpre)) - .addReg(ARM64::SP, RegState::Define) + BuildMI(MBB, I, DL, get(AArch64::STRQpre)) + .addReg(AArch64::SP, RegState::Define) .addReg(SrcReg, getKillRegState(KillSrc)) - .addReg(ARM64::SP) + .addReg(AArch64::SP) .addImm(-16); - BuildMI(MBB, I, DL, get(ARM64::LDRQpre)) - .addReg(ARM64::SP, RegState::Define) + BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) + .addReg(AArch64::SP, RegState::Define) .addReg(DestReg, RegState::Define) - .addReg(ARM64::SP) + .addReg(AArch64::SP) .addImm(16); } return; } - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { + if (AArch64::FPR64RegClass.contains(DestReg) && + AArch64::FPR64RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::dsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::dsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - BuildMI(MBB, I, DL, get(ARM64::FMOVDr), DestReg) + BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } - if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { + if (AArch64::FPR32RegClass.contains(DestReg) && + AArch64::FPR32RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::ssub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::ssub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg) + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } - if (ARM64::FPR16RegClass.contains(DestReg) && - ARM64::FPR16RegClass.contains(SrcReg)) { + if (AArch64::FPR16RegClass.contains(DestReg) && + 
AArch64::FPR16RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR32RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR32RegClass); - BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg) + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR32RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR32RegClass); + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } - if (ARM64::FPR8RegClass.contains(DestReg) && - ARM64::FPR8RegClass.contains(SrcReg)) { + if (AArch64::FPR8RegClass.contains(DestReg) && + AArch64::FPR8RegClass.contains(SrcReg)) { if(getSubTarget().hasNEON()) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR32RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR32RegClass); - BuildMI(MBB, I, DL, get(ARM64::FMOVSr), DestReg) + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR32RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR32RegClass); + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); } return; } // Copies between GPR64 and FPR64. - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::GPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVXDr), DestReg) + if (AArch64::FPR64RegClass.contains(DestReg) && + AArch64::GPR64RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } - if (ARM64::GPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVDXr), DestReg) + if (AArch64::GPR64RegClass.contains(DestReg) && + AArch64::FPR64RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } // Copies between GPR32 and FPR32. 
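// These are plain FMOVs between the general-purpose and FP/SIMD register
// files (FMOVWSr and FMOVSWr), mirroring the 64-bit FMOVXDr / FMOVDXr pair
// handled just above.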
- if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::GPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVWSr), DestReg) + if (AArch64::FPR32RegClass.contains(DestReg) && + AArch64::GPR32RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } - if (ARM64::GPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVSWr), DestReg) + if (AArch64::GPR32RegClass.contains(DestReg) && + AArch64::FPR32RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } @@ -1504,11 +1510,10 @@ void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, assert(0 && "unimplemented reg-to-reg copy"); } -void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void AArch64InstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool isKill, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -1523,72 +1528,72 @@ void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, bool Offset = true; switch (RC->getSize()) { case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRBui; + if (AArch64::FPR8RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRBui; break; case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRHui; + if (AArch64::FPR16RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRHui; break; case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRWui; + if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::STRWui; if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR32RegClass); + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); else - assert(SrcReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRSui; + assert(SrcReg != AArch64::WSP); + } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRSui; break; case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRXui; + if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::STRXui; if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); else - assert(SrcReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRDui; + assert(SrcReg != AArch64::SP); + } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRDui; break; case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) { + if (AArch64::FPR128RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRQui; + else if (AArch64::DDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Twov1d, Offset = false; + Opc = AArch64::ST1Twov1d, Offset = false; } break; case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) { + if (AArch64::DDDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = 
ARM64::ST1Threev1d, Offset = false; + Opc = AArch64::ST1Threev1d, Offset = false; } break; case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) { + if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Fourv1d, Offset = false; - } else if (ARM64::QQRegClass.hasSubClassEq(RC)) { + Opc = AArch64::ST1Fourv1d, Offset = false; + } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Twov2d, Offset = false; + Opc = AArch64::ST1Twov2d, Offset = false; } break; case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) { + if (AArch64::QQQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Threev2d, Offset = false; + Opc = AArch64::ST1Threev2d, Offset = false; } break; case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) { + if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register store without NEON"); - Opc = ARM64::ST1Fourv2d, Offset = false; + Opc = AArch64::ST1Fourv2d, Offset = false; } break; } @@ -1603,11 +1608,10 @@ void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MI.addMemOperand(MMO); } -void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void AArch64InstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, + int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -1622,72 +1626,72 @@ void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, bool Offset = true; switch (RC->getSize()) { case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRBui; + if (AArch64::FPR8RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRBui; break; case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRHui; + if (AArch64::FPR16RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRHui; break; case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRWui; + if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::LDRWui; if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR32RegClass); + MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); else - assert(DestReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRSui; + assert(DestReg != AArch64::WSP); + } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRSui; break; case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRXui; + if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::LDRXui; if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR64RegClass); + MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); else - assert(DestReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRDui; + assert(DestReg != AArch64::SP); + } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRDui; break; case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) { + if 
(AArch64::FPR128RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRQui; + else if (AArch64::DDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Twov1d, Offset = false; + Opc = AArch64::LD1Twov1d, Offset = false; } break; case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) { + if (AArch64::DDDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Threev1d, Offset = false; + Opc = AArch64::LD1Threev1d, Offset = false; } break; case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) { + if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Fourv1d, Offset = false; - } else if (ARM64::QQRegClass.hasSubClassEq(RC)) { + Opc = AArch64::LD1Fourv1d, Offset = false; + } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Twov2d, Offset = false; + Opc = AArch64::LD1Twov2d, Offset = false; } break; case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) { + if (AArch64::QQQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Threev2d, Offset = false; + Opc = AArch64::LD1Threev2d, Offset = false; } break; case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) { + if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { assert(getSubTarget().hasNEON() && "Unexpected register load without NEON"); - Opc = ARM64::LD1Fourv2d, Offset = false; + Opc = AArch64::LD1Fourv2d, Offset = false; } break; } @@ -1704,8 +1708,8 @@ void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, void llvm::emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset, - const ARM64InstrInfo *TII, MachineInstr::MIFlag Flag, - bool SetNZCV) { + const AArch64InstrInfo *TII, + MachineInstr::MIFlag Flag, bool SetNZCV) { if (DestReg == SrcReg && Offset == 0) return; @@ -1726,9 +1730,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, unsigned Opc; if (SetNZCV) - Opc = isSub ? ARM64::SUBSXri : ARM64::ADDSXri; + Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; else - Opc = isSub ? ARM64::SUBXri : ARM64::ADDXri; + Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri; const unsigned MaxEncoding = 0xfff; const unsigned ShiftSize = 12; const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; @@ -1744,7 +1748,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) .addReg(SrcReg) .addImm(ThisVal >> ShiftSize) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftSize)) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) .setMIFlag(Flag); SrcReg = DestReg; @@ -1755,14 +1759,14 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) .addReg(SrcReg) .addImm(Offset) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) .setMIFlag(Flag); } MachineInstr * -ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, - int FrameIndex) const { +AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { // This is a bit of a hack. 
Consider this instruction: // // %vreg0 = COPY %SP; GPR64all:%vreg0 @@ -1779,12 +1783,14 @@ ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, if (MI->isCopy()) { unsigned DstReg = MI->getOperand(0).getReg(); unsigned SrcReg = MI->getOperand(1).getReg(); - if (SrcReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(DstReg)) { - MF.getRegInfo().constrainRegClass(DstReg, &ARM64::GPR64RegClass); + if (SrcReg == AArch64::SP && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); return nullptr; } - if (DstReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(SrcReg)) { - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); + if (DstReg == AArch64::SP && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); return nullptr; } } @@ -1793,10 +1799,10 @@ ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, return nullptr; } -int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, - bool *OutUseUnscaledOp, - unsigned *OutUnscaledOp, - int *EmittableOffset) { +int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, + bool *OutUseUnscaledOp, + unsigned *OutUnscaledOp, + int *EmittableOffset) { int Scale = 1; bool IsSigned = false; // The ImmIdx should be changed case by case if it is not 2. @@ -1811,162 +1817,162 @@ int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, *OutUnscaledOp = 0; switch (MI.getOpcode()) { default: - assert(0 && "unhandled opcode in rewriteARM64FrameIndex"); + assert(0 && "unhandled opcode in rewriteAArch64FrameIndex"); // Vector spills/fills can't take an immediate offset. - case ARM64::LD1Twov2d: - case ARM64::LD1Threev2d: - case ARM64::LD1Fourv2d: - case ARM64::LD1Twov1d: - case ARM64::LD1Threev1d: - case ARM64::LD1Fourv1d: - case ARM64::ST1Twov2d: - case ARM64::ST1Threev2d: - case ARM64::ST1Fourv2d: - case ARM64::ST1Twov1d: - case ARM64::ST1Threev1d: - case ARM64::ST1Fourv1d: - return ARM64FrameOffsetCannotUpdate; - case ARM64::PRFMui: + case AArch64::LD1Twov2d: + case AArch64::LD1Threev2d: + case AArch64::LD1Fourv2d: + case AArch64::LD1Twov1d: + case AArch64::LD1Threev1d: + case AArch64::LD1Fourv1d: + case AArch64::ST1Twov2d: + case AArch64::ST1Threev2d: + case AArch64::ST1Fourv2d: + case AArch64::ST1Twov1d: + case AArch64::ST1Threev1d: + case AArch64::ST1Fourv1d: + return AArch64FrameOffsetCannotUpdate; + case AArch64::PRFMui: Scale = 8; - UnscaledOp = ARM64::PRFUMi; + UnscaledOp = AArch64::PRFUMi; break; - case ARM64::LDRXui: + case AArch64::LDRXui: Scale = 8; - UnscaledOp = ARM64::LDURXi; + UnscaledOp = AArch64::LDURXi; break; - case ARM64::LDRWui: + case AArch64::LDRWui: Scale = 4; - UnscaledOp = ARM64::LDURWi; + UnscaledOp = AArch64::LDURWi; break; - case ARM64::LDRBui: + case AArch64::LDRBui: Scale = 1; - UnscaledOp = ARM64::LDURBi; + UnscaledOp = AArch64::LDURBi; break; - case ARM64::LDRHui: + case AArch64::LDRHui: Scale = 2; - UnscaledOp = ARM64::LDURHi; + UnscaledOp = AArch64::LDURHi; break; - case ARM64::LDRSui: + case AArch64::LDRSui: Scale = 4; - UnscaledOp = ARM64::LDURSi; + UnscaledOp = AArch64::LDURSi; break; - case ARM64::LDRDui: + case AArch64::LDRDui: Scale = 8; - UnscaledOp = ARM64::LDURDi; + UnscaledOp = AArch64::LDURDi; break; - case ARM64::LDRQui: + case AArch64::LDRQui: Scale = 16; - UnscaledOp = ARM64::LDURQi; + UnscaledOp = AArch64::LDURQi; break; - case ARM64::LDRBBui: + case AArch64::LDRBBui: 
Scale = 1; - UnscaledOp = ARM64::LDURBBi; + UnscaledOp = AArch64::LDURBBi; break; - case ARM64::LDRHHui: + case AArch64::LDRHHui: Scale = 2; - UnscaledOp = ARM64::LDURHHi; + UnscaledOp = AArch64::LDURHHi; break; - case ARM64::LDRSBXui: + case AArch64::LDRSBXui: Scale = 1; - UnscaledOp = ARM64::LDURSBXi; + UnscaledOp = AArch64::LDURSBXi; break; - case ARM64::LDRSBWui: + case AArch64::LDRSBWui: Scale = 1; - UnscaledOp = ARM64::LDURSBWi; + UnscaledOp = AArch64::LDURSBWi; break; - case ARM64::LDRSHXui: + case AArch64::LDRSHXui: Scale = 2; - UnscaledOp = ARM64::LDURSHXi; + UnscaledOp = AArch64::LDURSHXi; break; - case ARM64::LDRSHWui: + case AArch64::LDRSHWui: Scale = 2; - UnscaledOp = ARM64::LDURSHWi; + UnscaledOp = AArch64::LDURSHWi; break; - case ARM64::LDRSWui: + case AArch64::LDRSWui: Scale = 4; - UnscaledOp = ARM64::LDURSWi; + UnscaledOp = AArch64::LDURSWi; break; - case ARM64::STRXui: + case AArch64::STRXui: Scale = 8; - UnscaledOp = ARM64::STURXi; + UnscaledOp = AArch64::STURXi; break; - case ARM64::STRWui: + case AArch64::STRWui: Scale = 4; - UnscaledOp = ARM64::STURWi; + UnscaledOp = AArch64::STURWi; break; - case ARM64::STRBui: + case AArch64::STRBui: Scale = 1; - UnscaledOp = ARM64::STURBi; + UnscaledOp = AArch64::STURBi; break; - case ARM64::STRHui: + case AArch64::STRHui: Scale = 2; - UnscaledOp = ARM64::STURHi; + UnscaledOp = AArch64::STURHi; break; - case ARM64::STRSui: + case AArch64::STRSui: Scale = 4; - UnscaledOp = ARM64::STURSi; + UnscaledOp = AArch64::STURSi; break; - case ARM64::STRDui: + case AArch64::STRDui: Scale = 8; - UnscaledOp = ARM64::STURDi; + UnscaledOp = AArch64::STURDi; break; - case ARM64::STRQui: + case AArch64::STRQui: Scale = 16; - UnscaledOp = ARM64::STURQi; + UnscaledOp = AArch64::STURQi; break; - case ARM64::STRBBui: + case AArch64::STRBBui: Scale = 1; - UnscaledOp = ARM64::STURBBi; + UnscaledOp = AArch64::STURBBi; break; - case ARM64::STRHHui: + case AArch64::STRHHui: Scale = 2; - UnscaledOp = ARM64::STURHHi; + UnscaledOp = AArch64::STURHHi; break; - case ARM64::LDPXi: - case ARM64::LDPDi: - case ARM64::STPXi: - case ARM64::STPDi: + case AArch64::LDPXi: + case AArch64::LDPDi: + case AArch64::STPXi: + case AArch64::STPDi: IsSigned = true; Scale = 8; break; - case ARM64::LDPQi: - case ARM64::STPQi: + case AArch64::LDPQi: + case AArch64::STPQi: IsSigned = true; Scale = 16; break; - case ARM64::LDPWi: - case ARM64::LDPSi: - case ARM64::STPWi: - case ARM64::STPSi: + case AArch64::LDPWi: + case AArch64::LDPSi: + case AArch64::STPWi: + case AArch64::STPSi: IsSigned = true; Scale = 4; break; - case ARM64::LDURXi: - case ARM64::LDURWi: - case ARM64::LDURBi: - case ARM64::LDURHi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURHHi: - case ARM64::LDURBBi: - case ARM64::LDURSBXi: - case ARM64::LDURSBWi: - case ARM64::LDURSHXi: - case ARM64::LDURSHWi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::STURWi: - case ARM64::STURBi: - case ARM64::STURHi: - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURBBi: - case ARM64::STURHHi: + case AArch64::LDURXi: + case AArch64::LDURWi: + case AArch64::LDURBi: + case AArch64::LDURHi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURHHi: + case AArch64::LDURBBi: + case AArch64::LDURSBXi: + case AArch64::LDURSBWi: + case AArch64::LDURSHXi: + case AArch64::LDURSHWi: + case AArch64::LDURSWi: + case AArch64::STURXi: + case AArch64::STURWi: + case AArch64::STURBi: + case AArch64::STURHi: + case AArch64::STURSi: + 
case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURBBi: + case AArch64::STURHHi: Scale = 1; break; } @@ -2014,21 +2020,21 @@ int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, *OutUseUnscaledOp = useUnscaledOp; if (OutUnscaledOp) *OutUnscaledOp = UnscaledOp; - return ARM64FrameOffsetCanUpdate | - (Offset == 0 ? ARM64FrameOffsetIsLegal : 0); + return AArch64FrameOffsetCanUpdate | + (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); } -bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const ARM64InstrInfo *TII) { +bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo *TII) { unsigned Opcode = MI.getOpcode(); unsigned ImmIdx = FrameRegIdx + 1; - if (Opcode == ARM64::ADDSXri || Opcode == ARM64::ADDXri) { + if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { Offset += MI.getOperand(ImmIdx).getImm(); emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), MI.getOperand(0).getReg(), FrameReg, Offset, TII, - MachineInstr::NoFlags, (Opcode == ARM64::ADDSXri)); + MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); MI.eraseFromParent(); Offset = 0; return true; @@ -2037,10 +2043,10 @@ bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, int NewOffset; unsigned UnscaledOp; bool UseUnscaledOp; - int Status = isARM64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, &UnscaledOp, - &NewOffset); - if (Status & ARM64FrameOffsetCanUpdate) { - if (Status & ARM64FrameOffsetIsLegal) + int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, + &UnscaledOp, &NewOffset); + if (Status & AArch64FrameOffsetCanUpdate) { + if (Status & AArch64FrameOffsetIsLegal) // Replace the FrameIndex with FrameReg. MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); if (UseUnscaledOp) @@ -2053,7 +2059,7 @@ bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return false; } -void ARM64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { - NopInst.setOpcode(ARM64::HINT); +void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(AArch64::HINT); NopInst.addOperand(MCOperand::CreateImm(0)); } diff --git a/lib/Target/ARM64/ARM64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h similarity index 81% rename from lib/Target/ARM64/ARM64InstrInfo.h rename to lib/Target/AArch64/AArch64InstrInfo.h index ce195e763b2..90ce75f26d4 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -1,4 +1,4 @@ -//===- ARM64InstrInfo.h - ARM64 Instruction Information ---------*- C++ -*-===// +//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,44 +7,44 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of the TargetInstrInfo class. +// This file contains the AArch64 implementation of the TargetInstrInfo class. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64INSTRINFO_H -#define LLVM_TARGET_ARM64INSTRINFO_H +#ifndef LLVM_TARGET_AArch64INSTRINFO_H +#define LLVM_TARGET_AArch64INSTRINFO_H -#include "ARM64.h" -#include "ARM64RegisterInfo.h" +#include "AArch64.h" +#include "AArch64RegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER -#include "ARM64GenInstrInfo.inc" +#include "AArch64GenInstrInfo.inc" namespace llvm { -class ARM64Subtarget; -class ARM64TargetMachine; +class AArch64Subtarget; +class AArch64TargetMachine; -class ARM64InstrInfo : public ARM64GenInstrInfo { +class AArch64InstrInfo : public AArch64GenInstrInfo { // Reserve bits in the MachineMemOperand target hint flags, starting at 1. // They will be shifted into MOTargetHintStart when accessed. enum TargetMemOperandFlags { MOSuppressPair = 1 }; - const ARM64RegisterInfo RI; - const ARM64Subtarget &Subtarget; + const AArch64RegisterInfo RI; + const AArch64Subtarget &Subtarget; public: - explicit ARM64InstrInfo(const ARM64Subtarget &STI); + explicit AArch64InstrInfo(const AArch64Subtarget &STI); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). - const ARM64RegisterInfo &getRegisterInfo() const { return RI; } + const AArch64RegisterInfo &getRegisterInfo() const { return RI; } - const ARM64Subtarget &getSubTarget() const { return Subtarget; } + const AArch64Subtarget &getSubTarget() const { return Subtarget; } unsigned GetInstSizeInBytes(const MachineInstr *MI) const; @@ -60,8 +60,8 @@ public: /// is non-zero. bool hasShiftedReg(const MachineInstr *MI) const; - /// Returns true if there is an extendable register and that the extending value - /// is non-zero. + /// Returns true if there is an extendable register and that the extending + /// value is non-zero. bool hasExtendedReg(const MachineInstr *MI) const; /// \brief Does this instruction set its full destination register to zero? @@ -168,63 +168,63 @@ private: /// if necessary, to be replaced by the scavenger at the end of PEI. void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset, - const ARM64InstrInfo *TII, + const AArch64InstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, bool SetNZCV = false); -/// rewriteARM64FrameIndex - Rewrite MI to access 'Offset' bytes from the +/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the /// FP. Return false if the offset could not be handled directly in MI, and /// return the left-over portion by reference. -bool rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, +bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, - const ARM64InstrInfo *TII); + const AArch64InstrInfo *TII); -/// \brief Use to report the frame offset status in isARM64FrameOffsetLegal. -enum ARM64FrameOffsetStatus { - ARM64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply. - ARM64FrameOffsetIsLegal = 0x1, ///< Offset is legal. - ARM64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly. +/// \brief Use to report the frame offset status in isAArch64FrameOffsetLegal. +enum AArch64FrameOffsetStatus { + AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply. + AArch64FrameOffsetIsLegal = 0x1, ///< Offset is legal. 
+ AArch64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly. }; /// \brief Check if the @p Offset is a valid frame offset for @p MI. /// The returned value reports the validity of the frame offset for @p MI. -/// It uses the values defined by ARM64FrameOffsetStatus for that. -/// If result == ARM64FrameOffsetCannotUpdate, @p MI cannot be updated to +/// It uses the values defined by AArch64FrameOffsetStatus for that. +/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to /// use an offset.eq -/// If result & ARM64FrameOffsetIsLegal, @p Offset can completely be +/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be /// rewriten in @p MI. -/// If result & ARM64FrameOffsetCanUpdate, @p Offset contains the +/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the /// amount that is off the limit of the legal offset. /// If set, @p OutUseUnscaledOp will contain the whether @p MI should be /// turned into an unscaled operator, which opcode is in @p OutUnscaledOp. /// If set, @p EmittableOffset contains the amount that can be set in @p MI /// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that /// is a legal offset. -int isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, +int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, bool *OutUseUnscaledOp = nullptr, unsigned *OutUnscaledOp = nullptr, int *EmittableOffset = nullptr); -static inline bool isUncondBranchOpcode(int Opc) { return Opc == ARM64::B; } +static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; } static inline bool isCondBranchOpcode(int Opc) { switch (Opc) { - case ARM64::Bcc: - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - case ARM64::TBZW: - case ARM64::TBZX: - case ARM64::TBNZW: - case ARM64::TBNZX: + case AArch64::Bcc: + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: + case AArch64::TBZW: + case AArch64::TBZX: + case AArch64::TBNZW: + case AArch64::TBNZX: return true; default: return false; } } -static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM64::BR; } +static inline bool isIndirectBranchOpcode(int Opc) { return Opc == AArch64::BR; } } // end namespace llvm diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td similarity index 83% rename from lib/Target/ARM64/ARM64InstrInfo.td rename to lib/Target/AArch64/AArch64InstrInfo.td index e68980c83c5..9ad36e8740d 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -1,4 +1,4 @@ -//===- ARM64InstrInfo.td - Describe the ARM64 Instructions -*- tablegen -*-===// +//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// ARM64 Instruction definitions. +// AArch64 Instruction definitions. // //===----------------------------------------------------------------------===// @@ -26,7 +26,7 @@ def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; //===----------------------------------------------------------------------===// -// ARM64-specific DAG Nodes. +// AArch64-specific DAG Nodes. 
// // SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS @@ -50,196 +50,198 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; -def SDT_ARM64Brcond : SDTypeProfile<0, 3, +def SDT_AArch64Brcond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_ARM64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; -def SDT_ARM64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, +def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; +def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, OtherVT>]>; -def SDT_ARM64CSel : SDTypeProfile<1, 4, +def SDT_AArch64CSel : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; -def SDT_ARM64FCmp : SDTypeProfile<0, 2, +def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; -def SDT_ARM64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDT_ARM64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; -def SDT_ARM64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, +def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; +def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; -def SDT_ARM64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; -def SDT_ARM64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; -def SDT_ARM64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; +def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; -def SDT_ARM64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; -def SDT_ARM64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; +def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; -def SDT_ARM64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64fcmpz : SDTypeProfile<1, 1, []>; -def SDT_ARM64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; -def SDT_ARM64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, +def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; +def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; +def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; -def SDT_ARM64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; -def SDT_ARM64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; -def SDT_ARM64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; +def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; +def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; -def SDT_ARM64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64TLSDescCall 
: SDTypeProfile<0, -2, [SDTCisPtrTy<0>, +def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; -def SDT_ARM64WrapperLarge : SDTypeProfile<1, 4, +def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisSameAs<1, 4>]>; // Node definitions. -def ARM64adrp : SDNode<"ARM64ISD::ADRP", SDTIntUnaryOp, []>; -def ARM64addlow : SDNode<"ARM64ISD::ADDlow", SDTIntBinOp, []>; -def ARM64LOADgot : SDNode<"ARM64ISD::LOADgot", SDTIntUnaryOp>; -def ARM64callseq_start : SDNode<"ISD::CALLSEQ_START", +def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; +def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; +def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; +def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDCallSeqStart<[ SDTCisVT<0, i32> ]>, [SDNPHasChain, SDNPOutGlue]>; -def ARM64callseq_end : SDNode<"ISD::CALLSEQ_END", +def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def ARM64call : SDNode<"ARM64ISD::CALL", +def AArch64call : SDNode<"AArch64ISD::CALL", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def ARM64brcond : SDNode<"ARM64ISD::BRCOND", SDT_ARM64Brcond, +def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>; -def ARM64cbz : SDNode<"ARM64ISD::CBZ", SDT_ARM64cbz, +def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, [SDNPHasChain]>; -def ARM64cbnz : SDNode<"ARM64ISD::CBNZ", SDT_ARM64cbz, +def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, [SDNPHasChain]>; -def ARM64tbz : SDNode<"ARM64ISD::TBZ", SDT_ARM64tbz, +def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, [SDNPHasChain]>; -def ARM64tbnz : SDNode<"ARM64ISD::TBNZ", SDT_ARM64tbz, +def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, [SDNPHasChain]>; -def ARM64csel : SDNode<"ARM64ISD::CSEL", SDT_ARM64CSel>; -def ARM64csinv : SDNode<"ARM64ISD::CSINV", SDT_ARM64CSel>; -def ARM64csneg : SDNode<"ARM64ISD::CSNEG", SDT_ARM64CSel>; -def ARM64csinc : SDNode<"ARM64ISD::CSINC", SDT_ARM64CSel>; -def ARM64retflag : SDNode<"ARM64ISD::RET_FLAG", SDTNone, +def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; +def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; +def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; +def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; +def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARM64adc : SDNode<"ARM64ISD::ADC", SDTBinaryArithWithFlagsIn >; -def ARM64sbc : SDNode<"ARM64ISD::SBC", SDTBinaryArithWithFlagsIn>; -def ARM64add_flag : SDNode<"ARM64ISD::ADDS", SDTBinaryArithWithFlagsOut, +def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; +def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; +def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, [SDNPCommutative]>; -def ARM64sub_flag : SDNode<"ARM64ISD::SUBS", SDTBinaryArithWithFlagsOut>; -def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut, +def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; +def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, [SDNPCommutative]>; -def ARM64adc_flag : SDNode<"ARM64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; -def ARM64sbc_flag : 
SDNode<"ARM64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; - -def ARM64threadpointer : SDNode<"ARM64ISD::THREAD_POINTER", SDTPtrLeaf>; - -def ARM64fcmp : SDNode<"ARM64ISD::FCMP", SDT_ARM64FCmp>; - -def ARM64fmax : SDNode<"ARM64ISD::FMAX", SDTFPBinOp>; -def ARM64fmin : SDNode<"ARM64ISD::FMIN", SDTFPBinOp>; - -def ARM64dup : SDNode<"ARM64ISD::DUP", SDT_ARM64Dup>; -def ARM64duplane8 : SDNode<"ARM64ISD::DUPLANE8", SDT_ARM64DupLane>; -def ARM64duplane16 : SDNode<"ARM64ISD::DUPLANE16", SDT_ARM64DupLane>; -def ARM64duplane32 : SDNode<"ARM64ISD::DUPLANE32", SDT_ARM64DupLane>; -def ARM64duplane64 : SDNode<"ARM64ISD::DUPLANE64", SDT_ARM64DupLane>; - -def ARM64zip1 : SDNode<"ARM64ISD::ZIP1", SDT_ARM64Zip>; -def ARM64zip2 : SDNode<"ARM64ISD::ZIP2", SDT_ARM64Zip>; -def ARM64uzp1 : SDNode<"ARM64ISD::UZP1", SDT_ARM64Zip>; -def ARM64uzp2 : SDNode<"ARM64ISD::UZP2", SDT_ARM64Zip>; -def ARM64trn1 : SDNode<"ARM64ISD::TRN1", SDT_ARM64Zip>; -def ARM64trn2 : SDNode<"ARM64ISD::TRN2", SDT_ARM64Zip>; - -def ARM64movi_edit : SDNode<"ARM64ISD::MOVIedit", SDT_ARM64MOVIedit>; -def ARM64movi_shift : SDNode<"ARM64ISD::MOVIshift", SDT_ARM64MOVIshift>; -def ARM64movi_msl : SDNode<"ARM64ISD::MOVImsl", SDT_ARM64MOVIshift>; -def ARM64mvni_shift : SDNode<"ARM64ISD::MVNIshift", SDT_ARM64MOVIshift>; -def ARM64mvni_msl : SDNode<"ARM64ISD::MVNImsl", SDT_ARM64MOVIshift>; -def ARM64movi : SDNode<"ARM64ISD::MOVI", SDT_ARM64MOVIedit>; -def ARM64fmov : SDNode<"ARM64ISD::FMOV", SDT_ARM64MOVIedit>; - -def ARM64rev16 : SDNode<"ARM64ISD::REV16", SDT_ARM64UnaryVec>; -def ARM64rev32 : SDNode<"ARM64ISD::REV32", SDT_ARM64UnaryVec>; -def ARM64rev64 : SDNode<"ARM64ISD::REV64", SDT_ARM64UnaryVec>; -def ARM64ext : SDNode<"ARM64ISD::EXT", SDT_ARM64ExtVec>; - -def ARM64vashr : SDNode<"ARM64ISD::VASHR", SDT_ARM64vshift>; -def ARM64vlshr : SDNode<"ARM64ISD::VLSHR", SDT_ARM64vshift>; -def ARM64vshl : SDNode<"ARM64ISD::VSHL", SDT_ARM64vshift>; -def ARM64sqshli : SDNode<"ARM64ISD::SQSHL_I", SDT_ARM64vshift>; -def ARM64uqshli : SDNode<"ARM64ISD::UQSHL_I", SDT_ARM64vshift>; -def ARM64sqshlui : SDNode<"ARM64ISD::SQSHLU_I", SDT_ARM64vshift>; -def ARM64srshri : SDNode<"ARM64ISD::SRSHR_I", SDT_ARM64vshift>; -def ARM64urshri : SDNode<"ARM64ISD::URSHR_I", SDT_ARM64vshift>; - -def ARM64not: SDNode<"ARM64ISD::NOT", SDT_ARM64unvec>; -def ARM64bit: SDNode<"ARM64ISD::BIT", SDT_ARM64trivec>; -def ARM64bsl: SDNode<"ARM64ISD::BSL", SDT_ARM64trivec>; - -def ARM64cmeq: SDNode<"ARM64ISD::CMEQ", SDT_ARM64binvec>; -def ARM64cmge: SDNode<"ARM64ISD::CMGE", SDT_ARM64binvec>; -def ARM64cmgt: SDNode<"ARM64ISD::CMGT", SDT_ARM64binvec>; -def ARM64cmhi: SDNode<"ARM64ISD::CMHI", SDT_ARM64binvec>; -def ARM64cmhs: SDNode<"ARM64ISD::CMHS", SDT_ARM64binvec>; - -def ARM64fcmeq: SDNode<"ARM64ISD::FCMEQ", SDT_ARM64fcmp>; -def ARM64fcmge: SDNode<"ARM64ISD::FCMGE", SDT_ARM64fcmp>; -def ARM64fcmgt: SDNode<"ARM64ISD::FCMGT", SDT_ARM64fcmp>; - -def ARM64cmeqz: SDNode<"ARM64ISD::CMEQz", SDT_ARM64unvec>; -def ARM64cmgez: SDNode<"ARM64ISD::CMGEz", SDT_ARM64unvec>; -def ARM64cmgtz: SDNode<"ARM64ISD::CMGTz", SDT_ARM64unvec>; -def ARM64cmlez: SDNode<"ARM64ISD::CMLEz", SDT_ARM64unvec>; -def ARM64cmltz: SDNode<"ARM64ISD::CMLTz", SDT_ARM64unvec>; -def ARM64cmtst : PatFrag<(ops node:$LHS, node:$RHS), - (ARM64not (ARM64cmeqz (and node:$LHS, node:$RHS)))>; - -def ARM64fcmeqz: SDNode<"ARM64ISD::FCMEQz", SDT_ARM64fcmpz>; -def ARM64fcmgez: SDNode<"ARM64ISD::FCMGEz", SDT_ARM64fcmpz>; -def ARM64fcmgtz: SDNode<"ARM64ISD::FCMGTz", SDT_ARM64fcmpz>; -def ARM64fcmlez: SDNode<"ARM64ISD::FCMLEz", SDT_ARM64fcmpz>; -def 
ARM64fcmltz: SDNode<"ARM64ISD::FCMLTz", SDT_ARM64fcmpz>; - -def ARM64bici: SDNode<"ARM64ISD::BICi", SDT_ARM64vecimm>; -def ARM64orri: SDNode<"ARM64ISD::ORRi", SDT_ARM64vecimm>; - -def ARM64neg : SDNode<"ARM64ISD::NEG", SDT_ARM64unvec>; - -def ARM64tcret: SDNode<"ARM64ISD::TC_RETURN", SDT_ARM64TCRET, +def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; +def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; + +def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; + +def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>; +def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>; + +def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; +def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; +def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; +def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; +def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; + +def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; +def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; +def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; +def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; +def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; +def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; + +def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; +def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; +def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; +def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; +def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; +def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; +def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; + +def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; +def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; +def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; +def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; + +def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; +def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; +def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; +def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; +def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; +def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; +def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; +def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; + +def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; +def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; +def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>; + +def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; +def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; +def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; +def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; +def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; + +def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; +def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; +def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", 
SDT_AArch64fcmp>; + +def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; +def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; +def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; +def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; +def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; +def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), + (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>; + +def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; +def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; +def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; +def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; +def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; + +def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; +def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; + +def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>; + +def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARM64Prefetch : SDNode<"ARM64ISD::PREFETCH", SDT_ARM64PREFETCH, +def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, [SDNPHasChain, SDNPSideEffect]>; -def ARM64sitof: SDNode<"ARM64ISD::SITOF", SDT_ARM64ITOF>; -def ARM64uitof: SDNode<"ARM64ISD::UITOF", SDT_ARM64ITOF>; +def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; +def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; -def ARM64tlsdesc_call : SDNode<"ARM64ISD::TLSDESC_CALL", SDT_ARM64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; +def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL", + SDT_AArch64TLSDescCall, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; -def ARM64WrapperLarge : SDNode<"ARM64ISD::WrapperLarge", SDT_ARM64WrapperLarge>; +def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", + SDT_AArch64WrapperLarge>; //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// ARM64 Instruction Predicate Definitions. +// AArch64 Instruction Predicate Definitions. // def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">; @@ -248,7 +250,7 @@ def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; def ForCodeSize : Predicate<"ForCodeSize">; def NotForCodeSize : Predicate<"!ForCodeSize">; -include "ARM64InstrFormats.td" +include "AArch64InstrFormats.td" //===----------------------------------------------------------------------===// @@ -258,63 +260,63 @@ include "ARM64InstrFormats.td" let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - [(ARM64callseq_start timm:$amt)]>; + [(AArch64callseq_start timm:$amt)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - [(ARM64callseq_end timm:$amt1, timm:$amt2)]>; + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; } // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 let isReMaterializable = 1, isCodeGenOnly = 1 in { // FIXME: The following pseudo instructions are only needed because remat // cannot handle multiple instructions. When that changes, they can be -// removed, along with the ARM64Wrapper node. +// removed, along with the AArch64Wrapper node. 
let AddedComplexity = 10 in def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), - [(set GPR64:$dst, (ARM64LOADgot tglobaladdr:$addr))]>, + [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, Sched<[WriteLDAdr]>; // The MOVaddr instruction should match only when the add is not folded // into a load or store address. def MOVaddr : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaladdr:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), tglobaladdr:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrJT : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tjumptable:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), tjumptable:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrCP : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tconstpool:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), tconstpool:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrBA : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tblockaddress:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), tblockaddress:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrTLS : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaltlsaddr:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), tglobaltlsaddr:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrEXT : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp texternalsym:$hi), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), texternalsym:$low))]>, Sched<[WriteAdrAdr]>; } // isReMaterializable, isCodeGenOnly -def : Pat<(ARM64LOADgot tglobaltlsaddr:$addr), +def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), (LOADgot tglobaltlsaddr:$addr)>; -def : Pat<(ARM64LOADgot texternalsym:$addr), +def : Pat<(AArch64LOADgot texternalsym:$addr), (LOADgot texternalsym:$addr)>; -def : Pat<(ARM64LOADgot tconstpool:$addr), +def : Pat<(AArch64LOADgot tconstpool:$addr), (LOADgot tconstpool:$addr)>; //===----------------------------------------------------------------------===// @@ -345,7 +347,7 @@ def MSRpstate: MSRpstateI; // The thread pointer (on Linux, at least, where this has been implemented) is // TPIDR_EL0. -def : Pat<(ARM64threadpointer), (MRS 0xde82)>; +def : Pat<(AArch64threadpointer), (MRS 0xde82)>; // Generic system instructions def SYSxt : SystemXtI<0, "sys">; @@ -464,28 +466,28 @@ def : Pat<(i64 i64imm_32bit:$src), // Deal with the various forms of (ELF) large addressing with MOVZ/MOVK // sequences. 
-def : Pat<(ARM64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, +def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, tglobaladdr:$g1, tglobaladdr:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48), tglobaladdr:$g2, 32), tglobaladdr:$g1, 16), tglobaladdr:$g0, 0)>; -def : Pat<(ARM64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, +def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, tblockaddress:$g1, tblockaddress:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48), tblockaddress:$g2, 32), tblockaddress:$g1, 16), tblockaddress:$g0, 0)>; -def : Pat<(ARM64WrapperLarge tconstpool:$g3, tconstpool:$g2, +def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, tconstpool:$g1, tconstpool:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48), tconstpool:$g2, 32), tconstpool:$g1, 16), tconstpool:$g0, 0)>; -def : Pat<(ARM64WrapperLarge tjumptable:$g3, tjumptable:$g2, +def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, tjumptable:$g1, tjumptable:$g0), (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48), tjumptable:$g2, 32), @@ -498,8 +500,8 @@ def : Pat<(ARM64WrapperLarge tjumptable:$g3, tjumptable:$g2, //===----------------------------------------------------------------------===// // Add/subtract with carry. -defm ADC : AddSubCarry<0, "adc", "adcs", ARM64adc, ARM64adc_flag>; -defm SBC : AddSubCarry<1, "sbc", "sbcs", ARM64sbc, ARM64sbc_flag>; +defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; +defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; @@ -519,8 +521,8 @@ def : InstAlias<"mov $dst, $src", def : InstAlias<"mov $dst, $src", (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; -defm ADDS : AddSubS<0, "adds", ARM64add_flag, "cmn">; -defm SUBS : AddSubS<1, "subs", ARM64sub_flag, "cmp">; +defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">; +defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">; // Use SUBS instead of SUB to enable CSE between SUBS and SUB. def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), @@ -558,13 +560,13 @@ def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). // These patterns capture that transformation. 
let AddedComplexity = 1 in { -def : Pat<(ARM64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), +def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), +def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(ARM64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), +def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), +def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; } @@ -587,8 +589,8 @@ def : InstAlias<"negs $dst, $src$shift", defm UDIV : Div<0, "udiv", udiv>; defm SDIV : Div<1, "sdiv", sdiv>; let isCodeGenOnly = 1 in { -defm UDIV_Int : Div<0, "udiv", int_arm64_udiv>; -defm SDIV_Int : Div<1, "sdiv", int_arm64_sdiv>; +defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>; +defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>; } // Variable shift @@ -653,15 +655,15 @@ def SMULHrr : MulHi<0b010, "smulh", mulhs>; def UMULHrr : MulHi<0b110, "umulh", mulhu>; // CRC32 -def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_arm64_crc32b, "crc32b">; -def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_arm64_crc32h, "crc32h">; -def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_arm64_crc32w, "crc32w">; -def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_arm64_crc32x, "crc32x">; +def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; +def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; +def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; +def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; -def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_arm64_crc32cb, "crc32cb">; -def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_arm64_crc32ch, "crc32ch">; -def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_arm64_crc32cw, "crc32cw">; -def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_arm64_crc32cx, "crc32cx">; +def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; +def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; +def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; +def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; //===----------------------------------------------------------------------===// @@ -669,7 +671,7 @@ def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_arm64_crc32cx, "crc32cx">; //===----------------------------------------------------------------------===// // (immediate) -defm ANDS : LogicalImmS<0b11, "ands", ARM64and_flag>; +defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>; defm AND : LogicalImm<0b00, "and", and>; defm EOR : LogicalImm<0b10, "eor", xor>; defm ORR : LogicalImm<0b01, "orr", or>; @@ -684,9 +686,9 @@ def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, // (register) -defm ANDS : LogicalRegS<0b11, 0, "ands", ARM64and_flag>; +defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; defm BICS : LogicalRegS<0b11, 1, "bics", - BinOpFrag<(ARM64and_flag node:$LHS, (not node:$RHS))>>; + BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; defm AND : LogicalReg<0b00, 0, "and", and>; defm BIC : LogicalReg<0b00, 1, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; @@ -900,26 +902,26 @@ 
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; -def : Pat<(ARM64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), +def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), +def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), (CSINCWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), +def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), (CSINCXr XZR, XZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), +def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), (CSINVWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), +def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), (CSINVXr XZR, XZR, (i32 imm:$cc))>; // The inverse of the condition code from the alias instruction is what is used @@ -959,12 +961,12 @@ def ADR : ADRI<0, "adr", adrlabel, []>; } // neverHasSideEffects = 1 def ADRP : ADRI<1, "adrp", adrplabel, - [(set GPR64:$Xd, (ARM64adrp tglobaladdr:$label))]>; + [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; } // isReMaterializable = 1 // page address of a constant pool entry, block address -def : Pat<(ARM64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; -def : Pat<(ARM64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; +def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; +def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; //===----------------------------------------------------------------------===// // Unconditional branch (register) instructions. @@ -980,7 +982,7 @@ def ERET : SpecialReturn<0b0100, "eret">; def : InstAlias<"ret", (RET LR)>; let isCall = 1, Defs = [LR], Uses = [SP] in { -def BLR : BranchReg<0b0001, "blr", [(ARM64call GPR64:$Rn)]>; +def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>; } // isCall let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { @@ -990,7 +992,7 @@ def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; // Create a separate pseudo-instruction for codegen to use so that we don't // flag lr as used in every function. It'll be restored before the RET by the // epilogue if it's legitimately used. 
-def RET_ReallyLR : Pseudo<(outs), (ins), [(ARM64retflag)]> { +def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> { let isTerminator = 1; let isBarrier = 1; let isReturn = 1; @@ -1009,9 +1011,9 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { let isCall = 1, Defs = [LR] in def TLSDESC_BLR : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(ARM64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; + [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; -def : Pat<(ARM64tlsdesc_call GPR64:$dest, texternalsym:$sym), +def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym), (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; //===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. @@ -1021,14 +1023,14 @@ def Bcc : BranchCond; //===----------------------------------------------------------------------===// // Compare-and-branch instructions. //===----------------------------------------------------------------------===// -defm CBZ : CmpBranch<0, "cbz", ARM64cbz>; -defm CBNZ : CmpBranch<1, "cbnz", ARM64cbnz>; +defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; +defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; //===----------------------------------------------------------------------===// // Test-bit-and-branch instructions. //===----------------------------------------------------------------------===// -defm TBZ : TestBranch<0, "tbz", ARM64tbz>; -defm TBNZ : TestBranch<1, "tbnz", ARM64tbnz>; +defm TBZ : TestBranch<0, "tbz", AArch64tbz>; +defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; //===----------------------------------------------------------------------===// // Unconditional branch (immediate) instructions. @@ -1038,9 +1040,9 @@ def B : BranchImm<0, "b", [(br bb:$addr)]>; } // isBranch, isTerminator, isBarrier let isCall = 1, Defs = [LR], Uses = [SP] in { -def BL : CallImm<1, "bl", [(ARM64call tglobaladdr:$addr)]>; +def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; } // isCall -def : Pat<(ARM64call texternalsym:$func), (BL texternalsym:$func)>; +def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; //===----------------------------------------------------------------------===// // Exception generation instructions. @@ -1432,7 +1434,7 @@ def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), // Pre-fetch. def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", - [(ARM64Prefetch imm:$Rt, + [(AArch64Prefetch imm:$Rt, (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; @@ -1451,7 +1453,7 @@ def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">; // prefetch def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; -// [(ARM64Prefetch imm:$Rt, tglobaladdr:$label)]>; +// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; //--- // (unscaled immediate) @@ -1650,7 +1652,7 @@ def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", // Pre-fetch. defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", - [(ARM64Prefetch imm:$Rt, + [(AArch64Prefetch imm:$Rt, (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; //--- @@ -2187,23 +2189,23 @@ def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; // Scaled floating point to integer conversion instructions. 
//===----------------------------------------------------------------------===// -defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_arm64_neon_fcvtas>; -defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_arm64_neon_fcvtau>; -defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_arm64_neon_fcvtms>; -defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_arm64_neon_fcvtmu>; -defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_arm64_neon_fcvtns>; -defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_arm64_neon_fcvtnu>; -defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_arm64_neon_fcvtps>; -defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_arm64_neon_fcvtpu>; +defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; +defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; +defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; +defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; +defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; +defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; +defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; +defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>; defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>; defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>; defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>; let isCodeGenOnly = 1 in { -defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_arm64_neon_fcvtzs>; -defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_arm64_neon_fcvtzu>; -defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_arm64_neon_fcvtzs>; -defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_arm64_neon_fcvtzu>; +defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; +defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; +defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; +defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; } //===----------------------------------------------------------------------===// @@ -2246,10 +2248,10 @@ defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>; defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; -defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_arm64_neon_frintn>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>; defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; -def : Pat<(v1f64 (int_arm64_neon_frintn (v1f64 FPR64:$Rn))), +def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))), (FRINTNDr FPR64:$Rn)>; // FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior @@ -2274,23 +2276,23 @@ defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; let SchedRW = [WriteFDiv] in { defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_arm64_neon_fmaxnm>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", ARM64fmax>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", 
int_arm64_neon_fminnm>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", ARM64fmin>; +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>; let SchedRW = [WriteFMul] in { defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; } defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; -def : Pat<(v1f64 (ARM64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (ARM64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_arm64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_arm64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; //===----------------------------------------------------------------------===// @@ -2335,7 +2337,7 @@ def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))), //===----------------------------------------------------------------------===// defm FCMPE : FPComparison<1, "fcmpe">; -defm FCMP : FPComparison<0, "fcmp", ARM64fcmp>; +defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; //===----------------------------------------------------------------------===// // Floating point conditional comparison instructions. @@ -2356,7 +2358,7 @@ defm FCSEL : FPCondSelect<"fcsel">; def F128CSEL : Pseudo<(outs FPR128:$Rd), (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), [(set (f128 FPR128:$Rd), - (ARM64csel FPR128:$Rn, FPR128:$Rm, + (AArch64csel FPR128:$Rn, FPR128:$Rm, (i32 imm:$cond), NZCV))]> { let Uses = [NZCV]; let usesCustomInserter = 1; @@ -2375,28 +2377,28 @@ defm FMOV : FPMoveImmediate<"fmov">; // Advanced SIMD two vector instructions. 
//===----------------------------------------------------------------------===// -defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_arm64_neon_abs>; -defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_arm64_neon_cls>; +defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>; +defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; -defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", ARM64cmltz>; +defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; +defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; +defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; +defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; +defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; -defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; -defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_arm64_neon_fcvtas>; -defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_arm64_neon_fcvtau>; +defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; +defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; +defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; -def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (v4i16 V64:$Rn))), +def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; -def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), +def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), (i64 4)))), (FCVTLv8i16 V128:$Rn)>; def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; @@ -2404,41 +2406,41 @@ def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn), (i64 2))))), (FCVTLv4i32 V128:$Rn)>; -defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_arm64_neon_fcvtms>; -defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_arm64_neon_fcvtmu>; -defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_arm64_neon_fcvtns>; -defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_arm64_neon_fcvtnu>; +defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; +defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; +defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; +defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, 
"fcvtn">; -def : Pat<(v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn))), +def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; def : Pat<(concat_vectors V64:$Rd, - (v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), + (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))), (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_arm64_neon_fcvtps>; -defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_arm64_neon_fcvtpu>; +defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; +defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", - int_arm64_neon_fcvtxn>; + int_aarch64_neon_fcvtxn>; defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; let isCodeGenOnly = 1 in { defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", - int_arm64_neon_fcvtzs>; + int_aarch64_neon_fcvtzs>; defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", - int_arm64_neon_fcvtzu>; + int_aarch64_neon_fcvtzu>; } defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; -defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_arm64_neon_frecpe>; +defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>; defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_arm64_neon_frintn>; +defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>; defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; -defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_arm64_neon_frsqrte>; +defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; @@ -2449,22 +2451,22 @@ def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", (NOTv16i8 V128:$Vd, V128:$Vn)>; -def : Pat<(ARM64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; -def : Pat<(ARM64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; -def : Pat<(ARM64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; -def : Pat<(ARM64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; -def : Pat<(ARM64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; -def : Pat<(ARM64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; -def : Pat<(ARM64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; - -def : Pat<(ARM64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 
-def : Pat<(ARM64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; +def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; +def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; +def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; +def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; +def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; +def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; + +def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; @@ -2472,49 +2474,49 @@ def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_arm64_neon_rbit>; -defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", ARM64rev16>; -defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", ARM64rev32>; -defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", ARM64rev64>; +defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>; +defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; +defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; +defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", - BinOpFrag<(add node:$LHS, (int_arm64_neon_saddlp node:$RHS))> >; -defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_arm64_neon_saddlp>; + BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; +defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; defm SHLL : SIMDVectorLShiftLongBySizeBHS; -defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_arm64_neon_sqxtn>; -defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_arm64_neon_sqxtun>; -defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_arm64_neon_suqadd>; +defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; +defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; +defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; +defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; +defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", - BinOpFrag<(add node:$LHS, (int_arm64_neon_uaddlp node:$RHS))> >; + BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >; defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", - int_arm64_neon_uaddlp>; + int_aarch64_neon_uaddlp>; defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; -defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", 
int_arm64_neon_uqxtn>; -defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_arm64_neon_urecpe>; -defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_arm64_neon_ursqrte>; -defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_arm64_neon_usqadd>; +defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; +defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; +defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; +defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; -def : Pat<(v2f32 (ARM64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; -def : Pat<(v4f32 (ARM64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; +def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; +def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; // Patterns for vector long shift (by element width). These need to match all // three of zext, sext and anyext so it's easier to pull the patterns out of the // definition. multiclass SIMDVectorLShiftLongBySizeBHSPats { - def : Pat<(ARM64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), + def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), (SHLLv8i8 V64:$Rn)>; - def : Pat<(ARM64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), + def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), (SHLLv16i8 V128:$Rn)>; - def : Pat<(ARM64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), + def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), (SHLLv4i16 V64:$Rn)>; - def : Pat<(ARM64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), + def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), (SHLLv8i16 V128:$Rn)>; - def : Pat<(ARM64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), + def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), (SHLLv2i32 V64:$Rn)>; - def : Pat<(ARM64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), + def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), (SHLLv4i32 V128:$Rn)>; } @@ -2527,30 +2529,30 @@ defm : SIMDVectorLShiftLongBySizeBHSPats; //===----------------------------------------------------------------------===// defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; -defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_arm64_neon_addp>; -defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_arm64_neon_fabd>; -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_arm64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_arm64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_arm64_neon_addp>; +defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>; +defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; +defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; +defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; +defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; +defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; +defm CMTST 
: SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; +defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>; +defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>; +defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>; +defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>; defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; -defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; +defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_arm64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_arm64_neon_fmaxnm>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_arm64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", ARM64fmax>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_arm64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_arm64_neon_fminnm>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_arm64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", ARM64fmin>; +defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>; +defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>; +defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>; +defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. 
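To make the NOTE above concrete: a small, self-contained illustration (not part of the patch) of the two operand orders it contrasts. The names fma_node and fmla_insn are invented here; they stand for the fma-intrinsic convention and the FMLA-style instruction convention respectively.

#include <cstdio>

// llvm.fma / ISD::FMA convention: the two multiplicands first, the addend last.
static double fma_node(double rn, double rm, double addend) {
  return rn * rm + addend;
}

// FMLA-style convention: the tied accumulator (the addend) comes first; this
// is the reordering the NOTE describes.
static double fmla_insn(double accum, double rn, double rm) {
  return accum + rn * rm;
}

int main() {
  // Same computation, operands presented in the two different orders.
  std::printf("%g %g\n", fma_node(2.0, 3.0, 1.0), fmla_insn(1.0, 2.0, 3.0));
  return 0;
}

Reordering at the pattern level keeps the intrinsic's documented operand order unchanged for users while still matching the tied accumulator operand the instruction expects.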
@@ -2570,58 +2572,58 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_arm64_neon_fmulx>; +defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>; defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_arm64_neon_frsqrts>; +defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>; defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; -defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_arm64_neon_pmul>; +defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_sabd node:$MHS, node:$RHS))> >; -defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_arm64_neon_sabd>; -defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_arm64_neon_shadd>; -defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_arm64_neon_shsub>; -defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_arm64_neon_smaxp>; -defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_arm64_neon_smax>; -defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_arm64_neon_sminp>; -defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_arm64_neon_smin>; -defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_arm64_neon_sqsub>; -defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_arm64_neon_srhadd>; -defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_arm64_neon_sshl>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >; +defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>; +defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>; +defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; +defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; +defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>; +defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; +defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>; +defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; +defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; +defm SQRSHL : 
SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; +defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>; +defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; +defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_uabd node:$MHS, node:$RHS))> >; -defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_arm64_neon_uabd>; -defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_arm64_neon_uhadd>; -defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_arm64_neon_uhsub>; -defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_arm64_neon_umaxp>; -defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_arm64_neon_umax>; -defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_arm64_neon_uminp>; -defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_arm64_neon_umin>; -defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_arm64_neon_uqsub>; -defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_arm64_neon_urhadd>; -defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_arm64_neon_ushl>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >; +defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>; +defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>; +defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; +defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; +defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>; +defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; +defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>; +defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; +defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; +defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; +defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; +defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">; -defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", ARM64bit>; +defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>; defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl", TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>; defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; @@ -2629,22 +2631,22 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", BinOpFrag<(or node:$LHS, (vnot 
node:$RHS))> >; defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; -def : Pat<(ARM64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), +def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(ARM64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), +def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(ARM64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), +def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(ARM64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), +def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; -def : Pat<(ARM64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), +def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(ARM64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), +def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(ARM64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), +def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; -def : Pat<(ARM64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), +def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", @@ -2798,40 +2800,40 @@ def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # //===----------------------------------------------------------------------===// defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; -defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_arm64_sisd_fabd>; -def : Pat<(v1f64 (int_arm64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; +defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; +defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; +defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; +defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; +defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; +defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>; +def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FABD64 FPR64:$Rn, FPR64:$Rm)>; defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", - int_arm64_neon_facge>; + int_aarch64_neon_facge>; defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", - int_arm64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_arm64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_arm64_neon_frsqrts>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_arm64_neon_sqdmulh>; -defm 
SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_arm64_neon_sqsub>; -defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_arm64_neon_sshl>; + int_aarch64_neon_facgt>; +defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; +defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>; +defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>; +defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; +defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; +defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; +defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; +defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_arm64_neon_uqsub>; -defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_arm64_neon_ushl>; +defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; +defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; +defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; +defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; def : InstAlias<"cmls $dst, $src1, $src2", (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; @@ -2862,16 +2864,16 @@ def : InstAlias<"faclt $dst, $src1, $src2", // Advanced SIMD three scalar instructions (mixed operands). 
//===----------------------------------------------------------------------===// defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", - int_arm64_neon_sqdmulls_scalar>; + int_aarch64_neon_sqdmulls_scalar>; defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; -def : Pat<(i64 (int_arm64_neon_sqadd (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), +def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (i32 FPR32:$Rm))))), (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; -def : Pat<(i64 (int_arm64_neon_sqsub (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), +def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (i32 FPR32:$Rm))))), (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; @@ -2879,17 +2881,17 @@ def : Pat<(i64 (int_arm64_neon_sqsub (i64 FPR64:$Rd), // Advanced SIMD two scalar instructions. //===----------------------------------------------------------------------===// -defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_arm64_neon_abs>; -defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", ARM64cmltz>; -defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; +defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_aarch64_neon_abs>; +defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; +defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; +defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; +defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; +defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; +defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; @@ -2906,54 +2908,54 @@ defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", ARM64sitof>; -defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_arm64_neon_scalar_sqxtn>; -defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_arm64_neon_scalar_sqxtun>; +defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>; +defm SQABS : 
SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; +defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; +defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; +defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", - int_arm64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", ARM64uitof>; -defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_arm64_neon_scalar_uqxtn>; + int_aarch64_neon_suqadd>; +defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>; +defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", - int_arm64_neon_usqadd>; + int_aarch64_neon_usqadd>; -def : Pat<(ARM64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; +def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtas (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), (FCVTASv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtau (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), (FCVTAUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtms (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), (FCVTMSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtmu (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), (FCVTMUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtns (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), (FCVTNSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtnu (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), (FCVTNUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtps (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), (FCVTPSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtpu (v1f64 FPR64:$Rn))), +def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), (FCVTPUv1i64 FPR64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_frecpe (f32 FPR32:$Rn))), +def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), (FRECPEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frecpe (f64 FPR64:$Rn))), +def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frecpe (v1f64 FPR64:$Rn))), +def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_frecpx (f32 FPR32:$Rn))), +def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), (FRECPXv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frecpx (f64 FPR64:$Rn))), +def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), (FRECPXv1i64 FPR64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_frsqrte (f32 FPR32:$Rn))), +def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), (FRSQRTEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frsqrte (f64 FPR64:$Rn))), +def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frsqrte (v1f64 FPR64:$Rn))), +def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; // If an integer is about to be converted to a floating point value, @@ -3047,56 +3049,56 @@ def : Pat <(f64 (uint_to_fp (i32 // Advanced SIMD three different-sized 
vector instructions. //===----------------------------------------------------------------------===// -defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_arm64_neon_addhn>; -defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_arm64_neon_subhn>; -defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_arm64_neon_raddhn>; -defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_arm64_neon_rsubhn>; -defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_arm64_neon_pmull>; +defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; +defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; +defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; +defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; +defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", - int_arm64_neon_sabd>; + int_aarch64_neon_sabd>; defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", - int_arm64_neon_sabd>; + int_aarch64_neon_sabd>; defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_arm64_neon_smull>; + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>; defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", - int_arm64_neon_sqadd>; + int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", - int_arm64_neon_sqsub>; + int_aarch64_neon_sqsub>; defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", - int_arm64_neon_sqdmull>; + int_aarch64_neon_sqdmull>; defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", - int_arm64_neon_uabd>; + int_aarch64_neon_uabd>; defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - int_arm64_neon_uabd>; + int_aarch64_neon_uabd>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_arm64_neon_umull>; + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; defm 
USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; // Patterns for 64-bit pmull -def : Pat<(int_arm64_neon_pmull64 V64:$Rn, V64:$Rm), +def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), (PMULLv1i64 V64:$Rn, V64:$Rm)>; -def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), +def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), (vector_extract (v2i64 V128:$Rm), (i64 1))), (PMULLv2i64 V128:$Rn, V128:$Rm)>; @@ -3104,51 +3106,51 @@ def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), // written in LLVM IR without too much difficulty. // ADDHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), +def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), +def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))), (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), +def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))), (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), + (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), + (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))), (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), + (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))), (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; // SUBHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), +def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), +def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))), (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), +def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))), (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), + (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), + (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))), (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), + (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))), (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; @@ -3159,21 +3161,21 @@ def : Pat<(concat_vectors (v2i32 V64:$Rd), defm EXT : 
SIMDBitwiseExtract<"ext">; -def : Pat<(v4i16 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), +def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v8i16 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), +def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v2f32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), +def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v4i32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v4f32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2f64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), +def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; // We use EXT to handle extract_subvector to copy the upper 64-bits of a @@ -3196,12 +3198,12 @@ def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))), // AdvSIMD zip vector //---------------------------------------------------------------------------- -defm TRN1 : SIMDZipVector<0b010, "trn1", ARM64trn1>; -defm TRN2 : SIMDZipVector<0b110, "trn2", ARM64trn2>; -defm UZP1 : SIMDZipVector<0b001, "uzp1", ARM64uzp1>; -defm UZP2 : SIMDZipVector<0b101, "uzp2", ARM64uzp2>; -defm ZIP1 : SIMDZipVector<0b011, "zip1", ARM64zip1>; -defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>; +defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>; +defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>; +defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>; +defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; +defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; +defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; //---------------------------------------------------------------------------- // AdvSIMD TBL/TBX instructions @@ -3210,15 +3212,15 @@ defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>; defm TBL : SIMDTableLookup< 0, "tbl">; defm TBX : SIMDTableLookupTied<1, "tbx">; -def : Pat<(v8i8 (int_arm64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), +def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), +def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), (TBLv16i8One V128:$Ri, V128:$Rn)>; -def : Pat<(v8i8 (int_arm64_neon_tbx1 (v8i8 V64:$Rd), +def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbx1 (v16i8 V128:$Rd), +def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), (v16i8 V128:$Ri), (v16i8 V128:$Rn))), (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; @@ -3239,31 +3241,31 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; defm 
FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; -def : Pat<(i64 (int_arm64_neon_saddv (v2i64 V128:$Rn))), +def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))), (ADDPv2i64p V128:$Rn)>; -def : Pat<(i64 (int_arm64_neon_uaddv (v2i64 V128:$Rn))), +def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))), (ADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), (FADDPv2i32p V64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))), +def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; -def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), (FADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))), (FMAXNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), (FMAXNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), (FMAXPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), (FMAXPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))), (FMINNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))), (FMINNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))), +def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), (FMINPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))), +def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), (FMINPv2i64p V128:$Rn)>; //---------------------------------------------------------------------------- @@ -3286,27 +3288,27 @@ def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; -def : Pat<(v2f32 (ARM64dup (f32 FPR32:$Rn))), +def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), (v2f32 (DUPv2i32lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), (i64 0)))>; -def : Pat<(v4f32 (ARM64dup (f32 FPR32:$Rn))), +def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), (v4f32 (DUPv4i32lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), (i64 0)))>; -def : Pat<(v2f64 (ARM64dup (f64 FPR64:$Rn))), +def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), (v2f64 (DUPv2i64lane (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), (i64 0)))>; -def : Pat<(v2f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), +def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v4f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), +def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v2f64 (ARM64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), +def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; -// If there's an (ARM64dup (vector_extract ...) 
...), we can use a duplane +// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane // instruction even if the types don't match: we just have to remap the lane // carefully. N.b. this trick only applies to truncations. def VecIndex_x2 : SDNodeXForm { - def : Pat<(ResVT (ARM64dup (ScalVT (vector_extract (Src128VT V128:$Rn), + def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), imm:$idx)))), (DUP V128:$Rn, (IdxXFORM imm:$idx))>; - def : Pat<(ResVT (ARM64dup (ScalVT (vector_extract (Src64VT V64:$Rn), + def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), imm:$idx)))), (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; } @@ -3341,11 +3343,11 @@ defm : DUPWithTruncPats; multiclass DUPWithTrunci64Pats { - def : Pat<(ResVT (ARM64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn), + def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn), imm:$idx))))), (DUP V128:$Rn, (IdxXFORM imm:$idx))>; - def : Pat<(ResVT (ARM64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn), + def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn), imm:$idx))))), (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; } @@ -3377,7 +3379,7 @@ def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), // Extracting i8 or i16 elements will have the zero-extend transformed to // an 'and' mask by type legalization since neither i8 nor i16 are legal types -// for ARM64. Match these patterns here since UMOV already zeroes out the high +// for AArch64. Match these patterns here since UMOV already zeroes out the high // bits of the destination register. def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), (i32 0xff)), @@ -3445,25 +3447,25 @@ def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), // element of another. // FIXME refactor to a shared class/dev parameterized on vector type, vector // index type and INS extension -def : Pat<(v16i8 (int_arm64_neon_vcopy_lane +def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), VectorIndexB:$idx2)), (v16i8 (INSvi8lane V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) )>; -def : Pat<(v8i16 (int_arm64_neon_vcopy_lane +def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), VectorIndexH:$idx2)), (v8i16 (INSvi16lane V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) )>; -def : Pat<(v4i32 (int_arm64_neon_vcopy_lane +def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), VectorIndexS:$idx2)), (v4i32 (INSvi32lane V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) )>; -def : Pat<(v2i64 (int_arm64_neon_vcopy_lane +def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), VectorIndexD:$idx2)), (v2i64 (INSvi64lane @@ -3526,7 +3528,7 @@ def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), ssub))>; // All concat_vectors operations are canonicalised to act on i64 vectors for -// ARM64. In the general case we need an instruction, which had just as well be +// AArch64. In the general case we need an instruction, which had just as well be // INS. 
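As a sketch of the IR shape the concat_vectors comment covers (hypothetical example, assuming the usual lowering of a full-width shufflevector to concat_vectors): joining two 64-bit halves into one 128-bit vector is expected to select to a single lane insert, e.g. "mov v0.d[1], v1.d[0]".

  ; Hypothetical example: concatenation of two 64-bit vectors via shufflevector,
  ; expected to become one INS of the high doubleword.
  define <16 x i8> @concat_8b(<8 x i8> %lo, <8 x i8> %hi) {
    %r = shufflevector <8 x i8> %lo, <8 x i8> %hi,
         <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                     i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    ret <16 x i8> %r
  }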
class ConcatPat : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), @@ -3563,10 +3565,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>; -defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>; -defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>; -defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>; +defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; +defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; multiclass SIMDAcrossLanesSignedIntrinsic { // If there is a sign extension after this intrinsic, consume it as smov already @@ -3745,43 +3747,43 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))), dsub))>; } -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_arm64_neon_saddv>; +defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>; // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_saddv (v2i32 V64:$Rn))), +def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_arm64_neon_uaddv>; +defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>; // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_uaddv (v2i32 V64:$Rn))), +def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_arm64_neon_smaxv>; -def : Pat<(i32 (int_arm64_neon_smaxv (v2i32 V64:$Rn))), +defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>; +def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_arm64_neon_sminv>; -def : Pat<(i32 (int_arm64_neon_sminv (v2i32 V64:$Rn))), +defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>; +def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_arm64_neon_umaxv>; -def : Pat<(i32 (int_arm64_neon_umaxv (v2i32 V64:$Rn))), +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>; +def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_arm64_neon_uminv>; -def : Pat<(i32 (int_arm64_neon_uminv (v2i32 V64:$Rn))), +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>; +def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))), (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; -defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_arm64_neon_saddlv>; -defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_arm64_neon_uaddlv>; +defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; +defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; // The vaddlv_s32 intrinsic gets 
mapped to SADDLP. -def : Pat<(i64 (int_arm64_neon_saddlv (v2i32 V64:$Rn))), +def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), (i64 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (SADDLPv2i32_v1i64 V64:$Rn), dsub), dsub))>; // The vaddlv_u32 intrinsic gets mapped to UADDLP. -def : Pat<(i64 (int_arm64_neon_uaddlv (v2i32 V64:$Rn))), +def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), (i64 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (UADDLPv2i32_v1i64 V64:$Rn), dsub), @@ -3792,9 +3794,9 @@ def : Pat<(i64 (int_arm64_neon_uaddlv (v2i32 V64:$Rn))), //------------------------------------------------------------------------------ // AdvSIMD BIC -defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", ARM64bici>; +defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; // AdvSIMD ORR -defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", ARM64orri>; +defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; @@ -3819,13 +3821,13 @@ def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; // AdvSIMD FMOV def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, "fmov", ".2d", - [(set (v2f64 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; + [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, "fmov", ".2s", - [(set (v2f32 V64:$Rd), (ARM64fmov imm0_255:$imm8))]>; + [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, "fmov", ".4s", - [(set (v4f32 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; + [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; // AdvSIMD MOVI @@ -3835,7 +3837,7 @@ def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", [(set FPR64:$Rd, simdimmtype10:$imm8)]>; // The movi_edit node has the immediate value already encoded, so we use // a plain imm0_255 here. -def : Pat<(f64 (ARM64movi_edit imm0_255:$shift)), +def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), (MOVID imm0_255:$shift)>; def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; @@ -3856,7 +3858,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, simdimmtype10, "movi", ".2d", - [(set (v2i64 V128:$Rd), (ARM64movi_edit imm0_255:$imm8))]>; + [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; // Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing. 
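A hedged sketch of the case that zeroing comment targets (hypothetical test, assuming a subtarget with the zero-cycle-zeroing feature): a scalar 0.0 is expected to be materialized through a vector movi of zero rather than a move from an integer register.

  ; Hypothetical example: with zero-cycle zeroing this likely becomes
  ; "movi v0.2d, #0000000000000000", with d0 then holding 0.0.
  define double @zero_f64() {
    ret double 0.0
  }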
@@ -3880,8 +3882,8 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v2f64 (ARM64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; -def : Pat<(v4f32 (ARM64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; +def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; // EDIT per word & halfword: 2s, 4h, 4s, & 8h defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; @@ -3896,30 +3898,30 @@ def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; -def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; // EDIT per word: 2s & 4s with MSL shifter def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", [(set (v2i32 V64:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", [(set (v4i32 V128:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; // Per byte: 8b & 16b def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, "movi", ".8b", - [(set (v8i8 V64:$Rd), (ARM64movi imm0_255:$imm8))]>; + [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, "movi", ".16b", - [(set (v16i8 V128:$Rd), (ARM64movi imm0_255:$imm8))]>; + [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; // AdvSIMD MVNI @@ -3936,22 +3938,22 @@ def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; -def : Pat<(v2i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), +def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; // EDIT per word: 2s & 4s with MSL shifter def 
MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", [(set (v2i32 V64:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", [(set (v4i32 V128:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; //---------------------------------------------------------------------------- // AdvSIMD indexed element @@ -3985,11 +3987,11 @@ multiclass FMLSIndexedAfterNegPatterns { // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit // and DUP scalar. def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), VectorIndexS:$idx))), (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 + (v2f32 (AArch64duplane32 (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), (i32 0))), @@ -3998,19 +4000,19 @@ multiclass FMLSIndexedAfterNegPatterns { (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), (FMLSv2i32_indexed V64:$Rd, V64:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit // and DUP scalar. def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), VectorIndexS:$idx))), (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 + (v4f32 (AArch64duplane32 (v4f32 (insert_subvector undef, (v2f32 (fneg V64:$Rm)), (i32 0))), @@ -4019,19 +4021,19 @@ multiclass FMLSIndexedAfterNegPatterns { (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), (FMLSv4i32_indexed V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar // (DUPLANE from 64-bit would be trivial). 
def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 (fneg V128:$Rm)), + (AArch64duplane64 (v2f64 (fneg V128:$Rm)), VectorIndexD:$idx))), (FMLSv2i64_indexed V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 (fneg FPR64Op:$Rm))))), + (AArch64dup (f64 (fneg FPR64Op:$Rm))))), (FMLSv2i64_indexed V128:$Rd, V128:$Rn, (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; @@ -4060,50 +4062,50 @@ defm : FMLSIndexedAfterNegPatterns< defm : FMLSIndexedAfterNegPatterns< TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; -defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_arm64_neon_fmulx>; +defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>; -def : Pat<(v2f32 (fmul V64:$Rn, (ARM64dup (f32 FPR32:$Rm)))), +def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv2i32_indexed V64:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; -def : Pat<(v4f32 (fmul V128:$Rn, (ARM64dup (f32 FPR32:$Rm)))), +def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), (FMULv4i32_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), (i64 0))>; -def : Pat<(v2f64 (fmul V128:$Rn, (ARM64dup (f64 FPR64:$Rm)))), +def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), (FMULv2i64_indexed V128:$Rn, (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), (i64 0))>; -defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_arm64_neon_sqrdmulh>; +defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", - int_arm64_neon_smull>; + int_aarch64_neon_smull>; defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", - int_arm64_neon_sqadd>; + int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", - int_arm64_neon_sqsub>; -defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_arm64_neon_sqdmull>; + int_aarch64_neon_sqsub>; +defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", - int_arm64_neon_umull>; + 
int_aarch64_neon_umull>; // A scalar sqdmull with the second operand being a vector lane can be // handled directly with the indexed instruction encoding. -def : Pat<(int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), +def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), (vector_extract (v4i32 V128:$Vm), VectorIndexS:$idx)), (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; @@ -4118,149 +4120,149 @@ defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; // Codegen patterns for the above. We don't put these directly on the // instructions because TableGen's type inference can't handle the truth. // Having the same base pattern for fp <--> int totally freaks it out. -def : Pat<(int_arm64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), +def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), +def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), +def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), +def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), vecshiftR64:$imm)), (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), +def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), +def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), +def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), +def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), +def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), vecshiftR64:$imm)), (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), +def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), vecshiftR64:$imm)), (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", ARM64vshl>; +defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; + int_aarch64_neon_sqrshrn>; defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; + int_aarch64_neon_sqrshrun>; +defm SQSHLU : 
SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; + int_aarch64_neon_sqshrn>; defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; + int_aarch64_neon_sqshrun>; defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; -defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", ARM64srshri>; +defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))>>; -defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", ARM64vashr>; + (AArch64srshri node:$MHS, node:$RHS))>>; +defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", TriOpFrag<(add node:$LHS, - (ARM64vashr node:$MHS, node:$RHS))>>; + (AArch64vashr node:$MHS, node:$RHS))>>; defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; + int_aarch64_neon_uqrshrn>; +defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", - int_arm64_neon_uqshrn>; -defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", ARM64urshri>; + int_aarch64_neon_uqshrn>; +defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))>>; -defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", ARM64vlshr>; + (AArch64urshri node:$MHS, node:$RHS))>>; +defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", TriOpFrag<(add node:$LHS, - (ARM64vlshr node:$MHS, node:$RHS))>>; + (AArch64vlshr node:$MHS, node:$RHS))>>; //---------------------------------------------------------------------------- // AdvSIMD vector shift instructions //---------------------------------------------------------------------------- -defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_arm64_neon_vcvtfp2fxs>; -defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_arm64_neon_vcvtfp2fxu>; +defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; +defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", - int_arm64_neon_vcvtfxs2fp>; + int_aarch64_neon_vcvtfxs2fp>; defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", - int_arm64_neon_rshrn>; -defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", ARM64vshl>; + int_aarch64_neon_rshrn>; +defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", - BinOpFrag<(trunc (ARM64vashr node:$LHS, node:$RHS))>>; -defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_arm64_neon_vsli>; -def : Pat<(v1i64 (int_arm64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; +defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>; +def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))), (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; + int_aarch64_neon_sqrshrn>; defm 
SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; + int_aarch64_neon_sqrshrun>; +defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; + int_aarch64_neon_sqshrn>; defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; -defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_arm64_neon_vsri>; -def : Pat<(v1i64 (int_arm64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + int_aarch64_neon_sqshrun>; +defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>; +def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; -defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", ARM64srshri>; +defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))> >; + (AArch64srshri node:$MHS, node:$RHS))> >; defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", - BinOpFrag<(ARM64vshl (sext node:$LHS), node:$RHS)>>; + BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; -defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", ARM64vashr>; +defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, (ARM64vashr node:$MHS, node:$RHS))>>; + TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", - int_arm64_neon_vcvtfxu2fp>; + int_aarch64_neon_vcvtfxu2fp>; defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; + int_aarch64_neon_uqrshrn>; +defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", - int_arm64_neon_uqshrn>; -defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", ARM64urshri>; + int_aarch64_neon_uqshrn>; +defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))> >; + (AArch64urshri node:$MHS, node:$RHS))> >; defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", - BinOpFrag<(ARM64vshl (zext node:$LHS), node:$RHS)>>; -defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", ARM64vlshr>; + BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; +defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, (ARM64vlshr node:$MHS, node:$RHS))> >; + TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; // SHRN patterns for when a logical right shift was used instead of arithmetic // (the immediate guarantees no sign bits actually end up in the result so it // doesn't matter). 
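(Aside, not part of the patch: the comment above can be checked directly. For the 16-to-8-bit narrowing case the shift amount is at most 8, so the byte that survives the truncation comes entirely from the original 16 bits and never from the bits shifted in at the top; a logical and an arithmetic right shift therefore produce the same result. A minimal C++ check of that claim, with the arithmetic shift emulated explicitly so nothing relies on implementation-defined behaviour:)

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned s = 1; s <= 8; ++s) {        // shift amounts for the 16 -> 8 narrow
        for (uint32_t u = 0; u <= 0xFFFF; ++u) { // every 16-bit lane value
          uint32_t sign = (u & 0x8000u) ? 0xFFFF0000u : 0u;   // emulated sign fill
          uint8_t fromLshr = static_cast<uint8_t>(u >> s);            // logical shift
          uint8_t fromAshr = static_cast<uint8_t>((sign | u) >> s);   // arithmetic shift
          assert(fromLshr == fromAshr); // identical low byte in all cases
        }
      }
      return 0;
    }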
-def : Pat<(v8i8 (trunc (ARM64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), +def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; -def : Pat<(v4i16 (trunc (ARM64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), +def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; -def : Pat<(v2i32 (trunc (ARM64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), +def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), - (trunc (ARM64vlshr (v8i16 V128:$Rn), + (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm)))), (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR16Narrow:$imm)>; def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), - (trunc (ARM64vlshr (v4i32 V128:$Rn), + (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm)))), (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR32Narrow:$imm)>; def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), - (trunc (ARM64vlshr (v2i64 V128:$Rn), + (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm)))), (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn, vecshiftR32Narrow:$imm)>; @@ -4530,30 +4532,30 @@ defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; } -def : Pat<(v8i8 (ARM64dup (i32 (extloadi8 GPR64sp:$Rn)))), +def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), (LD1Rv8b GPR64sp:$Rn)>; -def : Pat<(v16i8 (ARM64dup (i32 (extloadi8 GPR64sp:$Rn)))), +def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), (LD1Rv16b GPR64sp:$Rn)>; -def : Pat<(v4i16 (ARM64dup (i32 (extloadi16 GPR64sp:$Rn)))), +def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), (LD1Rv4h GPR64sp:$Rn)>; -def : Pat<(v8i16 (ARM64dup (i32 (extloadi16 GPR64sp:$Rn)))), +def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), (LD1Rv8h GPR64sp:$Rn)>; -def : Pat<(v2i32 (ARM64dup (i32 (load GPR64sp:$Rn)))), +def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), (LD1Rv2s GPR64sp:$Rn)>; -def : Pat<(v4i32 (ARM64dup (i32 (load GPR64sp:$Rn)))), +def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), (LD1Rv4s GPR64sp:$Rn)>; -def : Pat<(v2i64 (ARM64dup (i64 (load GPR64sp:$Rn)))), +def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; -def : Pat<(v1i64 (ARM64dup (i64 (load GPR64sp:$Rn)))), +def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; // Grab the floating point version too -def : Pat<(v2f32 (ARM64dup (f32 (load GPR64sp:$Rn)))), +def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), (LD1Rv2s GPR64sp:$Rn)>; -def : Pat<(v4f32 (ARM64dup (f32 (load GPR64sp:$Rn)))), +def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), (LD1Rv4s GPR64sp:$Rn)>; -def : Pat<(v2f64 (ARM64dup (f64 (load GPR64sp:$Rn)))), +def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; -def : Pat<(v1f64 (ARM64dup (f64 (load GPR64sp:$Rn)))), +def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; class Ld1Lane128Pat; // Crypto extensions //---------------------------------------------------------------------------- -def AESErr : AESTiedInst<0b0100, "aese", int_arm64_crypto_aese>; -def AESDrr : 
AESTiedInst<0b0101, "aesd", int_arm64_crypto_aesd>; -def AESMCrr : AESInst< 0b0110, "aesmc", int_arm64_crypto_aesmc>; -def AESIMCrr : AESInst< 0b0111, "aesimc", int_arm64_crypto_aesimc>; +def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; +def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; +def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; +def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; -def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_arm64_crypto_sha1c>; -def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_arm64_crypto_sha1p>; -def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_arm64_crypto_sha1m>; -def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_arm64_crypto_sha1su0>; -def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_arm64_crypto_sha256h>; -def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_arm64_crypto_sha256h2>; -def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_arm64_crypto_sha256su1>; +def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; +def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; +def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; +def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; +def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; +def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; +def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; -def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_arm64_crypto_sha1h>; -def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_arm64_crypto_sha1su1>; -def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_arm64_crypto_sha256su0>; +def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; +def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; +def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; //---------------------------------------------------------------------------- // Compiler-pseudos @@ -4799,7 +4801,7 @@ def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), def : Pat<(i32 (trunc GPR64sp:$src)), (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; -// __builtin_trap() uses the BRK instruction on ARM64. +// __builtin_trap() uses the BRK instruction on AArch64. def : Pat<(trap), (BRK 1)>; // Conversions within AdvSIMD types in the same register size are free. @@ -5256,13 +5258,13 @@ def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; // Scalar 64-bit shifts in FPR64 registers. -def : Pat<(i64 (int_arm64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), +def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), +def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), +def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), +def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; // Tail call return handling. 
These are all compiler pseudo-instructions, @@ -5272,11 +5274,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>; } -def : Pat<(ARM64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), +def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>; -def : Pat<(ARM64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), +def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; -def : Pat<(ARM64tcret texternalsym:$dst, (i32 timm:$FPDiff)), +def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; -include "ARM64InstrAtomics.td" +include "AArch64InstrAtomics.td" diff --git a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp similarity index 80% rename from lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp rename to lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index e2c4b13f036..e7454be125b 100644 --- a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- ARM64LoadStoreOptimizer.cpp - ARM64 load/store opt. pass --*- C++ -*-=// +//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "ARM64InstrInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64InstrInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -29,9 +29,9 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -#define DEBUG_TYPE "arm64-ldst-opt" +#define DEBUG_TYPE "aarch64-ldst-opt" -/// ARM64AllocLoadStoreOpt - Post-register allocation pass to combine +/// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine /// load / store instructions to form ldp / stp instructions. 
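For context on the pass this file implements (the renaming does not change its behaviour): it walks each basic block looking for two loads or stores off the same base register at adjacent offsets and rewrites them as a single ldp/stp. A stripped-down sketch of the pairing condition, written here for illustration only and assuming away the register-dependency, volatility and immediate-range checks the real pass performs:

    #include <cstdint>

    // Illustrative model only: two accesses can be paired when they use the
    // same base register, have the same access size, and sit exactly one
    // access apart in memory.
    struct MemAccess {
      unsigned BaseReg; // register the address is formed from
      int64_t Offset;   // byte offset from BaseReg
      int64_t Size;     // bytes moved (4, 8 or 16, per getMemSize below)
    };

    static bool canFormPair(const MemAccess &A, const MemAccess &B) {
      if (A.BaseReg != B.BaseReg || A.Size != B.Size)
        return false;
      int64_t Delta = B.Offset - A.Offset;
      return Delta == A.Size || Delta == -A.Size; // adjacent, in either order
    }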
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); @@ -40,21 +40,21 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded"); STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); -static cl::opt ScanLimit("arm64-load-store-scan-limit", cl::init(20), +static cl::opt ScanLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden); // Place holder while testing unscaled load/store combining static cl::opt -EnableARM64UnscaledMemOp("arm64-unscaled-mem-op", cl::Hidden, - cl::desc("Allow ARM64 unscaled load/store combining"), +EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden, + cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true)); namespace { -struct ARM64LoadStoreOpt : public MachineFunctionPass { +struct AArch64LoadStoreOpt : public MachineFunctionPass { static char ID; - ARM64LoadStoreOpt() : MachineFunctionPass(ID) {} + AArch64LoadStoreOpt() : MachineFunctionPass(ID) {} - const ARM64InstrInfo *TII; + const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; // Scan the instructions looking for a load/store that can be combined @@ -102,76 +102,76 @@ struct ARM64LoadStoreOpt : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "ARM64 load / store optimization pass"; + return "AArch64 load / store optimization pass"; } private: int getMemSize(MachineInstr *MemMI); }; -char ARM64LoadStoreOpt::ID = 0; +char AArch64LoadStoreOpt::ID = 0; } static bool isUnscaledLdst(unsigned Opc) { switch (Opc) { default: return false; - case ARM64::STURSi: + case AArch64::STURSi: return true; - case ARM64::STURDi: + case AArch64::STURDi: return true; - case ARM64::STURQi: + case AArch64::STURQi: return true; - case ARM64::STURWi: + case AArch64::STURWi: return true; - case ARM64::STURXi: + case AArch64::STURXi: return true; - case ARM64::LDURSi: + case AArch64::LDURSi: return true; - case ARM64::LDURDi: + case AArch64::LDURDi: return true; - case ARM64::LDURQi: + case AArch64::LDURQi: return true; - case ARM64::LDURWi: + case AArch64::LDURWi: return true; - case ARM64::LDURXi: + case AArch64::LDURXi: return true; } } // Size in bytes of the data moved by an unscaled load or store -int ARM64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { +int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { switch (MemMI->getOpcode()) { default: llvm_unreachable("Opcode has has unknown size!"); - case ARM64::STRSui: - case ARM64::STURSi: + case AArch64::STRSui: + case AArch64::STURSi: return 4; - case ARM64::STRDui: - case ARM64::STURDi: + case AArch64::STRDui: + case AArch64::STURDi: return 8; - case ARM64::STRQui: - case ARM64::STURQi: + case AArch64::STRQui: + case AArch64::STURQi: return 16; - case ARM64::STRWui: - case ARM64::STURWi: + case AArch64::STRWui: + case AArch64::STURWi: return 4; - case ARM64::STRXui: - case ARM64::STURXi: + case AArch64::STRXui: + case AArch64::STURXi: return 8; - case ARM64::LDRSui: - case ARM64::LDURSi: + case AArch64::LDRSui: + case AArch64::LDURSi: return 4; - case ARM64::LDRDui: - case ARM64::LDURDi: + case AArch64::LDRDui: + case AArch64::LDURDi: return 8; - case ARM64::LDRQui: - case ARM64::LDURQi: + case AArch64::LDRQui: + case AArch64::LDURQi: return 16; - case ARM64::LDRWui: - case ARM64::LDURWi: + case AArch64::LDRWui: + case AArch64::LDURWi: return 4; - case ARM64::LDRXui: - case ARM64::LDURXi: + case AArch64::LDRXui: + case AArch64::LDURXi: return 8; } } @@ -180,36 +180,36 @@ static 
unsigned getMatchingPairOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no pairwise equivalent!"); - case ARM64::STRSui: - case ARM64::STURSi: - return ARM64::STPSi; - case ARM64::STRDui: - case ARM64::STURDi: - return ARM64::STPDi; - case ARM64::STRQui: - case ARM64::STURQi: - return ARM64::STPQi; - case ARM64::STRWui: - case ARM64::STURWi: - return ARM64::STPWi; - case ARM64::STRXui: - case ARM64::STURXi: - return ARM64::STPXi; - case ARM64::LDRSui: - case ARM64::LDURSi: - return ARM64::LDPSi; - case ARM64::LDRDui: - case ARM64::LDURDi: - return ARM64::LDPDi; - case ARM64::LDRQui: - case ARM64::LDURQi: - return ARM64::LDPQi; - case ARM64::LDRWui: - case ARM64::LDURWi: - return ARM64::LDPWi; - case ARM64::LDRXui: - case ARM64::LDURXi: - return ARM64::LDPXi; + case AArch64::STRSui: + case AArch64::STURSi: + return AArch64::STPSi; + case AArch64::STRDui: + case AArch64::STURDi: + return AArch64::STPDi; + case AArch64::STRQui: + case AArch64::STURQi: + return AArch64::STPQi; + case AArch64::STRWui: + case AArch64::STURWi: + return AArch64::STPWi; + case AArch64::STRXui: + case AArch64::STURXi: + return AArch64::STPXi; + case AArch64::LDRSui: + case AArch64::LDURSi: + return AArch64::LDPSi; + case AArch64::LDRDui: + case AArch64::LDURDi: + return AArch64::LDPDi; + case AArch64::LDRQui: + case AArch64::LDURQi: + return AArch64::LDPQi; + case AArch64::LDRWui: + case AArch64::LDURWi: + return AArch64::LDPWi; + case AArch64::LDRXui: + case AArch64::LDURXi: + return AArch64::LDPXi; } } @@ -217,16 +217,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no pre-indexed equivalent!"); - case ARM64::STRSui: return ARM64::STRSpre; - case ARM64::STRDui: return ARM64::STRDpre; - case ARM64::STRQui: return ARM64::STRQpre; - case ARM64::STRWui: return ARM64::STRWpre; - case ARM64::STRXui: return ARM64::STRXpre; - case ARM64::LDRSui: return ARM64::LDRSpre; - case ARM64::LDRDui: return ARM64::LDRDpre; - case ARM64::LDRQui: return ARM64::LDRQpre; - case ARM64::LDRWui: return ARM64::LDRWpre; - case ARM64::LDRXui: return ARM64::LDRXpre; + case AArch64::STRSui: return AArch64::STRSpre; + case AArch64::STRDui: return AArch64::STRDpre; + case AArch64::STRQui: return AArch64::STRQpre; + case AArch64::STRWui: return AArch64::STRWpre; + case AArch64::STRXui: return AArch64::STRXpre; + case AArch64::LDRSui: return AArch64::LDRSpre; + case AArch64::LDRDui: return AArch64::LDRDpre; + case AArch64::LDRQui: return AArch64::LDRQpre; + case AArch64::LDRWui: return AArch64::LDRWpre; + case AArch64::LDRXui: return AArch64::LDRXpre; } } @@ -234,33 +234,33 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no post-indexed wise equivalent!"); - case ARM64::STRSui: - return ARM64::STRSpost; - case ARM64::STRDui: - return ARM64::STRDpost; - case ARM64::STRQui: - return ARM64::STRQpost; - case ARM64::STRWui: - return ARM64::STRWpost; - case ARM64::STRXui: - return ARM64::STRXpost; - case ARM64::LDRSui: - return ARM64::LDRSpost; - case ARM64::LDRDui: - return ARM64::LDRDpost; - case ARM64::LDRQui: - return ARM64::LDRQpost; - case ARM64::LDRWui: - return ARM64::LDRWpost; - case ARM64::LDRXui: - return ARM64::LDRXpost; + case AArch64::STRSui: + return AArch64::STRSpost; + case AArch64::STRDui: + return AArch64::STRDpost; + case AArch64::STRQui: + return AArch64::STRQpost; + case AArch64::STRWui: + return AArch64::STRWpost; + case AArch64::STRXui: + return AArch64::STRXpost; + case 
AArch64::LDRSui: + return AArch64::LDRSpost; + case AArch64::LDRDui: + return AArch64::LDRDpost; + case AArch64::LDRQui: + return AArch64::LDRQpost; + case AArch64::LDRWui: + return AArch64::LDRWpost; + case AArch64::LDRXui: + return AArch64::LDRXpost; } } MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - bool mergeForward) { +AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, + bool mergeForward) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -271,7 +271,8 @@ ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, ++NextI; bool IsUnscaled = isUnscaledLdst(I->getOpcode()); - int OffsetStride = IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(I) : 1; + int OffsetStride = + IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1; unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); // Insert our new paired instruction after whichever of the paired @@ -294,7 +295,7 @@ ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, } // Handle Unscaled int OffsetImm = RtMI->getOperand(2).getImm(); - if (IsUnscaled && EnableARM64UnscaledMemOp) + if (IsUnscaled && EnableAArch64UnscaledMemOp) OffsetImm /= OffsetStride; // Construct the new instruction. @@ -372,8 +373,8 @@ static int alignTo(int Num, int PowOf2) { /// findMatchingInsn - Scan the instructions looking for a load/store that can /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, unsigned Limit) { +AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, + bool &mergeForward, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; @@ -394,7 +395,7 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, if (FirstMI->modifiesRegister(BaseReg, TRI)) return E; int OffsetStride = - IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(FirstMI) : 1; + IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return E; @@ -444,7 +445,7 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the alignment requirements of the paired (scaled) instruction // can't express the offset of the unscaled input, bail and keep // looking. 
- if (IsUnscaled && EnableARM64UnscaledMemOp && + if (IsUnscaled && EnableAArch64UnscaledMemOp && (alignTo(MinOffset, OffsetStride) != MinOffset)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); continue; @@ -507,10 +508,10 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, } MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && +AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update) { + assert((Update->getOpcode() == AArch64::ADDXri || + Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); MachineBasicBlock::iterator NextI = I; // Return the instruction following the merged instruction, which is @@ -520,9 +521,9 @@ ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, ++NextI; int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && + assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && "Can't merge 1 << 12 offset into pre-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) + if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); @@ -550,11 +551,10 @@ ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, return NextI; } -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && +MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn( + MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) { + assert((Update->getOpcode() == AArch64::ADDXri || + Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); MachineBasicBlock::iterator NextI = I; // Return the instruction following the merged instruction, which is @@ -564,9 +564,9 @@ ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, ++NextI; int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && + assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && "Can't merge 1 << 12 offset into post-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) + if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); @@ -599,17 +599,17 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, switch (MI->getOpcode()) { default: break; - case ARM64::SUBXri: + case AArch64::SUBXri: // Negate the offset for a SUB instruction. Offset *= -1; // FALLTHROUGH - case ARM64::ADDXri: + case AArch64::ADDXri: // Make sure it's a vanilla immediate operand, not a relocation or // anything else we can't handle. if (!MI->getOperand(2).isImm()) break; // Watch out for 1 << 12 shifted value. - if (ARM64_AM::getShiftValue(MI->getOperand(3).getImm())) + if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) break; // If the instruction has the base register as source and dest and the // immediate will fit in a signed 9-bit integer, then we have a match. 
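The surrounding hunks also fold base-register updates (ADDXri/SUBXri) into pre- and post-indexed load/store forms. A condensed sketch of the immediate check described in the comments above, again an illustration rather than the LLVM code itself:

    #include <cstdint>

    // The update can only be folded if it is an unshifted ADD/SUB immediate
    // whose value, negated for SUB, fits the signed 9-bit offset field of the
    // pre/post-indexed load/store encodings.
    static bool canFoldBaseUpdate(int64_t Imm, bool IsSub, unsigned ShiftAmount) {
      if (ShiftAmount != 0) // the "immediate << 12" ADD/SUB forms are rejected
        return false;
      int64_t Value = IsSub ? -Imm : Imm;
      return Value >= -256 && Value <= 255; // signed 9-bit range
    }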
@@ -627,9 +627,8 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, return false; } -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, - unsigned Limit, int Value) { +MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( + MachineBasicBlock::iterator I, unsigned Limit, int Value) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr *MemMI = I; MachineBasicBlock::iterator MBBI = I; @@ -682,9 +681,8 @@ ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, return E; } -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, - unsigned Limit) { +MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( + MachineBasicBlock::iterator I, unsigned Limit) { MachineBasicBlock::iterator B = I->getParent()->begin(); MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr *MemMI = I; @@ -736,7 +734,7 @@ ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, return E; } -bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { +bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { bool Modified = false; // Two tranformations to do here: // 1) Find loads and stores that can be merged into a single load or store @@ -762,27 +760,27 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { // Just move on to the next instruction. ++MBBI; break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { + case AArch64::STURSi: + case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURWi: + case AArch64::STURXi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURWi: + case AArch64::LDURXi: { // If this is a volatile load/store, don't mess with it. if (MI->hasOrderedMemoryRef()) { ++MBBI; @@ -794,7 +792,7 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { break; } // Check if this load/store has a hint to avoid pair formation. - // MachineMemOperands hints are set by the ARM64StorePairSuppress pass. + // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. if (TII->isLdStPairSuppressed(MI)) { ++MBBI; break; @@ -833,27 +831,27 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { // Just move on to the next instruction. 
++MBBI; break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { + case AArch64::STURSi: + case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURWi: + case AArch64::STURXi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURWi: + case AArch64::LDURXi: { // Make sure this is a reg+imm (as opposed to an address reloc). if (!MI->getOperand(2).isImm()) { ++MBBI; @@ -922,9 +920,9 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { return Modified; } -bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { +bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); - TII = static_cast(TM.getInstrInfo()); + TII = static_cast(TM.getInstrInfo()); TRI = TM.getRegisterInfo(); bool Modified = false; @@ -939,6 +937,6 @@ bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { /// createARMLoadStoreOptimizationPass - returns an instance of the load / store /// optimization pass. -FunctionPass *llvm::createARM64LoadStoreOptimizationPass() { - return new ARM64LoadStoreOpt(); +FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { + return new AArch64LoadStoreOpt(); } diff --git a/lib/Target/ARM64/ARM64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp similarity index 58% rename from lib/Target/ARM64/ARM64MCInstLower.cpp rename to lib/Target/AArch64/AArch64MCInstLower.cpp index 525f484ca4c..ab6d37532a7 100644 --- a/lib/Target/ARM64/ARM64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -1,4 +1,4 @@ -//===-- ARM64MCInstLower.cpp - Convert ARM64 MachineInstr to an MCInst---===// +//==-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst --==// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This file contains code to lower ARM64 MachineInstrs to their corresponding +// This file contains code to lower AArch64 MachineInstrs to their corresponding // MCInst records. 
// //===----------------------------------------------------------------------===// -#include "ARM64MCInstLower.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "Utils/ARM64BaseInfo.h" +#include "AArch64MCInstLower.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" @@ -25,46 +25,46 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -ARM64MCInstLower::ARM64MCInstLower(MCContext &ctx, Mangler &mang, - AsmPrinter &printer) +AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, Mangler &mang, + AsmPrinter &printer) : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} MCSymbol * -ARM64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { +AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { return Printer.getSymbol(MO.getGlobal()); } MCSymbol * -ARM64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const { +AArch64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const { return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); } -MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, - MCSymbol *Sym) const { +MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, + MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - if ((MO.getTargetFlags() & ARM64II::MO_GOT) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) + if ((MO.getTargetFlags() & AArch64II::MO_GOT) != 0) { + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_GOTPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; else assert(0 && "Unexpected target flags with MO_GOT on GV operand"); - } else if ((MO.getTargetFlags() & ARM64II::MO_TLS) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) + } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) { + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_TLVPPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF; else llvm_unreachable("Unexpected target flags with MO_TLS on GV operand"); } else { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_PAGEOFF; } const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); @@ -74,13 +74,13 @@ MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, return MCOperand::CreateExpr(Expr); } -MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, - MCSymbol *Sym) const { +MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, + MCSymbol *Sym) 
const { uint32_t RefFlags = 0; - if (MO.getTargetFlags() & ARM64II::MO_GOT) - RefFlags |= ARM64MCExpr::VK_GOT; - else if (MO.getTargetFlags() & ARM64II::MO_TLS) { + if (MO.getTargetFlags() & AArch64II::MO_GOT) + RefFlags |= AArch64MCExpr::VK_GOT; + else if (MO.getTargetFlags() & AArch64II::MO_TLS) { TLSModel::Model Model; if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); @@ -93,39 +93,40 @@ MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, } switch (Model) { case TLSModel::InitialExec: - RefFlags |= ARM64MCExpr::VK_GOTTPREL; + RefFlags |= AArch64MCExpr::VK_GOTTPREL; break; case TLSModel::LocalExec: - RefFlags |= ARM64MCExpr::VK_TPREL; + RefFlags |= AArch64MCExpr::VK_TPREL; break; case TLSModel::LocalDynamic: - RefFlags |= ARM64MCExpr::VK_DTPREL; + RefFlags |= AArch64MCExpr::VK_DTPREL; break; case TLSModel::GeneralDynamic: - RefFlags |= ARM64MCExpr::VK_TLSDESC; + RefFlags |= AArch64MCExpr::VK_TLSDESC; break; } } else { // No modifier means this is a generic reference, classified as absolute for // the cases where it matters (:abs_g0: etc). - RefFlags |= ARM64MCExpr::VK_ABS; + RefFlags |= AArch64MCExpr::VK_ABS; } - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefFlags |= ARM64MCExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGEOFF) - RefFlags |= ARM64MCExpr::VK_PAGEOFF; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G3) - RefFlags |= ARM64MCExpr::VK_G3; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G2) - RefFlags |= ARM64MCExpr::VK_G2; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G1) - RefFlags |= ARM64MCExpr::VK_G1; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G0) - RefFlags |= ARM64MCExpr::VK_G0; - - if (MO.getTargetFlags() & ARM64II::MO_NC) - RefFlags |= ARM64MCExpr::VK_NC; + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) + RefFlags |= AArch64MCExpr::VK_PAGE; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) + RefFlags |= AArch64MCExpr::VK_PAGEOFF; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3) + RefFlags |= AArch64MCExpr::VK_G3; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G2) + RefFlags |= AArch64MCExpr::VK_G2; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G1) + RefFlags |= AArch64MCExpr::VK_G1; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0) + RefFlags |= AArch64MCExpr::VK_G0; + + if (MO.getTargetFlags() & AArch64II::MO_NC) + RefFlags |= AArch64MCExpr::VK_NC; const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx); @@ -133,15 +134,15 @@ MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, Expr = MCBinaryExpr::CreateAdd( Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - ARM64MCExpr::VariantKind RefKind; - RefKind = static_cast(RefFlags); - Expr = ARM64MCExpr::Create(Expr, RefKind, Ctx); + AArch64MCExpr::VariantKind RefKind; + RefKind = static_cast(RefFlags); + Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx); return MCOperand::CreateExpr(Expr); } -MCOperand ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MCSymbol *Sym) const { +MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { if (TargetTriple.isOSDarwin()) return lowerSymbolOperandDarwin(MO, Sym); @@ -149,8 +150,8 @@ MCOperand 
ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, return lowerSymbolOperandELF(MO, Sym); } -bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO, - MCOperand &MCOp) const { +bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, + MCOperand &MCOp) const { switch (MO.getType()) { default: assert(0 && "unknown operand type"); @@ -190,7 +191,7 @@ bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO, return true; } -void ARM64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { +void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { diff --git a/lib/Target/ARM64/ARM64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h similarity index 78% rename from lib/Target/ARM64/ARM64MCInstLower.h rename to lib/Target/AArch64/AArch64MCInstLower.h index 7e3a2c8e54f..ba50ba9e2fe 100644 --- a/lib/Target/ARM64/ARM64MCInstLower.h +++ b/lib/Target/AArch64/AArch64MCInstLower.h @@ -1,4 +1,4 @@ -//===-- ARM64MCInstLower.h - Lower MachineInstr to MCInst ----------------===// +//===-- AArch64MCInstLower.h - Lower MachineInstr to MCInst ---------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM64_MCINSTLOWER_H -#define ARM64_MCINSTLOWER_H +#ifndef AArch64_MCINSTLOWER_H +#define AArch64_MCINSTLOWER_H #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" @@ -25,15 +25,15 @@ class MachineModuleInfoMachO; class MachineOperand; class Mangler; -/// ARM64MCInstLower - This class is used to lower an MachineInstr +/// AArch64MCInstLower - This class is used to lower an MachineInstr /// into an MCInst. -class LLVM_LIBRARY_VISIBILITY ARM64MCInstLower { +class LLVM_LIBRARY_VISIBILITY AArch64MCInstLower { MCContext &Ctx; AsmPrinter &Printer; Triple TargetTriple; public: - ARM64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer); + AArch64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer); bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; void Lower(const MachineInstr *MI, MCInst &OutMI) const; diff --git a/lib/Target/ARM64/ARM64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h similarity index 90% rename from lib/Target/ARM64/ARM64MachineFunctionInfo.h rename to lib/Target/AArch64/AArch64MachineFunctionInfo.h index 0b6f4f1ec64..7c257ba9116 100644 --- a/lib/Target/ARM64/ARM64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===- ARM64MachineFuctionInfo.h - ARM64 machine function info --*- C++ -*-===// +//=- AArch64MachineFuctionInfo.h - AArch64 machine function info --*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file declares ARM64-specific per-machine-function information. +// This file declares AArch64-specific per-machine-function information. 
// //===----------------------------------------------------------------------===// -#ifndef ARM64MACHINEFUNCTIONINFO_H -#define ARM64MACHINEFUNCTIONINFO_H +#ifndef AArch64MACHINEFUNCTIONINFO_H +#define AArch64MACHINEFUNCTIONINFO_H #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -21,9 +21,9 @@ namespace llvm { -/// ARM64FunctionInfo - This class is derived from MachineFunctionInfo and -/// contains private ARM64-specific information for each MachineFunction. -class ARM64FunctionInfo : public MachineFunctionInfo { +/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and +/// contains private AArch64-specific information for each MachineFunction. +class AArch64FunctionInfo : public MachineFunctionInfo { /// Number of bytes of arguments this function has on the stack. If the callee /// is expected to restore the argument stack this should be a multiple of 16, @@ -73,12 +73,12 @@ class ARM64FunctionInfo : public MachineFunctionInfo { unsigned VarArgsFPRSize; public: - ARM64FunctionInfo() + AArch64FunctionInfo() : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {} - explicit ARM64FunctionInfo(MachineFunction &MF) + explicit AArch64FunctionInfo(MachineFunction &MF) : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) { @@ -160,4 +160,4 @@ private: }; } // End llvm namespace -#endif // ARM64MACHINEFUNCTIONINFO_H +#endif // AArch64MACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM64/ARM64PerfectShuffle.h b/lib/Target/AArch64/AArch64PerfectShuffle.h similarity index 99% rename from lib/Target/ARM64/ARM64PerfectShuffle.h rename to lib/Target/AArch64/AArch64PerfectShuffle.h index 6759236fd14..b22fa2424d5 100644 --- a/lib/Target/ARM64/ARM64PerfectShuffle.h +++ b/lib/Target/AArch64/AArch64PerfectShuffle.h @@ -1,4 +1,4 @@ -//===-- ARM64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -------------===// +//===-- AArch64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -----------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM64/ARM64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp similarity index 92% rename from lib/Target/ARM64/ARM64PromoteConstant.cpp rename to lib/Target/AArch64/AArch64PromoteConstant.cpp index e61a62262d3..4723cc4978e 100644 --- a/lib/Target/ARM64/ARM64PromoteConstant.cpp +++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp @@ -1,5 +1,4 @@ - -//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===// +//=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==// // // The LLVM Compiler Infrastructure // @@ -8,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64PromoteConstant pass which promotes constants +// This file implements the AArch64PromoteConstant pass which promotes constants // to global variables when this is likely to be more efficient. Currently only // types related to constant vector (i.e., constant vector, array of constant // vectors, constant structure with a constant vector field, etc.) are promoted @@ -21,7 +20,7 @@ // FIXME: This pass may be useful for other targets too. 
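As a hypothetical source-level illustration of the kind of code this pass targets (not taken from the patch, and the pass itself runs on LLVM IR rather than C++; compiling it needs the NEON intrinsics from <arm_neon.h>): a constant vector used in several places can otherwise be rematerialized at each use, whereas promoting it to a module-level global lets every use share a single load.

    #include <arm_neon.h>

    // Hypothetical example: K is the sort of constant vector the pass promotes.
    int32x4_t scale_and_sum(int32x4_t a, int32x4_t b, int32x4_t c) {
      const int32x4_t K = {1, 2, 3, 4}; // same constant vector, three uses
      a = vmulq_s32(a, K);
      b = vmulq_s32(b, K);
      c = vmulq_s32(c, K);
      return vaddq_s32(vaddq_s32(a, b), c);
    }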
//===----------------------------------------------------------------------===// -#include "ARM64.h" +#include "AArch64.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" @@ -41,17 +40,17 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-promote-const" +#define DEBUG_TYPE "aarch64-promote-const" // Stress testing mode - disable heuristics. -static cl::opt Stress("arm64-stress-promote-const", cl::Hidden, +static cl::opt Stress("aarch64-stress-promote-const", cl::Hidden, cl::desc("Promote all vector constants")); STATISTIC(NumPromoted, "Number of promoted constants"); STATISTIC(NumPromotedUses, "Number of promoted constants uses"); //===----------------------------------------------------------------------===// -// ARM64PromoteConstant +// AArch64PromoteConstant //===----------------------------------------------------------------------===// namespace { @@ -81,13 +80,13 @@ namespace { /// /// Therefore the final assembly final has 4 different loads. With this pass /// enabled, only one load is issued for the constants. -class ARM64PromoteConstant : public ModulePass { +class AArch64PromoteConstant : public ModulePass { public: static char ID; - ARM64PromoteConstant() : ModulePass(ID) {} + AArch64PromoteConstant() : ModulePass(ID) {} - const char *getPassName() const override { return "ARM64 Promote Constant"; } + const char *getPassName() const override { return "AArch64 Promote Constant"; } /// Iterate over the functions and promote the interesting constants into /// global variables with module scope. @@ -202,20 +201,20 @@ private: }; } // end anonymous namespace -char ARM64PromoteConstant::ID = 0; +char AArch64PromoteConstant::ID = 0; namespace llvm { -void initializeARM64PromoteConstantPass(PassRegistry &); +void initializeAArch64PromoteConstantPass(PassRegistry &); } -INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const", - "ARM64 Promote Constant Pass", false, false) +INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const", + "AArch64 Promote Constant Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const", - "ARM64 Promote Constant Pass", false, false) +INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const", + "AArch64 Promote Constant Pass", false, false) -ModulePass *llvm::createARM64PromoteConstantPass() { - return new ARM64PromoteConstant(); +ModulePass *llvm::createAArch64PromoteConstantPass() { + return new AArch64PromoteConstant(); } /// Check if the given type uses a vector type. @@ -330,7 +329,7 @@ static bool shouldConvert(const Constant *Cst) { } Instruction * -ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { +AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { // If this user is a phi, the insertion point is in the related // incoming basic block. 
PHINode *PhiInst = dyn_cast(*Use); @@ -344,9 +343,9 @@ ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { return InsertionPoint; } -bool ARM64PromoteConstant::isDominated(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints &InsertPts) { +bool AArch64PromoteConstant::isDominated(Instruction *NewPt, + Value::user_iterator &UseIt, + InsertionPoints &InsertPts) { DominatorTree &DT = getAnalysis( *NewPt->getParent()->getParent()).getDomTree(); @@ -371,9 +370,9 @@ bool ARM64PromoteConstant::isDominated(Instruction *NewPt, return false; } -bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints &InsertPts) { +bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, + Value::user_iterator &UseIt, + InsertionPoints &InsertPts) { DominatorTree &DT = getAnalysis( *NewPt->getParent()->getParent()).getDomTree(); BasicBlock *NewBB = NewPt->getParent(); @@ -422,7 +421,7 @@ bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt, return false; } -void ARM64PromoteConstant::computeInsertionPoints( +void AArch64PromoteConstant::computeInsertionPoints( Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) { DEBUG(dbgs() << "** Compute insertion points **\n"); for (Value::user_iterator UseIt = Val->user_begin(), @@ -464,9 +463,8 @@ void ARM64PromoteConstant::computeInsertionPoints( } } -bool -ARM64PromoteConstant::insertDefinitions(Constant *Cst, - InsertionPointsPerFunc &InsPtsPerFunc) { +bool AArch64PromoteConstant::insertDefinitions( + Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc) { // We will create one global variable per Module. DenseMap ModuleToMergedGV; bool HasChanged = false; @@ -533,13 +531,13 @@ ARM64PromoteConstant::insertDefinitions(Constant *Cst, return HasChanged; } -bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) { +bool AArch64PromoteConstant::computeAndInsertDefinitions(Constant *Val) { InsertionPointsPerFunc InsertPtsPerFunc; computeInsertionPoints(Val, InsertPtsPerFunc); return insertDefinitions(Val, InsertPtsPerFunc); } -bool ARM64PromoteConstant::promoteConstant(Constant *Cst) { +bool AArch64PromoteConstant::promoteConstant(Constant *Cst) { assert(Cst && "Given variable is not a valid constant."); if (!shouldConvert(Cst)) @@ -553,7 +551,7 @@ bool ARM64PromoteConstant::promoteConstant(Constant *Cst) { return computeAndInsertDefinitions(Cst); } -bool ARM64PromoteConstant::runOnFunction(Function &F) { +bool AArch64PromoteConstant::runOnFunction(Function &F) { // Look for instructions using constant vector. Promote that constant to a // global variable. Create as few loads of this variable as possible and // update the uses accordingly. diff --git a/lib/Target/ARM64/ARM64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp similarity index 66% rename from lib/Target/ARM64/ARM64RegisterInfo.cpp rename to lib/Target/AArch64/AArch64RegisterInfo.cpp index d3c647bd90b..48a361d50e5 100644 --- a/lib/Target/ARM64/ARM64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- ARM64RegisterInfo.cpp - ARM64 Register Information -----------------===// +//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,16 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of the TargetRegisterInfo class. +// This file contains the AArch64 implementation of the TargetRegisterInfo +// class. 
// //===----------------------------------------------------------------------===// -#include "ARM64RegisterInfo.h" -#include "ARM64FrameLowering.h" -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64RegisterInfo.h" +#include "AArch64FrameLowering.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -30,39 +31,39 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC -#include "ARM64GenRegisterInfo.inc" +#include "AArch64GenRegisterInfo.inc" -ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii, - const ARM64Subtarget *sti) - : ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {} +AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii, + const AArch64Subtarget *sti) + : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {} const MCPhysReg * -ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) - return CSR_ARM64_AllRegs_SaveList; + return CSR_AArch64_AllRegs_SaveList; else - return CSR_ARM64_AAPCS_SaveList; + return CSR_AArch64_AAPCS_SaveList; } const uint32_t * -ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::AnyReg) - return CSR_ARM64_AllRegs_RegMask; + return CSR_AArch64_AllRegs_RegMask; else - return CSR_ARM64_AAPCS_RegMask; + return CSR_AArch64_AAPCS_RegMask; } -const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const { +const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { if (STI->isTargetDarwin()) - return CSR_ARM64_TLS_Darwin_RegMask; + return CSR_AArch64_TLS_Darwin_RegMask; assert(STI->isTargetELF() && "only expect Darwin or ELF TLS"); - return CSR_ARM64_TLS_ELF_RegMask; + return CSR_AArch64_TLS_ELF_RegMask; } const uint32_t * -ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { +AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i64 argument (which must also be the register used to return a @@ -70,57 +71,58 @@ ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { // // In case that the calling convention does not use the same register for // both, the function should return NULL (does not currently apply) - return CSR_ARM64_AAPCS_ThisReturn_RegMask; + return CSR_AArch64_AAPCS_ThisReturn_RegMask; } -BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { +BitVector +AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); // FIXME: avoid re-calculating this every time. 
BitVector Reserved(getNumRegs()); - Reserved.set(ARM64::SP); - Reserved.set(ARM64::XZR); - Reserved.set(ARM64::WSP); - Reserved.set(ARM64::WZR); + Reserved.set(AArch64::SP); + Reserved.set(AArch64::XZR); + Reserved.set(AArch64::WSP); + Reserved.set(AArch64::WZR); if (TFI->hasFP(MF) || STI->isTargetDarwin()) { - Reserved.set(ARM64::FP); - Reserved.set(ARM64::W29); + Reserved.set(AArch64::FP); + Reserved.set(AArch64::W29); } if (STI->isTargetDarwin()) { - Reserved.set(ARM64::X18); // Platform register - Reserved.set(ARM64::W18); + Reserved.set(AArch64::X18); // Platform register + Reserved.set(AArch64::W18); } if (hasBasePointer(MF)) { - Reserved.set(ARM64::X19); - Reserved.set(ARM64::W19); + Reserved.set(AArch64::X19); + Reserved.set(AArch64::W19); } return Reserved; } -bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF, +bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); switch (Reg) { default: break; - case ARM64::SP: - case ARM64::XZR: - case ARM64::WSP: - case ARM64::WZR: + case AArch64::SP: + case AArch64::XZR: + case AArch64::WSP: + case AArch64::WZR: return true; - case ARM64::X18: - case ARM64::W18: + case AArch64::X18: + case AArch64::W18: return STI->isTargetDarwin(); - case ARM64::FP: - case ARM64::W29: + case AArch64::FP: + case AArch64::W29: return TFI->hasFP(MF) || STI->isTargetDarwin(); - case ARM64::W19: - case ARM64::X19: + case AArch64::W19: + case AArch64::X19: return hasBasePointer(MF); } @@ -128,21 +130,21 @@ bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF, } const TargetRegisterClass * -ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF, +AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { - return &ARM64::GPR64RegClass; + return &AArch64::GPR64RegClass; } const TargetRegisterClass * -ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { - if (RC == &ARM64::CCRRegClass) +AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &AArch64::CCRRegClass) return nullptr; // Can't copy NZCV. return RC; } -unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; } +unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; } -bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { +bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // In the presence of variable sized objects, if the fixed stack size is @@ -165,37 +167,39 @@ bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } -unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { +unsigned +AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - return TFI->hasFP(MF) ? ARM64::FP : ARM64::SP; + return TFI->hasFP(MF) ? 
AArch64::FP : AArch64::SP; } -bool -ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { +bool AArch64RegisterInfo::requiresRegisterScavenging( + const MachineFunction &MF) const { return true; } -bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF) - const { +bool AArch64RegisterInfo::requiresVirtualBaseRegisters( + const MachineFunction &MF) const { return true; } bool -ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { +AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - // ARM64FrameLowering::resolveFrameIndexReference() can always fall back + // AArch64FrameLowering::resolveFrameIndexReference() can always fall back // to the stack pointer, so only put the emergency spill slot next to the // FP when there's no better way to access it (SP or base pointer). return MFI->hasVarSizedObjects() && !hasBasePointer(MF); } -bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) - const { +bool AArch64RegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { return true; } -bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { +bool +AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Only consider eliminating leaf frames. if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) && @@ -208,8 +212,8 @@ bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { /// reference would be better served by a base register other than FP /// or SP. Used by LocalStackFrameAllocation to determine which frame index /// references it should create new base registers for. -bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, - int64_t Offset) const { +bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, + int64_t Offset) const { for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!"); @@ -268,30 +272,30 @@ bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, return true; } -bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, - int64_t Offset) const { +bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { assert(Offset <= INT_MAX && "Offset too big to fit in int."); assert(MI && "Unable to get the legal offset for nil instruction."); int SaveOffset = Offset; - return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal; + return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal; } /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx /// at the beginning of the basic block. 
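// A simplified stand-in for the range test behind isAArch64FrameOffsetLegal /
// AArch64FrameOffsetIsLegal used by isFrameOffsetLegal above (it ignores
// instruction-specific forms): AArch64 load/store immediates are either a
// scaled unsigned 12-bit field (LDR/STR) or an unscaled signed 9-bit field
// (LDUR/STUR), so whether a frame offset fits depends on both the offset and
// the access size.
#include <cstdint>

bool frameOffsetFits(int64_t Offset, unsigned AccessBytes) {
  bool ScaledFits = Offset >= 0 && Offset % AccessBytes == 0 &&
                    (Offset / AccessBytes) < (1 << 12); // uimm12, scaled
  bool UnscaledFits = Offset >= -256 && Offset <= 255;  // simm9, unscaled
  return ScaledFits || UnscaledFits;
}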
-void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, - unsigned BaseReg, - int FrameIdx, - int64_t Offset) const { +void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, + int FrameIdx, + int64_t Offset) const { MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - const MCInstrDesc &MCID = TII->get(ARM64::ADDXri); + const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const MachineFunction &MF = *MBB->getParent(); MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); - unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); + unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx) @@ -299,8 +303,8 @@ void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, .addImm(Shifter); } -void ARM64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, - int64_t Offset) const { +void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -308,20 +312,20 @@ void ARM64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII); + bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII); assert(Done && "Unable to resolve frame index!"); (void)Done; } -void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { +void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const ARM64FrameLowering *TFI = static_cast( + const AArch64FrameLowering *TFI = static_cast( MF.getTarget().getFrameLowering()); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -341,7 +345,7 @@ void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Modify MI as necessary to handle as much of 'Offset' as possible Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg); - if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) + if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) return; assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) && @@ -351,48 +355,48 @@ void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // as much as possible above. Handle the rest, providing a register that is // SP+LargeImm. 
unsigned ScratchReg = - MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass); + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); } namespace llvm { -unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { +unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); switch (RC->getID()) { default: return 0; - case ARM64::GPR32RegClassID: - case ARM64::GPR32spRegClassID: - case ARM64::GPR32allRegClassID: - case ARM64::GPR64spRegClassID: - case ARM64::GPR64allRegClassID: - case ARM64::GPR64RegClassID: - case ARM64::GPR32commonRegClassID: - case ARM64::GPR64commonRegClassID: + case AArch64::GPR32RegClassID: + case AArch64::GPR32spRegClassID: + case AArch64::GPR32allRegClassID: + case AArch64::GPR64spRegClassID: + case AArch64::GPR64allRegClassID: + case AArch64::GPR64RegClassID: + case AArch64::GPR32commonRegClassID: + case AArch64::GPR64commonRegClassID: return 32 - 1 // XZR/SP - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - STI->isTargetDarwin() // X18 reserved as platform register - hasBasePointer(MF); // X19 - case ARM64::FPR8RegClassID: - case ARM64::FPR16RegClassID: - case ARM64::FPR32RegClassID: - case ARM64::FPR64RegClassID: - case ARM64::FPR128RegClassID: + case AArch64::FPR8RegClassID: + case AArch64::FPR16RegClassID: + case AArch64::FPR32RegClassID: + case AArch64::FPR64RegClassID: + case AArch64::FPR128RegClassID: return 32; - case ARM64::DDRegClassID: - case ARM64::DDDRegClassID: - case ARM64::DDDDRegClassID: - case ARM64::QQRegClassID: - case ARM64::QQQRegClassID: - case ARM64::QQQQRegClassID: + case AArch64::DDRegClassID: + case AArch64::DDDRegClassID: + case AArch64::DDDDRegClassID: + case AArch64::QQRegClassID: + case AArch64::QQQRegClassID: + case AArch64::QQQQRegClassID: return 32; - case ARM64::FPR128_loRegClassID: + case AArch64::FPR128_loRegClassID: return 16; } } diff --git a/lib/Target/ARM64/ARM64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h similarity index 85% rename from lib/Target/ARM64/ARM64RegisterInfo.h rename to lib/Target/AArch64/AArch64RegisterInfo.h index 7691fadbcc8..76af1edce72 100644 --- a/lib/Target/ARM64/ARM64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -1,4 +1,4 @@ -//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===// +//==- AArch64RegisterInfo.h - AArch64 Register Information Impl --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -7,31 +7,31 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 implementation of the MRegisterInfo class. +// This file contains the AArch64 implementation of the MRegisterInfo class. 
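// The GPR case of getRegPressureLimit above reduces to this arithmetic, shown
// here as a standalone sketch with booleans standing in for the TFI/STI
// queries: 31 allocatable X/W registers (encoding 31 is XZR/SP and never
// allocatable), minus the frame pointer, minus X18 on Darwin, minus X19 when
// it is used as the base pointer.
unsigned gprPressureLimit(bool HasFP, bool IsDarwin, bool HasBasePointer) {
  return 32 - 1                  // XZR/SP
         - (HasFP || IsDarwin)   // FP (x29) reserved
         - IsDarwin              // X18 reserved as platform register
         - HasBasePointer;       // X19 reserved as base pointer
}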
// //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64REGISTERINFO_H -#define LLVM_TARGET_ARM64REGISTERINFO_H +#ifndef LLVM_TARGET_AArch64REGISTERINFO_H +#define LLVM_TARGET_AArch64REGISTERINFO_H #define GET_REGINFO_HEADER -#include "ARM64GenRegisterInfo.inc" +#include "AArch64GenRegisterInfo.inc" namespace llvm { -class ARM64InstrInfo; -class ARM64Subtarget; +class AArch64InstrInfo; +class AArch64Subtarget; class MachineFunction; class RegScavenger; class TargetRegisterClass; -struct ARM64RegisterInfo : public ARM64GenRegisterInfo { +struct AArch64RegisterInfo : public AArch64GenRegisterInfo { private: - const ARM64InstrInfo *TII; - const ARM64Subtarget *STI; + const AArch64InstrInfo *TII; + const AArch64Subtarget *STI; public: - ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti); + AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti); bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; @@ -98,4 +98,4 @@ public: } // end namespace llvm -#endif // LLVM_TARGET_ARM64REGISTERINFO_H +#endif // LLVM_TARGET_AArch64REGISTERINFO_H diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td new file mode 100644 index 00000000000..21c927f2385 --- /dev/null +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -0,0 +1,593 @@ +//=- AArch64RegisterInfo.td - Describe the AArch64 Regisers --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + + +class AArch64Reg enc, string n, list subregs = [], + list altNames = []> + : Register { + let HWEncoding = enc; + let Namespace = "AArch64"; + let SubRegs = subregs; +} + +let Namespace = "AArch64" in { + def sub_32 : SubRegIndex<32>; + + def bsub : SubRegIndex<8>; + def hsub : SubRegIndex<16>; + def ssub : SubRegIndex<32>; + def dsub : SubRegIndex<32>; + def qhisub : SubRegIndex<64>; + def qsub : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def dsub0 : SubRegIndex<64>; + def dsub1 : SubRegIndex<64>; + def dsub2 : SubRegIndex<64>; + def dsub3 : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def qsub0 : SubRegIndex<128>; + def qsub1 : SubRegIndex<128>; + def qsub2 : SubRegIndex<128>; + def qsub3 : SubRegIndex<128>; +} + +let Namespace = "AArch64" in { + def vreg : RegAltNameIndex; + def vlist1 : RegAltNameIndex; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>; +def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>; +def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>; +def W3 : AArch64Reg<3, "w3" >, DwarfRegNum<[3]>; +def W4 : AArch64Reg<4, "w4" >, DwarfRegNum<[4]>; +def W5 : AArch64Reg<5, "w5" >, DwarfRegNum<[5]>; +def W6 : AArch64Reg<6, "w6" >, DwarfRegNum<[6]>; +def W7 : AArch64Reg<7, "w7" >, DwarfRegNum<[7]>; +def W8 : AArch64Reg<8, "w8" >, DwarfRegNum<[8]>; +def W9 : AArch64Reg<9, "w9" >, DwarfRegNum<[9]>; +def W10 : AArch64Reg<10, "w10">, DwarfRegNum<[10]>; +def W11 : AArch64Reg<11, "w11">, DwarfRegNum<[11]>; +def W12 : AArch64Reg<12, "w12">, DwarfRegNum<[12]>; +def W13 : 
AArch64Reg<13, "w13">, DwarfRegNum<[13]>; +def W14 : AArch64Reg<14, "w14">, DwarfRegNum<[14]>; +def W15 : AArch64Reg<15, "w15">, DwarfRegNum<[15]>; +def W16 : AArch64Reg<16, "w16">, DwarfRegNum<[16]>; +def W17 : AArch64Reg<17, "w17">, DwarfRegNum<[17]>; +def W18 : AArch64Reg<18, "w18">, DwarfRegNum<[18]>; +def W19 : AArch64Reg<19, "w19">, DwarfRegNum<[19]>; +def W20 : AArch64Reg<20, "w20">, DwarfRegNum<[20]>; +def W21 : AArch64Reg<21, "w21">, DwarfRegNum<[21]>; +def W22 : AArch64Reg<22, "w22">, DwarfRegNum<[22]>; +def W23 : AArch64Reg<23, "w23">, DwarfRegNum<[23]>; +def W24 : AArch64Reg<24, "w24">, DwarfRegNum<[24]>; +def W25 : AArch64Reg<25, "w25">, DwarfRegNum<[25]>; +def W26 : AArch64Reg<26, "w26">, DwarfRegNum<[26]>; +def W27 : AArch64Reg<27, "w27">, DwarfRegNum<[27]>; +def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>; +def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>; +def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>; +def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; +def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias; + +let SubRegIndices = [sub_32] in { +def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias; +def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias; +def X2 : AArch64Reg<2, "x2", [W2]>, DwarfRegAlias; +def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias; +def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias; +def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias; +def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias; +def X7 : AArch64Reg<7, "x7", [W7]>, DwarfRegAlias; +def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias; +def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias; +def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias; +def X11 : AArch64Reg<11, "x11", [W11]>, DwarfRegAlias; +def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias; +def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias; +def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias; +def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias; +def X16 : AArch64Reg<16, "x16", [W16]>, DwarfRegAlias; +def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias; +def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias; +def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias; +def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias; +def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias; +def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias; +def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias; +def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias; +def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias; +def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias; +def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias; +def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias; +def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias; +def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias; +def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias; +def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias; +} + +// Condition code register. +def NZCV : AArch64Reg<0, "nzcv">; + +// GPR register classes with the intersections of GPR32/GPR32sp and +// GPR64/GPR64sp for use by the coalescer. +def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> { + let AltOrders = [(rotl GPR32common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64common : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 28), FP, LR)> { + let AltOrders = [(rotl GPR64common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +// GPR register classes which exclude SP/WSP. 
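// Register number 31 is context-dependent on AArch64: address-style operands
// read it as WSP/SP, data operands as WZR/XZR. That is why the GPR32common/
// GPR64common classes above are combined with either the zero register or the
// stack pointer in the GPR32/GPR64 and GPR32sp/GPR64sp variants defined just
// below. A tiny illustrative decoder (not part of the patch):
const char *nameOfReg31(bool Is64Bit, bool OperandAllowsSP) {
  if (OperandAllowsSP)
    return Is64Bit ? "sp" : "wsp";
  return Is64Bit ? "xzr" : "wzr";
}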
+def GPR32 : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR)> { + let AltOrders = [(rotl GPR32, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64 : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR)> { + let AltOrders = [(rotl GPR64, 8)]; + let AltOrderSelect = [{ return 1; }]; +} + +// GPR register classes which include SP/WSP. +def GPR32sp : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WSP)> { + let AltOrders = [(rotl GPR32sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64sp : RegisterClass<"AArch64", [i64], 64, (add GPR64common, SP)> { + let AltOrders = [(rotl GPR64sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} + +def GPR32sponly : RegisterClass<"AArch64", [i32], 32, (add WSP)>; +def GPR64sponly : RegisterClass<"AArch64", [i64], 64, (add SP)>; + +def GPR64spPlus0Operand : AsmOperandClass { + let Name = "GPR64sp0"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseGPR64sp0Operand"; +} + +def GPR64sp0 : RegisterOperand { + let ParserMatchClass = GPR64spPlus0Operand; +} + +// GPR register classes which include WZR/XZR AND SP/WSP. This is not a +// constraint used by any instructions, it is used as a common super-class. +def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>; +def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>; + +// For tail calls, we can't use callee-saved registers, as they are restored +// to the saved value before the tail call, which would clobber a call address. +// This is for indirect tail calls to store the address of the destination. +def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21, + X22, X23, X24, X25, X26, + X27, X28)>; + +// GPR register classes for post increment amount of vector load/store that +// has alternate printing when Rm=31 and prints a constant immediate value +// equal to the total number of bytes transferred. + +// FIXME: TableGen *should* be able to do these itself now. There appears to be +// a bug in counting how many operands a Post-indexed MCInst should have which +// means the aliases don't trigger. +def GPR64pi1 : RegisterOperand">; +def GPR64pi2 : RegisterOperand">; +def GPR64pi3 : RegisterOperand">; +def GPR64pi4 : RegisterOperand">; +def GPR64pi6 : RegisterOperand">; +def GPR64pi8 : RegisterOperand">; +def GPR64pi12 : RegisterOperand">; +def GPR64pi16 : RegisterOperand">; +def GPR64pi24 : RegisterOperand">; +def GPR64pi32 : RegisterOperand">; +def GPR64pi48 : RegisterOperand">; +def GPR64pi64 : RegisterOperand">; + +// Condition code regclass. +def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { + let CopyCost = -1; // Don't allow copying of status registers. + + // CCR is not allocatable. 
+ let isAllocatable = 0; +} + +//===----------------------------------------------------------------------===// +// Floating Point Scalar Registers +//===----------------------------------------------------------------------===// + +def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>; +def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>; +def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>; +def B3 : AArch64Reg<3, "b3">, DwarfRegNum<[67]>; +def B4 : AArch64Reg<4, "b4">, DwarfRegNum<[68]>; +def B5 : AArch64Reg<5, "b5">, DwarfRegNum<[69]>; +def B6 : AArch64Reg<6, "b6">, DwarfRegNum<[70]>; +def B7 : AArch64Reg<7, "b7">, DwarfRegNum<[71]>; +def B8 : AArch64Reg<8, "b8">, DwarfRegNum<[72]>; +def B9 : AArch64Reg<9, "b9">, DwarfRegNum<[73]>; +def B10 : AArch64Reg<10, "b10">, DwarfRegNum<[74]>; +def B11 : AArch64Reg<11, "b11">, DwarfRegNum<[75]>; +def B12 : AArch64Reg<12, "b12">, DwarfRegNum<[76]>; +def B13 : AArch64Reg<13, "b13">, DwarfRegNum<[77]>; +def B14 : AArch64Reg<14, "b14">, DwarfRegNum<[78]>; +def B15 : AArch64Reg<15, "b15">, DwarfRegNum<[79]>; +def B16 : AArch64Reg<16, "b16">, DwarfRegNum<[80]>; +def B17 : AArch64Reg<17, "b17">, DwarfRegNum<[81]>; +def B18 : AArch64Reg<18, "b18">, DwarfRegNum<[82]>; +def B19 : AArch64Reg<19, "b19">, DwarfRegNum<[83]>; +def B20 : AArch64Reg<20, "b20">, DwarfRegNum<[84]>; +def B21 : AArch64Reg<21, "b21">, DwarfRegNum<[85]>; +def B22 : AArch64Reg<22, "b22">, DwarfRegNum<[86]>; +def B23 : AArch64Reg<23, "b23">, DwarfRegNum<[87]>; +def B24 : AArch64Reg<24, "b24">, DwarfRegNum<[88]>; +def B25 : AArch64Reg<25, "b25">, DwarfRegNum<[89]>; +def B26 : AArch64Reg<26, "b26">, DwarfRegNum<[90]>; +def B27 : AArch64Reg<27, "b27">, DwarfRegNum<[91]>; +def B28 : AArch64Reg<28, "b28">, DwarfRegNum<[92]>; +def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>; +def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>; +def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>; + +let SubRegIndices = [bsub] in { +def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias; +def H1 : AArch64Reg<1, "h1", [B1]>, DwarfRegAlias; +def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias; +def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias; +def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias; +def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias; +def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias; +def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias; +def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias; +def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias; +def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias; +def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias; +def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias; +def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias; +def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias; +def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias; +def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias; +def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias; +def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias; +def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias; +def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias; +def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias; +def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias; +def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias; +def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias; +def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias; +def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias; +def H27 : AArch64Reg<27, "h27", [B27]>, DwarfRegAlias; +def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias; +def H29 : AArch64Reg<29, "h29", 
[B29]>, DwarfRegAlias; +def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias; +def H31 : AArch64Reg<31, "h31", [B31]>, DwarfRegAlias; +} + +let SubRegIndices = [hsub] in { +def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias; +def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias; +def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias; +def S3 : AArch64Reg<3, "s3", [H3]>, DwarfRegAlias; +def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias; +def S5 : AArch64Reg<5, "s5", [H5]>, DwarfRegAlias; +def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias; +def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias; +def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias; +def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias; +def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias; +def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias; +def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias; +def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias; +def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias; +def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias; +def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias; +def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias; +def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias; +def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias; +def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias; +def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias; +def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias; +def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias; +def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias; +def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias; +def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias; +def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias; +def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias; +def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias; +def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias; +def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias; +} + +let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { +def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; +def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; +def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; +def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; +def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; +def D5 : AArch64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias; +def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias; +def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; +def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; +def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; +def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; +def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias; +def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; +def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; +def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; +def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; +def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; +def D17 : AArch64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; +def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; +def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; +def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; +def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; +def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; +def D23 : 
AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; +def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; +def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; +def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias; +def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; +def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; +def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; +def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; +def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; +} + +let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { +def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; +def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias; +def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; +def Q3 : AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; +def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; +def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; +def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; +def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; +def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; +def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; +def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; +def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; +def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; +def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; +def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; +def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; +def Q16 : AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; +def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; +def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; +def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; +def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; +def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; +def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; +def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; +def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; +def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; +def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; +def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; +def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; +def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; +def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; +def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; +} + +def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> { + let Size = 8; +} +def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> { + let Size = 16; +} +def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>; +def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, + v1i64], + 64, (sequence "D%u", 0, 31)>; +// We don't (yet) have an f128 legal type, so don't use that here. We +// normalize 128-bit vectors to v2f64 for arg passing and such, so use +// that here. 
+def FPR128 : RegisterClass<"AArch64", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128], + 128, (sequence "Q%u", 0, 31)>; + +// The lower 16 vector registers. Some instructions can only take registers +// in this range. +def FPR128_lo : RegisterClass<"AArch64", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (trunc FPR128, 16)>; + +// Pairs, triples, and quads of 64-bit vector registers. +def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; +def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2)]>; +def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2), (rotl FPR64, 3)]>; +def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> { + let Size = 128; +} +def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> { + let Size = 196; +} +def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> { + let Size = 256; +} + +// Pairs, triples, and quads of 128-bit vector registers. +def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; +def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2)]>; +def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2), (rotl FPR128, 3)]>; +def QQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqPairs)> { + let Size = 256; +} +def QQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqTriples)> { + let Size = 384; +} +def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> { + let Size = 512; +} + + +// Vector operand versions of the FP registers. Alternate name printing and +// assmebler matching. +def VectorReg64AsmOperand : AsmOperandClass { + let Name = "VectorReg64"; + let PredicateMethod = "isVectorReg"; +} +def VectorReg128AsmOperand : AsmOperandClass { + let Name = "VectorReg128"; + let PredicateMethod = "isVectorReg"; +} + +def V64 : RegisterOperand { + let ParserMatchClass = VectorReg64AsmOperand; +} + +def V128 : RegisterOperand { + let ParserMatchClass = VectorReg128AsmOperand; +} + +def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; } +def V128_lo : RegisterOperand { + let ParserMatchClass = VectorRegLoAsmOperand; +} + +class TypedVecListAsmOperand + : AsmOperandClass { + let Name = "TypedVectorList" # count # "_" # lanes # kind; + + let PredicateMethod + = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>"; + let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">"; +} + +class TypedVecListRegOperand + : RegisterOperand">; + +multiclass VectorList { + // With implicit types (probably on instruction instead). E.g. { v0, v1 } + def _64AsmOperand : AsmOperandClass { + let Name = NAME # "64"; + let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; + let RenderMethod = "addVectorList64Operands<" # count # ">"; + } + + def "64" : RegisterOperand { + let ParserMatchClass = !cast(NAME # "_64AsmOperand"); + } + + def _128AsmOperand : AsmOperandClass { + let Name = NAME # "128"; + let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; + let RenderMethod = "addVectorList128Operands<" # count # ">"; + } + + def "128" : RegisterOperand { + let ParserMatchClass = !cast(NAME # "_128AsmOperand"); + } + + // 64-bit register lists with explicit type. 
+ + // { v0.8b, v1.8b } + def _8bAsmOperand : TypedVecListAsmOperand; + def "8b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_8bAsmOperand"); + } + + // { v0.4h, v1.4h } + def _4hAsmOperand : TypedVecListAsmOperand; + def "4h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_4hAsmOperand"); + } + + // { v0.2s, v1.2s } + def _2sAsmOperand : TypedVecListAsmOperand; + def "2s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_2sAsmOperand"); + } + + // { v0.1d, v1.1d } + def _1dAsmOperand : TypedVecListAsmOperand; + def "1d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_1dAsmOperand"); + } + + // 128-bit register lists with explicit type + + // { v0.16b, v1.16b } + def _16bAsmOperand : TypedVecListAsmOperand; + def "16b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_16bAsmOperand"); + } + + // { v0.8h, v1.8h } + def _8hAsmOperand : TypedVecListAsmOperand; + def "8h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_8hAsmOperand"); + } + + // { v0.4s, v1.4s } + def _4sAsmOperand : TypedVecListAsmOperand; + def "4s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_4sAsmOperand"); + } + + // { v0.2d, v1.2d } + def _2dAsmOperand : TypedVecListAsmOperand; + def "2d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_2dAsmOperand"); + } + + // { v0.b, v1.b } + def _bAsmOperand : TypedVecListAsmOperand; + def "b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_bAsmOperand"); + } + + // { v0.h, v1.h } + def _hAsmOperand : TypedVecListAsmOperand; + def "h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_hAsmOperand"); + } + + // { v0.s, v1.s } + def _sAsmOperand : TypedVecListAsmOperand; + def "s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_sAsmOperand"); + } + + // { v0.d, v1.d } + def _dAsmOperand : TypedVecListAsmOperand; + def "d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_dAsmOperand"); + } + + +} + +defm VecListOne : VectorList<1, FPR64, FPR128>; +defm VecListTwo : VectorList<2, DD, QQ>; +defm VecListThree : VectorList<3, DDD, QQQ>; +defm VecListFour : VectorList<4, DDDD, QQQQ>; + + +// Register operand versions of the scalar FP registers. 
+def FPR16Op : RegisterOperand; +def FPR32Op : RegisterOperand; +def FPR64Op : RegisterOperand; +def FPR128Op : RegisterOperand; diff --git a/lib/Target/ARM64/ARM64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td similarity index 99% rename from lib/Target/ARM64/ARM64SchedA53.td rename to lib/Target/AArch64/AArch64SchedA53.td index cf1a8202764..0c3949ecfc1 100644 --- a/lib/Target/ARM64/ARM64SchedA53.td +++ b/lib/Target/AArch64/AArch64SchedA53.td @@ -1,4 +1,4 @@ -//=- ARM64SchedA53.td - ARM Cortex-A53 Scheduling Definitions -*- tablegen -*-=// +//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM64/ARM64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td similarity index 98% rename from lib/Target/ARM64/ARM64SchedCyclone.td rename to lib/Target/AArch64/AArch64SchedCyclone.td index c04a7bb8baf..a2a18023778 100644 --- a/lib/Target/ARM64/ARM64SchedCyclone.td +++ b/lib/Target/AArch64/AArch64SchedCyclone.td @@ -1,4 +1,4 @@ -//=- ARMSchedCyclone.td - ARM64 Cyclone Scheduling Defs ------*- tablegen -*-=// +//=- ARMSchedCyclone.td - AArch64 Cyclone Scheduling Defs ----*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the machine model for ARM64 Cyclone to support +// This file defines the machine model for AArch64 Cyclone to support // instruction scheduling and other instruction cost heuristics. // //===----------------------------------------------------------------------===// @@ -239,13 +239,13 @@ def : WriteRes { def CyWriteLDIdx : SchedWriteVariant<[ SchedVar, // Load from scaled register. SchedVar]>; // Load from register offset. -def : SchedAlias; // Map ARM64->Cyclone type. +def : SchedAlias; // Map AArch64->Cyclone type. // EXAMPLE: STR Xn, Xm [, lsl 3] def CyWriteSTIdx : SchedWriteVariant<[ SchedVar, // Store to scaled register. SchedVar]>; // Store to register offset. -def : SchedAlias; // Map ARM64->Cyclone type. +def : SchedAlias; // Map AArch64->Cyclone type. // Read the (unshifted) base register Xn in the second micro-op one cycle later. // EXAMPLE: LDR Xn, Xm [, lsl 3] @@ -253,7 +253,7 @@ def ReadBaseRS : SchedReadAdvance<1>; def CyReadAdrBase : SchedReadVariant<[ SchedVar, // Read base reg after shifting offset. SchedVar]>; // Read base reg with no shift. -def : SchedAlias; // Map ARM64->Cyclone type. +def : SchedAlias; // Map AArch64->Cyclone type. //--- // 7.8.9,7.8.11. Load/Store, paired diff --git a/lib/Target/ARM64/ARM64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td similarity index 95% rename from lib/Target/ARM64/ARM64Schedule.td rename to lib/Target/AArch64/AArch64Schedule.td index 3a4194173a8..eaa9110ab1b 100644 --- a/lib/Target/ARM64/ARM64Schedule.td +++ b/lib/Target/AArch64/AArch64Schedule.td @@ -1,4 +1,4 @@ -//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// +//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,12 @@ // const MachineInstr *MI and const TargetSchedModel *SchedModel // are defined by default. 
def : PredicateProlog<[{ - const ARM64InstrInfo *TII = - static_cast(SchedModel->getInstrInfo()); + const AArch64InstrInfo *TII = + static_cast(SchedModel->getInstrInfo()); (void)TII; }]>; -// ARM64 Scheduler Definitions +// AArch64 Scheduler Definitions def WriteImm : SchedWrite; // MOVN, MOVZ // TODO: Provide variants for MOV32/64imm Pseudos that dynamically diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp similarity index 74% rename from lib/Target/ARM64/ARM64SelectionDAGInfo.cpp rename to lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index f8a2527616c..5c65b750ee5 100644 --- a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===// +//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===// // // The LLVM Compiler Infrastructure // @@ -7,22 +7,22 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64SelectionDAGInfo class. +// This file implements the AArch64SelectionDAGInfo class. // //===----------------------------------------------------------------------===// -#include "ARM64TargetMachine.h" +#include "AArch64TargetMachine.h" using namespace llvm; -#define DEBUG_TYPE "arm64-selectiondag-info" +#define DEBUG_TYPE "aarch64-selectiondag-info" -ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM) +AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM) : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget()) {} + Subtarget(&TM.getSubtarget()) {} -ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {} +AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {} -SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset( +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { @@ -34,8 +34,9 @@ SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset( // For small size (< 256), it is not beneficial to use bzero // instead of memset. if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { - const ARM64TargetLowering &TLI = *static_cast( - DAG.getTarget().getTargetLowering()); + const AArch64TargetLowering &TLI = + *static_cast( + DAG.getTarget().getTargetLowering()); EVT IntPtr = TLI.getPointerTy(); Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h similarity index 69% rename from lib/Target/ARM64/ARM64SelectionDAGInfo.h rename to lib/Target/AArch64/AArch64SelectionDAGInfo.h index 770775fc02d..8381f9916a8 100644 --- a/lib/Target/ARM64/ARM64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -1,4 +1,4 @@ -//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===// +//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,25 +7,25 @@ // //===----------------------------------------------------------------------===// // -// This file defines the ARM64 subclass for TargetSelectionDAGInfo. +// This file defines the AArch64 subclass for TargetSelectionDAGInfo. 
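// An illustrative restatement of the size heuristic in EmitTargetCodeForMemset
// above, assuming a POSIX bzero entry point (as on the Darwin targets handled
// there) and a zero fill value: lengths that are unknown at compile time or
// larger than 256 bytes go to bzero, small constant-size fills stay as memset.
#include <cstddef>
#include <cstring>
#include <strings.h> // POSIX bzero()

void zeroFill(void *Dst, std::size_t Len, bool LenIsCompileTimeConstant) {
  if (!LenIsCompileTimeConstant || Len > 256)
    bzero(Dst, Len);
  else
    std::memset(Dst, 0, Len);
}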
// //===----------------------------------------------------------------------===// -#ifndef ARM64SELECTIONDAGINFO_H -#define ARM64SELECTIONDAGINFO_H +#ifndef AArch64SELECTIONDAGINFO_H +#define AArch64SELECTIONDAGINFO_H #include "llvm/Target/TargetSelectionDAGInfo.h" namespace llvm { -class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo { +class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; + const AArch64Subtarget *Subtarget; public: - explicit ARM64SelectionDAGInfo(const TargetMachine &TM); - ~ARM64SelectionDAGInfo(); + explicit AArch64SelectionDAGInfo(const TargetMachine &TM); + ~AArch64SelectionDAGInfo(); SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, diff --git a/lib/Target/ARM64/ARM64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp similarity index 82% rename from lib/Target/ARM64/ARM64StorePairSuppress.cpp rename to lib/Target/AArch64/AArch64StorePairSuppress.cpp index a9501ed9217..45f8ddbd2d8 100644 --- a/lib/Target/ARM64/ARM64StorePairSuppress.cpp +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -1,4 +1,4 @@ -//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===// +//===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // store pairs. Later we may do the same for floating point loads. // ===---------------------------------------------------------------------===// -#include "ARM64InstrInfo.h" +#include "AArch64InstrInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -23,11 +23,11 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-stp-suppress" +#define DEBUG_TYPE "aarch64-stp-suppress" namespace { -class ARM64StorePairSuppress : public MachineFunctionPass { - const ARM64InstrInfo *TII; +class AArch64StorePairSuppress : public MachineFunctionPass { + const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; MachineFunction *MF; @@ -37,10 +37,10 @@ class ARM64StorePairSuppress : public MachineFunctionPass { public: static char ID; - ARM64StorePairSuppress() : MachineFunctionPass(ID) {} + AArch64StorePairSuppress() : MachineFunctionPass(ID) {} virtual const char *getPassName() const override { - return "ARM64 Store Pair Suppression"; + return "AArch64 Store Pair Suppression"; } bool runOnMachineFunction(MachineFunction &F) override; @@ -57,11 +57,11 @@ private: MachineFunctionPass::getAnalysisUsage(AU); } }; -char ARM64StorePairSuppress::ID = 0; +char AArch64StorePairSuppress::ID = 0; } // anonymous -FunctionPass *llvm::createARM64StorePairSuppressPass() { - return new ARM64StorePairSuppress(); +FunctionPass *llvm::createAArch64StorePairSuppressPass() { + return new AArch64StorePairSuppress(); } /// Return true if an STP can be added to this block without increasing the @@ -70,7 +70,7 @@ FunctionPass *llvm::createARM64StorePairSuppressPass() { /// critical path. If the critical path is longer than the resource height, the /// extra vector ops can limit physreg renaming. Otherwise, it could simply /// oversaturate the vector units. 
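// The body of shouldAddSTPToBlock follows below; its test roughly reduces to
// the comparison sketched here, with plain integers standing in for the
// MachineTraceMetrics queries: form the store pair only if adding the STP's
// scheduling class does not raise the block's resource length (which is
// already bounded below by the critical path), so pairing never becomes the
// new bottleneck in a latency-bound block.
bool worthFormingStorePair(unsigned ResLenWithoutSTP, unsigned ResLenWithSTP) {
  return ResLenWithSTP <= ResLenWithoutSTP;
}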
-bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { +bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { if (!MinInstr) MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); @@ -79,7 +79,7 @@ bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { // Get the machine model's scheduling class for STPQi. // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. - unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass(); + unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass(); const MCSchedClassDesc *SCDesc = SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); @@ -103,22 +103,22 @@ bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { /// tell us if it's profitable with no cpu knowledge here. /// /// FIXME: We plan to develop a decent Target abstraction for simple loads and -/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer. -bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { +/// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer. +bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STURSi: - case ARM64::STURDi: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STURSi: + case AArch64::STURDi: return true; } } -bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { +bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { MF = &mf; - TII = static_cast(MF->getTarget().getInstrInfo()); + TII = static_cast(MF->getTarget().getInstrInfo()); TRI = MF->getTarget().getRegisterInfo(); MRI = &MF->getRegInfo(); const TargetSubtargetInfo &ST = diff --git a/lib/Target/ARM64/ARM64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp similarity index 71% rename from lib/Target/ARM64/ARM64Subtarget.cpp rename to lib/Target/AArch64/AArch64Subtarget.cpp index 624e47483ff..cd69994620d 100644 --- a/lib/Target/ARM64/ARM64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -1,4 +1,4 @@ -//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===// +//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64 specific subclass of TargetSubtarget. +// This file implements the AArch64 specific subclass of TargetSubtarget. 
// //===----------------------------------------------------------------------===// -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" @@ -20,22 +20,23 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-subtarget" +#define DEBUG_TYPE "aarch64-subtarget" #define GET_SUBTARGETINFO_CTOR #define GET_SUBTARGETINFO_TARGET_DESC -#include "ARM64GenSubtargetInfo.inc" +#include "AArch64GenSubtargetInfo.inc" static cl::opt -EnableEarlyIfConvert("arm64-early-ifcvt", cl::desc("Enable the early if " +EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden); -ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool LittleEndian) - : ARM64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), +AArch64Subtarget::AArch64Subtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS, bool LittleEndian) + : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), - CPUString(CPU), TargetTriple(TT), IsLittleEndian(LittleEndian) { + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU), + TargetTriple(TT), IsLittleEndian(LittleEndian) { // Determine default and user-specified characteristics if (CPUString.empty()) @@ -47,7 +48,7 @@ ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU, /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. unsigned char -ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, +AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Determine whether this is a reference to a definition or a declaration. @@ -60,13 +61,13 @@ ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, // MachO large model always goes via a GOT, simply to get a single 8-byte // absolute relocation on all global addresses. if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) - return ARM64II::MO_GOT; + return AArch64II::MO_GOT; // The small code mode's direct accesses use ADRP, which cannot necessarily // produce the value 0 (if the code is above 4GB). Therefore they must use the // GOT. if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl) - return ARM64II::MO_GOT; + return AArch64II::MO_GOT; // If symbol visibility is hidden, the extra load is not needed if // the symbol is definitely defined in the current translation unit. @@ -78,14 +79,14 @@ ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, // defined could end up in unexpected places. Use a GOT. if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) { if (isTargetMachO()) - return (isDecl || GV->isWeakForLinker()) ? ARM64II::MO_GOT - : ARM64II::MO_NO_FLAG; + return (isDecl || GV->isWeakForLinker()) ? AArch64II::MO_GOT + : AArch64II::MO_NO_FLAG; else // No need to go through the GOT for local symbols on ELF. - return GV->hasLocalLinkage() ? ARM64II::MO_NO_FLAG : ARM64II::MO_GOT; + return GV->hasLocalLinkage() ? 
AArch64II::MO_NO_FLAG : AArch64II::MO_GOT; } - return ARM64II::MO_NO_FLAG; + return AArch64II::MO_NO_FLAG; } /// This function returns the name of a function which has an interface @@ -93,7 +94,7 @@ ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, /// the current subtarget and it is considered prefereable over /// memset with zero passed as the second argument. Otherwise it /// returns null. -const char *ARM64Subtarget::getBZeroEntry() const { +const char *AArch64Subtarget::getBZeroEntry() const { // Prefer bzero on Darwin only. if(isTargetDarwin()) return "bzero"; @@ -101,7 +102,7 @@ const char *ARM64Subtarget::getBZeroEntry() const { return nullptr; } -void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, +void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, unsigned NumRegionInstrs) const { // LNT run (at least on Cyclone) showed reasonably significant gains for @@ -110,6 +111,6 @@ void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.OnlyBottomUp = false; } -bool ARM64Subtarget::enableEarlyIfConversion() const { +bool AArch64Subtarget::enableEarlyIfConversion() const { return EnableEarlyIfConvert; } diff --git a/lib/Target/ARM64/ARM64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h similarity index 88% rename from lib/Target/ARM64/ARM64Subtarget.h rename to lib/Target/AArch64/AArch64Subtarget.h index 9cea3c387d6..590ea0580ea 100644 --- a/lib/Target/ARM64/ARM64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -1,4 +1,4 @@ -//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====// +//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -7,25 +7,25 @@ // //===----------------------------------------------------------------------===// // -// This file declares the ARM64 specific subclass of TargetSubtarget. +// This file declares the AArch64 specific subclass of TargetSubtarget. // //===----------------------------------------------------------------------===// -#ifndef ARM64SUBTARGET_H -#define ARM64SUBTARGET_H +#ifndef AArch64SUBTARGET_H +#define AArch64SUBTARGET_H #include "llvm/Target/TargetSubtargetInfo.h" -#include "ARM64RegisterInfo.h" +#include "AArch64RegisterInfo.h" #include #define GET_SUBTARGETINFO_HEADER -#include "ARM64GenSubtargetInfo.inc" +#include "AArch64GenSubtargetInfo.inc" namespace llvm { class GlobalValue; class StringRef; -class ARM64Subtarget : public ARM64GenSubtargetInfo { +class AArch64Subtarget : public AArch64GenSubtargetInfo { protected: enum ARMProcFamilyEnum {Others, CortexA53, CortexA57, Cyclone}; @@ -55,7 +55,7 @@ protected: public: /// This constructor initializes the data members to match that /// of the specified triple. 
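// A simplified restatement of the default-visibility branch of
// ClassifyGlobalReference above, with booleans standing in for the GlobalValue
// and TargetMachine queries: Mach-O reaches declarations and weak definitions
// through the GOT, while ELF only needs the GOT for symbols that are not local
// to the translation unit.
enum class Ref { Direct, GOT };

Ref classifyDefaultVisibility(bool IsMachO, bool IsDeclOrWeak,
                              bool HasLocalLinkage) {
  if (IsMachO)
    return IsDeclOrWeak ? Ref::GOT : Ref::Direct;
  return HasLocalLinkage ? Ref::Direct : Ref::GOT; // ELF
}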
- ARM64Subtarget(const std::string &TT, const std::string &CPU, + AArch64Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool LittleEndian); bool enableMachineScheduler() const override { return true; } @@ -107,4 +107,4 @@ public: }; } // End llvm namespace -#endif // ARM64SUBTARGET_H +#endif // AArch64SUBTARGET_H diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp similarity index 51% rename from lib/Target/ARM64/ARM64TargetMachine.cpp rename to lib/Target/AArch64/AArch64TargetMachine.cpp index fc73145be3f..0b5dd2f067e 100644 --- a/lib/Target/ARM64/ARM64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -1,4 +1,4 @@ -//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===// +//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===// // // The LLVM Compiler Infrastructure // @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64TargetMachine.h" +#include "AArch64.h" +#include "AArch64TargetMachine.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" @@ -21,28 +21,28 @@ using namespace llvm; static cl::opt -EnableCCMP("arm64-ccmp", cl::desc("Enable the CCMP formation pass"), +EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"), cl::init(true), cl::Hidden); static cl::opt -EnableStPairSuppress("arm64-stp-suppress", cl::desc("Suppress STP for ARM64"), +EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"), cl::init(true), cl::Hidden); static cl::opt -EnableAdvSIMDScalar("arm64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar" +EnableAdvSIMDScalar("aarch64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar" " integer instructions"), cl::init(false), cl::Hidden); static cl::opt -EnablePromoteConstant("arm64-promote-const", cl::desc("Enable the promote " +EnablePromoteConstant("aarch64-promote-const", cl::desc("Enable the promote " "constant pass"), cl::init(true), cl::Hidden); static cl::opt -EnableCollectLOH("arm64-collect-loh", cl::desc("Enable the pass that emits the" +EnableCollectLOH("aarch64-collect-loh", cl::desc("Enable the pass that emits the" " linker optimization hints (LOH)"), cl::init(true), cl::Hidden); static cl::opt -EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden, +EnableDeadRegisterElimination("aarch64-dead-def-elimination", cl::Hidden, cl::desc("Enable the pass that removes dead" " definitons and replaces stores to" " them with stores to the zero" @@ -50,67 +50,67 @@ EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden, cl::init(true)); static cl::opt -EnableLoadStoreOpt("arm64-load-store-opt", cl::desc("Enable the load/store pair" +EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden); -extern "C" void LLVMInitializeARM64Target() { +extern "C" void LLVMInitializeAArch64Target() { // Register the target. - RegisterTargetMachine X(TheARM64leTarget); - RegisterTargetMachine Y(TheARM64beTarget); + RegisterTargetMachine X(TheAArch64leTarget); + RegisterTargetMachine Y(TheAArch64beTarget); - RegisterTargetMachine Z(TheAArch64leTarget); - RegisterTargetMachine W(TheAArch64beTarget); + RegisterTargetMachine Z(TheARM64leTarget); + RegisterTargetMachine W(TheARM64beTarget); } -/// TargetMachine ctor - Create an ARM64 architecture model. 
+/// TargetMachine ctor - Create an AArch64 architecture model. /// -ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool LittleEndian) +AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, + bool LittleEndian) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, LittleEndian), - // This nested ternary is horrible, but DL needs to be properly initialized + // This nested ternary is horrible, but DL needs to be properly + // initialized // before TLInfo is constructed. - DL(Subtarget.isTargetMachO() ? - "e-m:o-i64:64-i128:128-n32:64-S128" : - (LittleEndian ? - "e-m:e-i64:64-i128:128-n32:64-S128" : - "E-m:e-i64:64-i128:128-n32:64-S128")), + DL(Subtarget.isTargetMachO() + ? "e-m:o-i64:64-i128:128-n32:64-S128" + : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128" + : "E-m:e-i64:64-i128:128-n32:64-S128")), InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget), TSInfo(*this) { initAsmInfo(); } -void ARM64leTargetMachine::anchor() { } +void AArch64leTargetMachine::anchor() { } -ARM64leTargetMachine:: -ARM64leTargetMachine(const Target &T, StringRef TT, +AArch64leTargetMachine:: +AArch64leTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARM64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} + : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void ARM64beTargetMachine::anchor() { } +void AArch64beTargetMachine::anchor() { } -ARM64beTargetMachine:: -ARM64beTargetMachine(const Target &T, StringRef TT, +AArch64beTargetMachine:: +AArch64beTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARM64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { -/// ARM64 Code Generator Pass Configuration Options. -class ARM64PassConfig : public TargetPassConfig { +/// AArch64 Code Generator Pass Configuration Options. +class AArch64PassConfig : public TargetPassConfig { public: - ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM) + AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} - ARM64TargetMachine &getARM64TargetMachine() const { - return getTM(); + AArch64TargetMachine &getAArch64TargetMachine() const { + return getTM(); } bool addPreISel() override; @@ -123,28 +123,28 @@ public: }; } // namespace -void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our ARM64 pass. This - // allows the ARM64 pass to delegate to the target independent layer when +void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our AArch64 pass. This + // allows the AArch64 pass to delegate to the target independent layer when // appropriate. 
PM.add(createBasicTargetTransformInfoPass(this)); - PM.add(createARM64TargetTransformInfoPass(this)); + PM.add(createAArch64TargetTransformInfoPass(this)); } -TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) { - return new ARM64PassConfig(this, PM); +TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { + return new AArch64PassConfig(this, PM); } // Pass Pipeline Configuration -bool ARM64PassConfig::addPreISel() { +bool AArch64PassConfig::addPreISel() { // Run promote constant before global merge, so that the promoted constants // get a chance to be merged if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) - addPass(createARM64PromoteConstantPass()); + addPass(createAArch64PromoteConstantPass()); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createGlobalMergePass(TM)); if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createARM64AddressTypePromotionPass()); + addPass(createAArch64AddressTypePromotionPass()); // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg // ourselves. @@ -153,56 +153,56 @@ bool ARM64PassConfig::addPreISel() { return false; } -bool ARM64PassConfig::addInstSelector() { - addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel())); +bool AArch64PassConfig::addInstSelector() { + addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel())); // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many // references to _TLS_MODULE_BASE_ as possible. - if (TM->getSubtarget().isTargetELF() && + if (TM->getSubtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) - addPass(createARM64CleanupLocalDynamicTLSPass()); + addPass(createAArch64CleanupLocalDynamicTLSPass()); return false; } -bool ARM64PassConfig::addILPOpts() { +bool AArch64PassConfig::addILPOpts() { if (EnableCCMP) - addPass(createARM64ConditionalCompares()); + addPass(createAArch64ConditionalCompares()); addPass(&EarlyIfConverterID); if (EnableStPairSuppress) - addPass(createARM64StorePairSuppressPass()); + addPass(createAArch64StorePairSuppressPass()); return true; } -bool ARM64PassConfig::addPreRegAlloc() { +bool AArch64PassConfig::addPreRegAlloc() { // Use AdvSIMD scalar instructions whenever profitable. if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) - addPass(createARM64AdvSIMDScalar()); + addPass(createAArch64AdvSIMDScalar()); return true; } -bool ARM64PassConfig::addPostRegAlloc() { +bool AArch64PassConfig::addPostRegAlloc() { // Change dead register definitions to refer to the zero register. if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination) - addPass(createARM64DeadRegisterDefinitions()); + addPass(createAArch64DeadRegisterDefinitions()); return true; } -bool ARM64PassConfig::addPreSched2() { +bool AArch64PassConfig::addPreSched2() { // Expand some pseudo instructions to allow proper scheduling. - addPass(createARM64ExpandPseudoPass()); + addPass(createAArch64ExpandPseudoPass()); // Use load/store pair instructions when possible. if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt) - addPass(createARM64LoadStoreOptimizationPass()); + addPass(createAArch64LoadStoreOptimizationPass()); return true; } -bool ARM64PassConfig::addPreEmitPass() { +bool AArch64PassConfig::addPreEmitPass() { // Relax conditional branch instructions if they're otherwise out of // range of their destination. 
- addPass(createARM64BranchRelaxation()); + addPass(createAArch64BranchRelaxation()); if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH && - TM->getSubtarget().isTargetMachO()) - addPass(createARM64CollectLOHPass()); + TM->getSubtarget().isTargetMachO()) + addPass(createAArch64CollectLOHPass()); return true; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h new file mode 100644 index 00000000000..079b19b23bb --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -0,0 +1,94 @@ +//==-- AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the AArch64 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef AArch64TARGETMACHINE_H +#define AArch64TARGETMACHINE_H + +#include "AArch64InstrInfo.h" +#include "AArch64ISelLowering.h" +#include "AArch64Subtarget.h" +#include "AArch64FrameLowering.h" +#include "AArch64SelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +class AArch64TargetMachine : public LLVMTargetMachine { +protected: + AArch64Subtarget Subtarget; + +private: + const DataLayout DL; + AArch64InstrInfo InstrInfo; + AArch64TargetLowering TLInfo; + AArch64FrameLowering FrameLowering; + AArch64SelectionDAGInfo TSInfo; + +public: + AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool IsLittleEndian); + + const AArch64Subtarget *getSubtargetImpl() const override { + return &Subtarget; + } + const AArch64TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const DataLayout *getDataLayout() const override { return &DL; } + const AArch64FrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } + const AArch64RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + // Pass Pipeline Configuration + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + /// \brief Register AArch64 analysis passes with a pass manager. + void addAnalysisPasses(PassManagerBase &PM) override; +}; + +// AArch64leTargetMachine - AArch64 little endian target machine. +// +class AArch64leTargetMachine : public AArch64TargetMachine { + virtual void anchor(); +public: + AArch64leTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); +}; + +// AArch64beTargetMachine - AArch64 big endian target machine. 
+// +class AArch64beTargetMachine : public AArch64TargetMachine { + virtual void anchor(); +public: + AArch64beTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp similarity index 80% rename from lib/Target/ARM64/ARM64TargetObjectFile.cpp rename to lib/Target/AArch64/AArch64TargetObjectFile.cpp index cde01e515dc..4069038dffe 100644 --- a/lib/Target/ARM64/ARM64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -1,4 +1,4 @@ -//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===// +//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "ARM64TargetObjectFile.h" -#include "ARM64TargetMachine.h" +#include "AArch64TargetObjectFile.h" +#include "AArch64TargetMachine.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -17,13 +17,13 @@ using namespace llvm; using namespace dwarf; -void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { +void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); } -const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference( +const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { @@ -45,7 +45,7 @@ const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference( GV, Encoding, Mang, TM, MMI, Streamer); } -MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol( +MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol( const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const { return TM.getSymbol(GV, Mang); diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h similarity index 73% rename from lib/Target/ARM64/ARM64TargetObjectFile.h rename to lib/Target/AArch64/AArch64TargetObjectFile.h index 62446f94f17..de63cb42542 100644 --- a/lib/Target/ARM64/ARM64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -1,4 +1,4 @@ -//===-- ARM64TargetObjectFile.h - ARM64 Object Info -*- C++ -------------*-===// +//===-- AArch64TargetObjectFile.h - AArch64 Object Info -*- C++ ---------*-===// // // The LLVM Compiler Infrastructure // @@ -7,22 +7,22 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H -#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H +#ifndef LLVM_TARGET_AArch64_TARGETOBJECTFILE_H +#define LLVM_TARGET_AArch64_TARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -class ARM64TargetMachine; +class AArch64TargetMachine; /// This implementation is used for AArch64 ELF targets (Linux in particular). 
-class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { +class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { void Initialize(MCContext &Ctx, const TargetMachine &TM) override; }; -/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. -class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { +/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. +class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, Mangler &Mang, diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp similarity index 87% rename from lib/Target/ARM64/ARM64TargetTransformInfo.cpp rename to lib/Target/AArch64/AArch64TargetTransformInfo.cpp index cc4cdff62b5..33e482a53a4 100644 --- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===// +//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===// // // The LLVM Compiler Infrastructure // @@ -8,15 +8,15 @@ //===----------------------------------------------------------------------===// /// \file /// This file implements a TargetTransformInfo analysis pass specific to the -/// ARM64 target machine. It uses the target's detailed information to provide +/// AArch64 target machine. It uses the target's detailed information to provide /// more precise answers to certain TTI queries, while letting the target /// independent and default TTI implementations handle the rest. /// //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" @@ -24,35 +24,35 @@ #include using namespace llvm; -#define DEBUG_TYPE "arm64tti" +#define DEBUG_TYPE "aarch64tti" // Declare the pass initialization routine locally as target-specific passes // don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { -void initializeARM64TTIPass(PassRegistry &); +void initializeAArch64TTIPass(PassRegistry &); } namespace { -class ARM64TTI final : public ImmutablePass, public TargetTransformInfo { - const ARM64TargetMachine *TM; - const ARM64Subtarget *ST; - const ARM64TargetLowering *TLI; +class AArch64TTI final : public ImmutablePass, public TargetTransformInfo { + const AArch64TargetMachine *TM; + const AArch64Subtarget *ST; + const AArch64TargetLowering *TLI; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. 
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - ARM64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { + AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } - ARM64TTI(const ARM64TargetMachine *TM) + AArch64TTI(const AArch64TargetMachine *TM) : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), TLI(TM->getTargetLowering()) { - initializeARM64TTIPass(*PassRegistry::getPassRegistry()); + initializeAArch64TTIPass(*PassRegistry::getPassRegistry()); } void initializePass() override { pushTTIStack(this); } @@ -129,21 +129,21 @@ public: } // end anonymous namespace -INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti", - "ARM64 Target Transform Info", true, true, false) -char ARM64TTI::ID = 0; +INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti", + "AArch64 Target Transform Info", true, true, false) +char AArch64TTI::ID = 0; ImmutablePass * -llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) { - return new ARM64TTI(TM); +llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) { + return new AArch64TTI(TM); } /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. -unsigned ARM64TTI::getIntImmCost(int64_t Val) const { +unsigned AArch64TTI::getIntImmCost(int64_t Val) const { // Check if the immediate can be encoded within an instruction. - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, 64)) + if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64)) return 0; if (Val < 0) @@ -155,7 +155,7 @@ unsigned ARM64TTI::getIntImmCost(int64_t Val) const { } /// \brief Calculate the cost of materializing the given constant. -unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { +unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -179,7 +179,7 @@ unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { return std::max(1U, Cost); } -unsigned ARM64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, +unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); @@ -237,14 +237,14 @@ unsigned ARM64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, if (Idx == ImmIdx) { unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty); + unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TCC_Basic) ? static_cast(TCC_Free) : Cost; } - return ARM64TTI::getIntImmCost(Imm, Ty); + return AArch64TTI::getIntImmCost(Imm, Ty); } -unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, +unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); @@ -265,7 +265,7 @@ unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, case Intrinsic::umul_with_overflow: if (Idx == 1) { unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty); + unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TCC_Basic) ? 
static_cast(TCC_Free) : Cost; } @@ -280,18 +280,19 @@ unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return TCC_Free; break; } - return ARM64TTI::getIntImmCost(Imm, Ty); + return AArch64TTI::getIntImmCost(Imm, Ty); } -ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const { +AArch64TTI::PopcntSupportKind +AArch64TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); if (TyWidth == 32 || TyWidth == 64) return PSK_FastHardware; - // TODO: ARM64TargetLowering::LowerCTPOP() supports 128bit popcount. + // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount. return PSK_Software; } -unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, +unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -338,7 +339,7 @@ unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } -unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, +unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { assert(Val->isVectorTy() && "This must be a vector type"); @@ -363,7 +364,7 @@ unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, return 2; } -unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, +unsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info) const { // Legalize the type. @@ -386,7 +387,7 @@ unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, } } -unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { +unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. 
The resulting @@ -401,7 +402,7 @@ unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 1; } -unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, +unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -432,7 +433,7 @@ unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, +unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { std::pair LT = TLI->getTypeLegalizationCost(Src); diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp similarity index 78% rename from lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp rename to lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 4d710db1d93..65b77c547dc 100644 --- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1,4 +1,4 @@ -//===-- ARM64AsmParser.cpp - Parse ARM64 assembly to MCInst instructions --===// +//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==// // // The LLVM Compiler Infrastructure // @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "Utils/ARM64BaseInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -35,9 +35,9 @@ using namespace llvm; namespace { -class ARM64Operand; +class AArch64Operand; -class ARM64AsmParser : public MCTargetAsmParser { +class AArch64AsmParser : public MCTargetAsmParser { public: typedef SmallVectorImpl OperandVector; @@ -52,7 +52,7 @@ private: SMLoc getLoc() const { return Parser.getTok().getLoc(); } bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); - ARM64CC::CondCode parseCondCodeString(StringRef Cond); + AArch64CC::CondCode parseCondCodeString(StringRef Cond); bool parseCondCode(OperandVector &Operands, bool invertCondCode); int tryParseRegister(); int tryMatchVectorRegister(StringRef &Kind, bool expected); @@ -80,7 +80,7 @@ private: /// { #define GET_ASSEMBLER_HEADER -#include "ARM64GenAsmMatcher.inc" +#include "AArch64GenAsmMatcher.inc" /// } @@ -98,12 +98,12 @@ private: bool tryParseVectorRegister(OperandVector &Operands); public: - enum ARM64MatchResultTy { + enum AArch64MatchResultTy { Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY, #define GET_OPERAND_DIAGNOSTIC_TYPES -#include "ARM64GenAsmMatcher.inc" +#include "AArch64GenAsmMatcher.inc" }; - ARM64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, + AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { @@ -121,7 +121,7 @@ public: unsigned Kind) override; static bool classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, + AArch64MCExpr::VariantKind &ELFRefKind, MCSymbolRefExpr::VariantKind &DarwinRefKind, int64_t &Addend); }; @@ -129,9 +129,9 @@ public: namespace { -/// ARM64Operand - Instances of this class represent a parsed ARM64 machine 
+/// AArch64Operand - Instances of this class represent a parsed AArch64 machine /// instruction. -class ARM64Operand : public MCParsedAsmOperand { +class AArch64Operand : public MCParsedAsmOperand { private: enum KindTy { k_Immediate, @@ -183,7 +183,7 @@ private: }; struct CondCodeOp { - ARM64CC::CondCode Code; + AArch64CC::CondCode Code; }; struct FPImmOp { @@ -211,7 +211,7 @@ private: }; struct ShiftExtendOp { - ARM64_AM::ShiftExtendType Type; + AArch64_AM::ShiftExtendType Type; unsigned Amount; bool HasExplicitAmount; }; @@ -240,11 +240,11 @@ private: // the add<>Operands() calls. MCContext &Ctx; - ARM64Operand(KindTy K, MCContext &_Ctx) + AArch64Operand(KindTy K, MCContext &_Ctx) : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} public: - ARM64Operand(const ARM64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { + AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; @@ -321,7 +321,7 @@ public: return ShiftedImm.ShiftAmount; } - ARM64CC::CondCode getCondCode() const { + AArch64CC::CondCode getCondCode() const { assert(Kind == k_CondCode && "Invalid access!"); return CondCode.Code; } @@ -376,7 +376,7 @@ public: return Prefetch.Val; } - ARM64_AM::ShiftExtendType getShiftExtendType() const { + AArch64_AM::ShiftExtendType getShiftExtendType() const { assert(Kind == k_ShiftExtend && "Invalid access!"); return ShiftExtend.Type; } @@ -431,10 +431,10 @@ public: } bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const { - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; - if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, + if (!AArch64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { // If we don't understand the expression, assume the best and // let the fixup and relocation code deal with it. @@ -442,14 +442,14 @@ public: } if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_GOT_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { + ELFRefKind == AArch64MCExpr::VK_LO12 || + ELFRefKind == AArch64MCExpr::VK_GOT_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_GOTTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) { // Note that we don't range-check the addend. It's adjusted modulo page // size when converted, so there is no "out of range" condition when using // @pageoff. 
@@ -607,7 +607,7 @@ public: const MCConstantExpr *MCE = dyn_cast(getImm()); if (!MCE) return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 32); + return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32); } bool isLogicalImm64() const { if (!isImm()) @@ -615,7 +615,7 @@ public: const MCConstantExpr *MCE = dyn_cast(getImm()); if (!MCE) return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 64); + return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64); } bool isShiftedImm() const { return Kind == k_ShiftedImm; } bool isAddSubImm() const { @@ -634,22 +634,22 @@ public: Expr = getImm(); } - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; - if (ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, + if (AArch64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { return DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF || (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF && Addend == 0) - || ELFRefKind == ARM64MCExpr::VK_LO12 - || ELFRefKind == ARM64MCExpr::VK_DTPREL_HI12 - || ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 - || ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC - || ELFRefKind == ARM64MCExpr::VK_TPREL_HI12 - || ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 - || ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC - || ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12; + || ELFRefKind == AArch64MCExpr::VK_LO12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC + || ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 + || ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 + || ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC + || ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12; } // Otherwise it should be a real immediate in range: @@ -663,7 +663,7 @@ public: const MCConstantExpr *MCE = dyn_cast(getImm()); if (!MCE) return false; - return ARM64_AM::isAdvSIMDModImmType10(MCE->getValue()); + return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue()); } bool isBranchTarget26() const { if (!isImm()) @@ -699,15 +699,16 @@ public: return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2)); } - bool isMovWSymbol(ArrayRef AllowedModifiers) const { + bool + isMovWSymbol(ArrayRef AllowedModifiers) const { if (!isImm()) return false; - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; - if (!ARM64AsmParser::classifySymbolRef(getImm(), ELFRefKind, DarwinRefKind, - Addend)) { + if (!AArch64AsmParser::classifySymbolRef(getImm(), ELFRefKind, + DarwinRefKind, Addend)) { return false; } if (DarwinRefKind != MCSymbolRefExpr::VK_None) @@ -722,57 +723,56 @@ public: } bool isMovZSymbolG3() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 }; + static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; return isMovWSymbol(Variants); } bool isMovZSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2, - ARM64MCExpr::VK_ABS_G2_S, - ARM64MCExpr::VK_TPREL_G2, - ARM64MCExpr::VK_DTPREL_G2 }; + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, + AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2}; return isMovWSymbol(Variants); } bool isMovZSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G1, - ARM64MCExpr::VK_ABS_G1_S, 
- ARM64MCExpr::VK_GOTTPREL_G1, - ARM64MCExpr::VK_TPREL_G1, - ARM64MCExpr::VK_DTPREL_G1, }; + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, + AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1, + AArch64MCExpr::VK_DTPREL_G1, + }; return isMovWSymbol(Variants); } bool isMovZSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G0, - ARM64MCExpr::VK_ABS_G0_S, - ARM64MCExpr::VK_TPREL_G0, - ARM64MCExpr::VK_DTPREL_G0 }; + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, + AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0}; return isMovWSymbol(Variants); } bool isMovKSymbolG3() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 }; + static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; return isMovWSymbol(Variants); } bool isMovKSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2_NC }; + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G2_NC}; return isMovWSymbol(Variants); } bool isMovKSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G1_NC, ARM64MCExpr::VK_TPREL_G1_NC, - ARM64MCExpr::VK_DTPREL_G1_NC + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC, + AArch64MCExpr::VK_DTPREL_G1_NC }; return isMovWSymbol(Variants); } bool isMovKSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G0_NC, ARM64MCExpr::VK_GOTTPREL_G0_NC, - ARM64MCExpr::VK_TPREL_G0_NC, ARM64MCExpr::VK_DTPREL_G0_NC + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, + AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC }; return isMovWSymbol(Variants); } @@ -822,7 +822,7 @@ public: if (!isSysReg()) return false; bool IsKnownRegister; - auto Mapper = ARM64SysReg::MRSMapper(getSysRegFeatureBits()); + auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits()); Mapper.fromString(getSysReg(), IsKnownRegister); return IsKnownRegister; @@ -831,7 +831,7 @@ public: if (!isSysReg()) return false; bool IsKnownRegister; - auto Mapper = ARM64SysReg::MSRMapper(getSysRegFeatureBits()); + auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits()); Mapper.fromString(getSysReg(), IsKnownRegister); return IsKnownRegister; @@ -840,7 +840,7 @@ public: if (!isSysReg()) return false; bool IsKnownRegister; - ARM64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister); + AArch64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister); return IsKnownRegister; } @@ -848,16 +848,17 @@ public: bool isVectorReg() const { return Kind == k_Register && Reg.isVector; } bool isVectorRegLo() const { return Kind == k_Register && Reg.isVector && - ARM64MCRegisterClasses[ARM64::FPR128_loRegClassID].contains(Reg.RegNum); + AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains( + Reg.RegNum); } bool isGPR32as64() const { return Kind == k_Register && !Reg.isVector && - ARM64MCRegisterClasses[ARM64::GPR64RegClassID].contains(Reg.RegNum); + AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum); } bool isGPR64sp0() const { return Kind == k_Register && !Reg.isVector && - ARM64MCRegisterClasses[ARM64::GPR64spRegClassID].contains(Reg.RegNum); + AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum); } /// Is this a vector list 
with the type implicit (presumably attached to the @@ -904,20 +905,21 @@ public: if (!isShiftExtend()) return false; - ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - return (ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR || - ST == ARM64_AM::ROR || ST == ARM64_AM::MSL); + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR || ST == AArch64_AM::ROR || + ST == AArch64_AM::MSL); } bool isExtend() const { if (!isShiftExtend()) return false; - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return (ET == ARM64_AM::UXTB || ET == ARM64_AM::SXTB || - ET == ARM64_AM::UXTH || ET == ARM64_AM::SXTH || - ET == ARM64_AM::UXTW || ET == ARM64_AM::SXTW || - ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX || - ET == ARM64_AM::LSL) && + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB || + ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH || + ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW || + ET == AArch64_AM::UXTX || ET == AArch64_AM::SXTX || + ET == AArch64_AM::LSL) && getShiftExtendAmount() <= 4; } @@ -925,22 +927,23 @@ public: if (!isExtend()) return false; // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class). - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return ET != ARM64_AM::UXTX && ET != ARM64_AM::SXTX; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX; } bool isExtendLSL64() const { if (!isExtend()) return false; - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return (ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX || ET == ARM64_AM::LSL) && - getShiftExtendAmount() <= 4; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTX || ET == AArch64_AM::SXTX || + ET == AArch64_AM::LSL) && + getShiftExtendAmount() <= 4; } template bool isMemXExtend() const { if (!isExtend()) return false; - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return (ET == ARM64_AM::LSL || ET == ARM64_AM::SXTX) && + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::LSL || ET == AArch64_AM::SXTX) && (getShiftExtendAmount() == Log2_32(Width / 8) || getShiftExtendAmount() == 0); } @@ -948,8 +951,8 @@ public: template bool isMemWExtend() const { if (!isExtend()) return false; - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - return (ET == ARM64_AM::UXTW || ET == ARM64_AM::SXTW) && + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW) && (getShiftExtendAmount() == Log2_32(Width / 8) || getShiftExtendAmount() == 0); } @@ -960,9 +963,9 @@ public: return false; // An arithmetic shifter is LSL, LSR, or ASR. - ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - return (ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || - ST == ARM64_AM::ASR) && getShiftExtendAmount() < width; + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR) && getShiftExtendAmount() < width; } template @@ -971,9 +974,9 @@ public: return false; // A logical shifter is LSL, LSR, ASR or ROR. 
- ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - return (ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR || - ST == ARM64_AM::ROR) && + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR || ST == AArch64_AM::ROR) && getShiftExtendAmount() < width; } @@ -982,8 +985,8 @@ public: return false; // A MOVi shifter is LSL of 0, 16, 32, or 48. - ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - if (ST != ARM64_AM::LSL) + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + if (ST != AArch64_AM::LSL) return false; uint64_t Val = getShiftExtendAmount(); return (Val == 0 || Val == 16); @@ -994,8 +997,8 @@ public: return false; // A MOVi shifter is LSL of 0 or 16. - ARM64_AM::ShiftExtendType ST = getShiftExtendType(); - if (ST != ARM64_AM::LSL) + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + if (ST != AArch64_AM::LSL) return false; uint64_t Val = getShiftExtendAmount(); return (Val == 0 || Val == 16 || Val == 32 || Val == 48); @@ -1007,7 +1010,7 @@ public: // A logical vector shifter is a left shift by 0, 8, 16, or 24. unsigned Shift = getShiftExtendAmount(); - return getShiftExtendType() == ARM64_AM::LSL && + return getShiftExtendType() == AArch64_AM::LSL && (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24); } @@ -1017,7 +1020,8 @@ public: // A logical vector shifter is a left shift by 0 or 8. unsigned Shift = getShiftExtendAmount(); - return getShiftExtendType() == ARM64_AM::LSL && (Shift == 0 || Shift == 8); + return getShiftExtendType() == AArch64_AM::LSL && + (Shift == 0 || Shift == 8); } bool isMoveVecShifter() const { @@ -1026,7 +1030,8 @@ public: // A logical vector shifter is a left shift by 8 or 16. unsigned Shift = getShiftExtendAmount(); - return getShiftExtendType() == ARM64_AM::MSL && (Shift == 8 || Shift == 16); + return getShiftExtendType() == AArch64_AM::MSL && + (Shift == 8 || Shift == 16); } // Fallback unscaled operands are for aliases of LDR/STR that fall back @@ -1088,10 +1093,11 @@ public: void addGPR32as64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - assert(ARM64MCRegisterClasses[ARM64::GPR64RegClassID].contains(getReg())); + assert( + AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(getReg())); const MCRegisterInfo *RI = Ctx.getRegisterInfo(); - uint32_t Reg = RI->getRegClass(ARM64::GPR32RegClassID).getRegister( + uint32_t Reg = RI->getRegClass(AArch64::GPR32RegClassID).getRegister( RI->getEncodingValue(getReg())); Inst.addOperand(MCOperand::CreateReg(Reg)); @@ -1099,13 +1105,15 @@ public: void addVectorReg64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - assert(ARM64MCRegisterClasses[ARM64::FPR128RegClassID].contains(getReg())); - Inst.addOperand(MCOperand::CreateReg(ARM64::D0 + getReg() - ARM64::Q0)); + assert( + AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg())); + Inst.addOperand(MCOperand::CreateReg(AArch64::D0 + getReg() - AArch64::Q0)); } void addVectorReg128Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - assert(ARM64MCRegisterClasses[ARM64::FPR128RegClassID].contains(getReg())); + assert( + AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg())); Inst.addOperand(MCOperand::CreateReg(getReg())); } @@ -1117,23 +1125,23 @@ public: template void addVectorList64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); 
- static unsigned FirstRegs[] = { ARM64::D0, ARM64::D0_D1, - ARM64::D0_D1_D2, ARM64::D0_D1_D2_D3 }; + static unsigned FirstRegs[] = { AArch64::D0, AArch64::D0_D1, + AArch64::D0_D1_D2, AArch64::D0_D1_D2_D3 }; unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); + MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0)); } template void addVectorList128Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::Q0, ARM64::Q0_Q1, - ARM64::Q0_Q1_Q2, ARM64::Q0_Q1_Q2_Q3 }; + static unsigned FirstRegs[] = { AArch64::Q0, AArch64::Q0_Q1, + AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 }; unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); + MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0)); } void addVectorIndex1Operands(MCInst &Inst, unsigned N) const { @@ -1340,7 +1348,7 @@ public: assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast(getImm()); assert(MCE && "Invalid logical immediate operand!"); - uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 32); + uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32); Inst.addOperand(MCOperand::CreateImm(encoding)); } @@ -1348,7 +1356,7 @@ public: assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast(getImm()); assert(MCE && "Invalid logical immediate operand!"); - uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 64); + uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 64); Inst.addOperand(MCOperand::CreateImm(encoding)); } @@ -1356,7 +1364,7 @@ public: assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast(getImm()); assert(MCE && "Invalid immediate operand!"); - uint64_t encoding = ARM64_AM::encodeAdvSIMDModImmType10(MCE->getValue()); + uint64_t encoding = AArch64_AM::encodeAdvSIMDModImmType10(MCE->getValue()); Inst.addOperand(MCOperand::CreateImm(encoding)); } @@ -1416,7 +1424,7 @@ public: assert(N == 1 && "Invalid number of operands!"); bool Valid; - auto Mapper = ARM64SysReg::MRSMapper(getSysRegFeatureBits()); + auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits()); uint32_t Bits = Mapper.fromString(getSysReg(), Valid); Inst.addOperand(MCOperand::CreateImm(Bits)); @@ -1426,7 +1434,7 @@ public: assert(N == 1 && "Invalid number of operands!"); bool Valid; - auto Mapper = ARM64SysReg::MSRMapper(getSysRegFeatureBits()); + auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits()); uint32_t Bits = Mapper.fromString(getSysReg(), Valid); Inst.addOperand(MCOperand::CreateImm(Bits)); @@ -1436,7 +1444,8 @@ public: assert(N == 1 && "Invalid number of operands!"); bool Valid; - uint32_t Bits = ARM64PState::PStateMapper().fromString(getSysReg(), Valid); + uint32_t Bits = + AArch64PState::PStateMapper().fromString(getSysReg(), Valid); Inst.addOperand(MCOperand::CreateImm(Bits)); } @@ -1454,30 +1463,30 @@ public: void addShifterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); unsigned Imm = - ARM64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount()); + AArch64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount()); Inst.addOperand(MCOperand::CreateImm(Imm)); } void addExtendOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of 
operands!"); - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - if (ET == ARM64_AM::LSL) ET = ARM64_AM::UXTW; - unsigned Imm = ARM64_AM::getArithExtendImm(ET, getShiftExtendAmount()); + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTW; + unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount()); Inst.addOperand(MCOperand::CreateImm(Imm)); } void addExtend64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - if (ET == ARM64_AM::LSL) ET = ARM64_AM::UXTX; - unsigned Imm = ARM64_AM::getArithExtendImm(ET, getShiftExtendAmount()); + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTX; + unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount()); Inst.addOperand(MCOperand::CreateImm(Imm)); } void addMemExtendOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - bool IsSigned = ET == ARM64_AM::SXTW || ET == ARM64_AM::SXTX; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX; Inst.addOperand(MCOperand::CreateImm(IsSigned)); Inst.addOperand(MCOperand::CreateImm(getShiftExtendAmount() != 0)); } @@ -1488,8 +1497,8 @@ public: // than its size. void addMemExtend8Operands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); - ARM64_AM::ShiftExtendType ET = getShiftExtendType(); - bool IsSigned = ET == ARM64_AM::SXTW || ET == ARM64_AM::SXTX; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX; Inst.addOperand(MCOperand::CreateImm(IsSigned)); Inst.addOperand(MCOperand::CreateImm(hasShiftExtendAmount())); } @@ -1514,9 +1523,9 @@ public: void print(raw_ostream &OS) const override; - static ARM64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, + static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Token, Ctx); + AArch64Operand *Op = new AArch64Operand(k_Token, Ctx); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->Tok.IsSuffix = IsSuffix; @@ -1525,9 +1534,9 @@ public: return Op; } - static ARM64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, + static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Register, Ctx); + AArch64Operand *Op = new AArch64Operand(k_Register, Ctx); Op->Reg.RegNum = RegNum; Op->Reg.isVector = isVector; Op->StartLoc = S; @@ -1535,10 +1544,10 @@ public: return Op; } - static ARM64Operand *CreateVectorList(unsigned RegNum, unsigned Count, + static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements, char ElementKind, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorList, Ctx); + AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.NumElements = NumElements; @@ -1548,27 +1557,28 @@ public: return Op; } - static ARM64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, + static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorIndex, Ctx); + AArch64Operand *Op 
= new AArch64Operand(k_VectorIndex, Ctx); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARM64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, + static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Immediate, Ctx); + AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARM64Operand *CreateShiftedImm(const MCExpr *Val, unsigned ShiftAmount, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_ShiftedImm, Ctx); + static AArch64Operand *CreateShiftedImm(const MCExpr *Val, + unsigned ShiftAmount, SMLoc S, + SMLoc E, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx); Op->ShiftedImm .Val = Val; Op->ShiftedImm.ShiftAmount = ShiftAmount; Op->StartLoc = S; @@ -1576,34 +1586,34 @@ public: return Op; } - static ARM64Operand *CreateCondCode(ARM64CC::CondCode Code, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_CondCode, Ctx); + static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S, + SMLoc E, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx); Op->CondCode.Code = Code; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARM64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_FPImm, Ctx); + static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx); Op->FPImm.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARM64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Barrier, Ctx); + static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx); Op->Barrier.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARM64Operand *CreateSysReg(StringRef Str, SMLoc S, + static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SysReg, Ctx); + AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx); Op->SysReg.Data = Str.data(); Op->SysReg.Length = Str.size(); Op->SysReg.FeatureBits = FeatureBits; @@ -1612,27 +1622,27 @@ public: return Op; } - static ARM64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, + static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SysCR, Ctx); + AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx); Op->SysCRImm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARM64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Prefetch, Ctx); + static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx); Op->Prefetch.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARM64Operand *CreateShiftExtend(ARM64_AM::ShiftExtendType ShOp, + static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val, bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_ShiftExtend, Ctx); + AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx); Op->ShiftExtend.Type = ShOp; Op->ShiftExtend.Amount = Val; 
Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount; @@ -1644,15 +1654,15 @@ public: } // end anonymous namespace. -void ARM64Operand::print(raw_ostream &OS) const { +void AArch64Operand::print(raw_ostream &OS) const { switch (Kind) { case k_FPImm: - OS << ""; + OS << ""; break; case k_Barrier: { bool Valid; - StringRef Name = ARM64DB::DBarrierMapper().toString(getBarrier(), Valid); + StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid); if (Valid) OS << ""; else @@ -1666,7 +1676,7 @@ void ARM64Operand::print(raw_ostream &OS) const { unsigned Shift = getShiftedImmShift(); OS << "print(OS); - OS << ", lsl #" << ARM64_AM::getShiftValue(Shift) << ">"; + OS << ", lsl #" << AArch64_AM::getShiftValue(Shift) << ">"; break; } case k_CondCode: @@ -1697,7 +1707,7 @@ void ARM64Operand::print(raw_ostream &OS) const { break; case k_Prefetch: { bool Valid; - StringRef Name = ARM64PRFM::PRFMMapper().toString(getPrefetch(), Valid); + StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid); if (Valid) OS << ""; else @@ -1705,7 +1715,7 @@ void ARM64Operand::print(raw_ostream &OS) const { break; } case k_ShiftExtend: { - OS << "<" << ARM64_AM::getShiftExtendName(getShiftExtendType()) << " #" + OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #" << getShiftExtendAmount(); if (!hasShiftExtendAmount()) OS << ""; @@ -1724,38 +1734,38 @@ static unsigned MatchRegisterName(StringRef Name); static unsigned matchVectorRegName(StringRef Name) { return StringSwitch(Name) - .Case("v0", ARM64::Q0) - .Case("v1", ARM64::Q1) - .Case("v2", ARM64::Q2) - .Case("v3", ARM64::Q3) - .Case("v4", ARM64::Q4) - .Case("v5", ARM64::Q5) - .Case("v6", ARM64::Q6) - .Case("v7", ARM64::Q7) - .Case("v8", ARM64::Q8) - .Case("v9", ARM64::Q9) - .Case("v10", ARM64::Q10) - .Case("v11", ARM64::Q11) - .Case("v12", ARM64::Q12) - .Case("v13", ARM64::Q13) - .Case("v14", ARM64::Q14) - .Case("v15", ARM64::Q15) - .Case("v16", ARM64::Q16) - .Case("v17", ARM64::Q17) - .Case("v18", ARM64::Q18) - .Case("v19", ARM64::Q19) - .Case("v20", ARM64::Q20) - .Case("v21", ARM64::Q21) - .Case("v22", ARM64::Q22) - .Case("v23", ARM64::Q23) - .Case("v24", ARM64::Q24) - .Case("v25", ARM64::Q25) - .Case("v26", ARM64::Q26) - .Case("v27", ARM64::Q27) - .Case("v28", ARM64::Q28) - .Case("v29", ARM64::Q29) - .Case("v30", ARM64::Q30) - .Case("v31", ARM64::Q31) + .Case("v0", AArch64::Q0) + .Case("v1", AArch64::Q1) + .Case("v2", AArch64::Q2) + .Case("v3", AArch64::Q3) + .Case("v4", AArch64::Q4) + .Case("v5", AArch64::Q5) + .Case("v6", AArch64::Q6) + .Case("v7", AArch64::Q7) + .Case("v8", AArch64::Q8) + .Case("v9", AArch64::Q9) + .Case("v10", AArch64::Q10) + .Case("v11", AArch64::Q11) + .Case("v12", AArch64::Q12) + .Case("v13", AArch64::Q13) + .Case("v14", AArch64::Q14) + .Case("v15", AArch64::Q15) + .Case("v16", AArch64::Q16) + .Case("v17", AArch64::Q17) + .Case("v18", AArch64::Q18) + .Case("v19", AArch64::Q19) + .Case("v20", AArch64::Q20) + .Case("v21", AArch64::Q21) + .Case("v22", AArch64::Q22) + .Case("v23", AArch64::Q23) + .Case("v24", AArch64::Q24) + .Case("v25", AArch64::Q25) + .Case("v26", AArch64::Q26) + .Case("v27", AArch64::Q27) + .Case("v28", AArch64::Q28) + .Case("v29", AArch64::Q29) + .Case("v30", AArch64::Q30) + .Case("v31", AArch64::Q31) .Default(0); } @@ -1798,8 +1808,8 @@ static void parseValidVectorKind(StringRef Name, unsigned &NumElements, } } -bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { +bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc 
&StartLoc, + SMLoc &EndLoc) { StartLoc = getLoc(); RegNo = tryParseRegister(); EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1); @@ -1809,7 +1819,7 @@ bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, /// tryParseRegister - Try to parse a register name. The token must be an /// Identifier when called, and if it is a register name the token is eaten and /// the register is added to the operand list. -int ARM64AsmParser::tryParseRegister() { +int AArch64AsmParser::tryParseRegister() { const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); @@ -1818,10 +1828,10 @@ int ARM64AsmParser::tryParseRegister() { // Also handle a few aliases of registers. if (RegNum == 0) RegNum = StringSwitch(lowerCase) - .Case("fp", ARM64::FP) - .Case("lr", ARM64::LR) - .Case("x31", ARM64::XZR) - .Case("w31", ARM64::WZR) + .Case("fp", AArch64::FP) + .Case("lr", AArch64::LR) + .Case("x31", AArch64::XZR) + .Case("w31", AArch64::WZR) .Default(0); if (RegNum == 0) @@ -1833,7 +1843,7 @@ int ARM64AsmParser::tryParseRegister() { /// tryMatchVectorRegister - Try to parse a vector register name with optional /// kind specifier. If it is a register specifier, eat the token and return it. -int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { +int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { if (Parser.getTok().isNot(AsmToken::Identifier)) { TokError("vector register expected"); return -1; @@ -1863,8 +1873,8 @@ int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { } /// tryParseSysCROperand - Try to parse a system instruction CR operand name. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) { SMLoc S = getLoc(); if (Parser.getTok().isNot(AsmToken::Identifier)) { @@ -1886,13 +1896,14 @@ ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) { } Parser.Lex(); // Eat identifier token. - Operands.push_back(ARM64Operand::CreateSysCR(CRNum, S, getLoc(), getContext())); + Operands.push_back( + AArch64Operand::CreateSysCR(CRNum, S, getLoc(), getContext())); return MatchOperand_Success; } /// tryParsePrefetch - Try to parse a prefetch operand. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { SMLoc S = getLoc(); const AsmToken &Tok = Parser.getTok(); // Either an identifier for named values or a 5-bit immediate. @@ -1915,7 +1926,7 @@ ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); return MatchOperand_Success; } @@ -1925,21 +1936,21 @@ ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) { } bool Valid; - unsigned prfop = ARM64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); + unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; } Parser.Lex(); // Eat identifier token. 
- Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); return MatchOperand_Success; } /// tryParseAdrpLabel - Parse and validate a source label for the ADRP /// instruction. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { SMLoc S = getLoc(); const MCExpr *Expr; @@ -1950,15 +1961,16 @@ ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { if (parseSymbolicImmVal(Expr)) return MatchOperand_ParseFail; - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; if (classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { if (DarwinRefKind == MCSymbolRefExpr::VK_None && - ELFRefKind == ARM64MCExpr::VK_INVALID) { + ELFRefKind == AArch64MCExpr::VK_INVALID) { // No modifier was specified at all; this is the syntax for an ELF basic // ADRP relocation (unfortunately). - Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_ABS_PAGE, getContext()); + Expr = + AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext()); } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) && Addend != 0) { @@ -1967,9 +1979,9 @@ ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE && DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE && DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE && - ELFRefKind != ARM64MCExpr::VK_GOT_PAGE && - ELFRefKind != ARM64MCExpr::VK_GOTTPREL_PAGE && - ELFRefKind != ARM64MCExpr::VK_TLSDESC_PAGE) { + ELFRefKind != AArch64MCExpr::VK_GOT_PAGE && + ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE && + ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) { // The operand must be an @page or @gotpage qualified symbolref. Error(S, "page or gotpage label reference expected"); return MatchOperand_ParseFail; @@ -1980,15 +1992,15 @@ ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { // addend is a raw value here. The linker will adjust it to only reference the // page. SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); return MatchOperand_Success; } /// tryParseAdrLabel - Parse and validate a source label for the ADR /// instruction. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) { SMLoc S = getLoc(); const MCExpr *Expr; @@ -2000,14 +2012,14 @@ ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) { return MatchOperand_ParseFail; SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); return MatchOperand_Success; } /// tryParseFPImm - A floating point immediate expression operand. 
-ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { SMLoc S = getLoc(); bool Hash = false; @@ -2028,7 +2040,7 @@ ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. IntVal ^= (uint64_t)isNegative << 63; - int Val = ARM64_AM::getFP64Imm(APInt(64, IntVal)); + int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); Parser.Lex(); // Eat the token. // Check for out of range values. As an exception, we let Zero through, // as we handle that special case in post-processing before matching in @@ -2037,7 +2049,7 @@ ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { TokError("expected compatible register or floating-point constant"); return MatchOperand_ParseFail; } - Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext())); + Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); return MatchOperand_Success; } if (Tok.is(AsmToken::Integer)) { @@ -2053,10 +2065,10 @@ ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. IntVal ^= (uint64_t)isNegative << 63; - Val = ARM64_AM::getFP64Imm(APInt(64, IntVal)); + Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); } Parser.Lex(); // Eat the token. - Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext())); + Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); return MatchOperand_Success; } @@ -2068,8 +2080,8 @@ ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { } /// tryParseAddSubImm - Parse ADD/SUB shifted immediate operand -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAddSubImm(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) { SMLoc S = getLoc(); if (Parser.getTok().is(AsmToken::Hash)) @@ -2092,8 +2104,8 @@ ARM64AsmParser::tryParseAddSubImm(OperandVector &Operands) { } } SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateShiftedImm(Imm, ShiftAmount, S, E, - getContext())); + Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount, S, E, + getContext())); return MatchOperand_Success; } @@ -2128,81 +2140,81 @@ ARM64AsmParser::tryParseAddSubImm(OperandVector &Operands) { Parser.Lex(); // Eat the number SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateShiftedImm(Imm, ShiftAmount, - S, E, getContext())); + Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount, + S, E, getContext())); return MatchOperand_Success; } /// parseCondCodeString - Parse a Condition Code string. 
-ARM64CC::CondCode ARM64AsmParser::parseCondCodeString(StringRef Cond) { - ARM64CC::CondCode CC = StringSwitch(Cond.lower()) - .Case("eq", ARM64CC::EQ) - .Case("ne", ARM64CC::NE) - .Case("cs", ARM64CC::HS) - .Case("hs", ARM64CC::HS) - .Case("cc", ARM64CC::LO) - .Case("lo", ARM64CC::LO) - .Case("mi", ARM64CC::MI) - .Case("pl", ARM64CC::PL) - .Case("vs", ARM64CC::VS) - .Case("vc", ARM64CC::VC) - .Case("hi", ARM64CC::HI) - .Case("ls", ARM64CC::LS) - .Case("ge", ARM64CC::GE) - .Case("lt", ARM64CC::LT) - .Case("gt", ARM64CC::GT) - .Case("le", ARM64CC::LE) - .Case("al", ARM64CC::AL) - .Case("nv", ARM64CC::NV) - .Default(ARM64CC::Invalid); +AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) { + AArch64CC::CondCode CC = StringSwitch(Cond.lower()) + .Case("eq", AArch64CC::EQ) + .Case("ne", AArch64CC::NE) + .Case("cs", AArch64CC::HS) + .Case("hs", AArch64CC::HS) + .Case("cc", AArch64CC::LO) + .Case("lo", AArch64CC::LO) + .Case("mi", AArch64CC::MI) + .Case("pl", AArch64CC::PL) + .Case("vs", AArch64CC::VS) + .Case("vc", AArch64CC::VC) + .Case("hi", AArch64CC::HI) + .Case("ls", AArch64CC::LS) + .Case("ge", AArch64CC::GE) + .Case("lt", AArch64CC::LT) + .Case("gt", AArch64CC::GT) + .Case("le", AArch64CC::LE) + .Case("al", AArch64CC::AL) + .Case("nv", AArch64CC::NV) + .Default(AArch64CC::Invalid); return CC; } /// parseCondCode - Parse a Condition Code operand. -bool ARM64AsmParser::parseCondCode(OperandVector &Operands, - bool invertCondCode) { +bool AArch64AsmParser::parseCondCode(OperandVector &Operands, + bool invertCondCode) { SMLoc S = getLoc(); const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); StringRef Cond = Tok.getString(); - ARM64CC::CondCode CC = parseCondCodeString(Cond); - if (CC == ARM64CC::Invalid) + AArch64CC::CondCode CC = parseCondCodeString(Cond); + if (CC == AArch64CC::Invalid) return TokError("invalid condition code"); Parser.Lex(); // Eat identifier token. if (invertCondCode) - CC = ARM64CC::getInvertedCondCode(ARM64CC::CondCode(CC)); + CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC)); Operands.push_back( - ARM64Operand::CreateCondCode(CC, S, getLoc(), getContext())); + AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext())); return false; } /// tryParseOptionalShift - Some operands take an optional shift argument. Parse /// them if present. 
-ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); std::string LowerID = Tok.getString().lower(); - ARM64_AM::ShiftExtendType ShOp = - StringSwitch(LowerID) - .Case("lsl", ARM64_AM::LSL) - .Case("lsr", ARM64_AM::LSR) - .Case("asr", ARM64_AM::ASR) - .Case("ror", ARM64_AM::ROR) - .Case("msl", ARM64_AM::MSL) - .Case("uxtb", ARM64_AM::UXTB) - .Case("uxth", ARM64_AM::UXTH) - .Case("uxtw", ARM64_AM::UXTW) - .Case("uxtx", ARM64_AM::UXTX) - .Case("sxtb", ARM64_AM::SXTB) - .Case("sxth", ARM64_AM::SXTH) - .Case("sxtw", ARM64_AM::SXTW) - .Case("sxtx", ARM64_AM::SXTX) - .Default(ARM64_AM::InvalidShiftExtend); - - if (ShOp == ARM64_AM::InvalidShiftExtend) + AArch64_AM::ShiftExtendType ShOp = + StringSwitch(LowerID) + .Case("lsl", AArch64_AM::LSL) + .Case("lsr", AArch64_AM::LSR) + .Case("asr", AArch64_AM::ASR) + .Case("ror", AArch64_AM::ROR) + .Case("msl", AArch64_AM::MSL) + .Case("uxtb", AArch64_AM::UXTB) + .Case("uxth", AArch64_AM::UXTH) + .Case("uxtw", AArch64_AM::UXTW) + .Case("uxtx", AArch64_AM::UXTX) + .Case("sxtb", AArch64_AM::SXTB) + .Case("sxth", AArch64_AM::SXTH) + .Case("sxtw", AArch64_AM::SXTW) + .Case("sxtx", AArch64_AM::SXTX) + .Default(AArch64_AM::InvalidShiftExtend); + + if (ShOp == AArch64_AM::InvalidShiftExtend) return MatchOperand_NoMatch; SMLoc S = Tok.getLoc(); @@ -2210,9 +2222,9 @@ ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { bool Hash = getLexer().is(AsmToken::Hash); if (!Hash && getLexer().isNot(AsmToken::Integer)) { - if (ShOp == ARM64_AM::LSL || ShOp == ARM64_AM::LSR || - ShOp == ARM64_AM::ASR || ShOp == ARM64_AM::ROR || - ShOp == ARM64_AM::MSL) { + if (ShOp == AArch64_AM::LSL || ShOp == AArch64_AM::LSR || + ShOp == AArch64_AM::ASR || ShOp == AArch64_AM::ROR || + ShOp == AArch64_AM::MSL) { // We expect a number here. TokError("expected #imm after shift specifier"); return MatchOperand_ParseFail; @@ -2221,7 +2233,7 @@ ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { // "extend" type operatoins don't need an immediate, #0 is implicit. SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); Operands.push_back( - ARM64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext())); + AArch64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext())); return MatchOperand_Success; } @@ -2246,21 +2258,21 @@ ARM64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { } SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateShiftExtend(ShOp, MCE->getValue(), - true, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateShiftExtend( + ShOp, MCE->getValue(), true, S, E, getContext())); return MatchOperand_Success; } /// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for /// the SYS instruction. Parse them specially so that we create a SYS MCInst. 
-bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, +bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands) { if (Name.find('.') != StringRef::npos) return TokError("invalid operand"); Mnemonic = Name; Operands.push_back( - ARM64Operand::CreateToken("sys", false, NameLoc, getContext())); + AArch64Operand::CreateToken("sys", false, NameLoc, getContext())); const AsmToken &Tok = Parser.getTok(); StringRef Op = Tok.getString(); @@ -2272,14 +2284,14 @@ bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, do { \ Expr = MCConstantExpr::Create(op1, getContext()); \ Operands.push_back( \ - ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ + AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ Operands.push_back( \ - ARM64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ + AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ Operands.push_back( \ - ARM64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ + AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ Expr = MCConstantExpr::Create(op2, getContext()); \ Operands.push_back( \ - ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ + AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ } while (0) if (Mnemonic == "ic") { @@ -2498,8 +2510,8 @@ bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, return false; } -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); // Can be either a #imm style literal or an option name @@ -2522,7 +2534,7 @@ ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } Operands.push_back( - ARM64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); + AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); return MatchOperand_Success; } @@ -2532,32 +2544,33 @@ ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { } bool Valid; - unsigned Opt = ARM64DB::DBarrierMapper().fromString(Tok.getString(), Valid); + unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; } // The only valid named option for ISB is 'sy' - if (Mnemonic == "isb" && Opt != ARM64DB::SY) { + if (Mnemonic == "isb" && Opt != AArch64DB::SY) { TokError("'sy' or #imm operand expected"); return MatchOperand_ParseFail; } - Operands.push_back(ARM64Operand::CreateBarrier(Opt, getLoc(), getContext())); + Operands.push_back( + AArch64Operand::CreateBarrier(Opt, getLoc(), getContext())); Parser.Lex(); // Consume the option return MatchOperand_Success; } -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseSysReg(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return MatchOperand_NoMatch; - Operands.push_back(ARM64Operand::CreateSysReg(Tok.getString(), getLoc(), + Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), getLoc(), STI.getFeatureBits(), getContext())); Parser.Lex(); // Eat identifier @@ -2565,7 +2578,7 @@ ARM64AsmParser::tryParseSysReg(OperandVector &Operands) { } /// tryParseVectorRegister - Parse a vector register operand. 
-bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) { +bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) { if (Parser.getTok().isNot(AsmToken::Identifier)) return true; @@ -2576,11 +2589,12 @@ bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) { if (Reg == -1) return true; Operands.push_back( - ARM64Operand::CreateReg(Reg, true, S, getLoc(), getContext())); + AArch64Operand::CreateReg(Reg, true, S, getLoc(), getContext())); // If there was an explicit qualifier, that goes on as a literal text // operand. if (!Kind.empty()) - Operands.push_back(ARM64Operand::CreateToken(Kind, false, S, getContext())); + Operands.push_back( + AArch64Operand::CreateToken(Kind, false, S, getContext())); // If there is an index specifier following the register, parse that too. if (Parser.getTok().is(AsmToken::LBrac)) { @@ -2604,15 +2618,15 @@ bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) { Parser.Lex(); // Eat right bracket token. - Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E, - getContext())); + Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx, + E, getContext())); } return false; } /// parseRegister - Parse a non-vector register operand. -bool ARM64AsmParser::parseRegister(OperandVector &Operands) { +bool AArch64AsmParser::parseRegister(OperandVector &Operands) { SMLoc S = getLoc(); // Try for a vector register. if (!tryParseVectorRegister(Operands)) @@ -2623,7 +2637,7 @@ bool ARM64AsmParser::parseRegister(OperandVector &Operands) { if (Reg == -1) return true; Operands.push_back( - ARM64Operand::CreateReg(Reg, false, S, getLoc(), getContext())); + AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext())); // A small number of instructions (FMOVXDhighr, for example) have "[1]" // as a string token in the instruction itself. 
@@ -2640,11 +2654,11 @@ bool ARM64AsmParser::parseRegister(OperandVector &Operands) { SMLoc RBracS = getLoc(); Parser.Lex(); Operands.push_back( - ARM64Operand::CreateToken("[", false, LBracS, getContext())); + AArch64Operand::CreateToken("[", false, LBracS, getContext())); Operands.push_back( - ARM64Operand::CreateToken("1", false, IntS, getContext())); + AArch64Operand::CreateToken("1", false, IntS, getContext())); Operands.push_back( - ARM64Operand::CreateToken("]", false, RBracS, getContext())); + AArch64Operand::CreateToken("]", false, RBracS, getContext())); return false; } } @@ -2654,9 +2668,9 @@ bool ARM64AsmParser::parseRegister(OperandVector &Operands) { return false; } -bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { +bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { bool HasELFModifier = false; - ARM64MCExpr::VariantKind RefKind; + AArch64MCExpr::VariantKind RefKind; if (Parser.getTok().is(AsmToken::Colon)) { Parser.Lex(); // Eat ':" @@ -2669,45 +2683,45 @@ bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { } std::string LowerCase = Parser.getTok().getIdentifier().lower(); - RefKind = StringSwitch(LowerCase) - .Case("lo12", ARM64MCExpr::VK_LO12) - .Case("abs_g3", ARM64MCExpr::VK_ABS_G3) - .Case("abs_g2", ARM64MCExpr::VK_ABS_G2) - .Case("abs_g2_s", ARM64MCExpr::VK_ABS_G2_S) - .Case("abs_g2_nc", ARM64MCExpr::VK_ABS_G2_NC) - .Case("abs_g1", ARM64MCExpr::VK_ABS_G1) - .Case("abs_g1_s", ARM64MCExpr::VK_ABS_G1_S) - .Case("abs_g1_nc", ARM64MCExpr::VK_ABS_G1_NC) - .Case("abs_g0", ARM64MCExpr::VK_ABS_G0) - .Case("abs_g0_s", ARM64MCExpr::VK_ABS_G0_S) - .Case("abs_g0_nc", ARM64MCExpr::VK_ABS_G0_NC) - .Case("dtprel_g2", ARM64MCExpr::VK_DTPREL_G2) - .Case("dtprel_g1", ARM64MCExpr::VK_DTPREL_G1) - .Case("dtprel_g1_nc", ARM64MCExpr::VK_DTPREL_G1_NC) - .Case("dtprel_g0", ARM64MCExpr::VK_DTPREL_G0) - .Case("dtprel_g0_nc", ARM64MCExpr::VK_DTPREL_G0_NC) - .Case("dtprel_hi12", ARM64MCExpr::VK_DTPREL_HI12) - .Case("dtprel_lo12", ARM64MCExpr::VK_DTPREL_LO12) - .Case("dtprel_lo12_nc", ARM64MCExpr::VK_DTPREL_LO12_NC) - .Case("tprel_g2", ARM64MCExpr::VK_TPREL_G2) - .Case("tprel_g1", ARM64MCExpr::VK_TPREL_G1) - .Case("tprel_g1_nc", ARM64MCExpr::VK_TPREL_G1_NC) - .Case("tprel_g0", ARM64MCExpr::VK_TPREL_G0) - .Case("tprel_g0_nc", ARM64MCExpr::VK_TPREL_G0_NC) - .Case("tprel_hi12", ARM64MCExpr::VK_TPREL_HI12) - .Case("tprel_lo12", ARM64MCExpr::VK_TPREL_LO12) - .Case("tprel_lo12_nc", ARM64MCExpr::VK_TPREL_LO12_NC) - .Case("tlsdesc_lo12", ARM64MCExpr::VK_TLSDESC_LO12) - .Case("got", ARM64MCExpr::VK_GOT_PAGE) - .Case("got_lo12", ARM64MCExpr::VK_GOT_LO12) - .Case("gottprel", ARM64MCExpr::VK_GOTTPREL_PAGE) - .Case("gottprel_lo12", ARM64MCExpr::VK_GOTTPREL_LO12_NC) - .Case("gottprel_g1", ARM64MCExpr::VK_GOTTPREL_G1) - .Case("gottprel_g0_nc", ARM64MCExpr::VK_GOTTPREL_G0_NC) - .Case("tlsdesc", ARM64MCExpr::VK_TLSDESC_PAGE) - .Default(ARM64MCExpr::VK_INVALID); - - if (RefKind == ARM64MCExpr::VK_INVALID) { + RefKind = StringSwitch(LowerCase) + .Case("lo12", AArch64MCExpr::VK_LO12) + .Case("abs_g3", AArch64MCExpr::VK_ABS_G3) + .Case("abs_g2", AArch64MCExpr::VK_ABS_G2) + .Case("abs_g2_s", AArch64MCExpr::VK_ABS_G2_S) + .Case("abs_g2_nc", AArch64MCExpr::VK_ABS_G2_NC) + .Case("abs_g1", AArch64MCExpr::VK_ABS_G1) + .Case("abs_g1_s", AArch64MCExpr::VK_ABS_G1_S) + .Case("abs_g1_nc", AArch64MCExpr::VK_ABS_G1_NC) + .Case("abs_g0", AArch64MCExpr::VK_ABS_G0) + .Case("abs_g0_s", AArch64MCExpr::VK_ABS_G0_S) + .Case("abs_g0_nc", AArch64MCExpr::VK_ABS_G0_NC) + .Case("dtprel_g2", 
AArch64MCExpr::VK_DTPREL_G2) + .Case("dtprel_g1", AArch64MCExpr::VK_DTPREL_G1) + .Case("dtprel_g1_nc", AArch64MCExpr::VK_DTPREL_G1_NC) + .Case("dtprel_g0", AArch64MCExpr::VK_DTPREL_G0) + .Case("dtprel_g0_nc", AArch64MCExpr::VK_DTPREL_G0_NC) + .Case("dtprel_hi12", AArch64MCExpr::VK_DTPREL_HI12) + .Case("dtprel_lo12", AArch64MCExpr::VK_DTPREL_LO12) + .Case("dtprel_lo12_nc", AArch64MCExpr::VK_DTPREL_LO12_NC) + .Case("tprel_g2", AArch64MCExpr::VK_TPREL_G2) + .Case("tprel_g1", AArch64MCExpr::VK_TPREL_G1) + .Case("tprel_g1_nc", AArch64MCExpr::VK_TPREL_G1_NC) + .Case("tprel_g0", AArch64MCExpr::VK_TPREL_G0) + .Case("tprel_g0_nc", AArch64MCExpr::VK_TPREL_G0_NC) + .Case("tprel_hi12", AArch64MCExpr::VK_TPREL_HI12) + .Case("tprel_lo12", AArch64MCExpr::VK_TPREL_LO12) + .Case("tprel_lo12_nc", AArch64MCExpr::VK_TPREL_LO12_NC) + .Case("tlsdesc_lo12", AArch64MCExpr::VK_TLSDESC_LO12) + .Case("got", AArch64MCExpr::VK_GOT_PAGE) + .Case("got_lo12", AArch64MCExpr::VK_GOT_LO12) + .Case("gottprel", AArch64MCExpr::VK_GOTTPREL_PAGE) + .Case("gottprel_lo12", AArch64MCExpr::VK_GOTTPREL_LO12_NC) + .Case("gottprel_g1", AArch64MCExpr::VK_GOTTPREL_G1) + .Case("gottprel_g0_nc", AArch64MCExpr::VK_GOTTPREL_G0_NC) + .Case("tlsdesc", AArch64MCExpr::VK_TLSDESC_PAGE) + .Default(AArch64MCExpr::VK_INVALID); + + if (RefKind == AArch64MCExpr::VK_INVALID) { Error(Parser.getTok().getLoc(), "expect relocation specifier in operand after ':'"); return true; @@ -2726,13 +2740,13 @@ bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { return true; if (HasELFModifier) - ImmVal = ARM64MCExpr::Create(ImmVal, RefKind, getContext()); + ImmVal = AArch64MCExpr::Create(ImmVal, RefKind, getContext()); return false; } /// parseVectorList - Parse a vector list operand for AdvSIMD instructions. -bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { +bool AArch64AsmParser::parseVectorList(OperandVector &Operands) { assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket"); SMLoc S = getLoc(); Parser.Lex(); // Eat left bracket token. @@ -2798,7 +2812,7 @@ bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { if (!Kind.empty()) parseValidVectorKind(Kind, NumElements, ElementKind); - Operands.push_back(ARM64Operand::CreateVectorList( + Operands.push_back(AArch64Operand::CreateVectorList( FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext())); // If there is an index specifier following the list, parse that too. @@ -2823,14 +2837,14 @@ bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { Parser.Lex(); // Eat right bracket token. 
- Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E, - getContext())); + Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx, + E, getContext())); } return false; } -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) return MatchOperand_NoMatch; @@ -2839,14 +2853,15 @@ ARM64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { MCContext &Ctx = getContext(); const MCRegisterInfo *RI = Ctx.getRegisterInfo(); - if (!RI->getRegClass(ARM64::GPR64spRegClassID).contains(RegNum)) + if (!RI->getRegClass(AArch64::GPR64spRegClassID).contains(RegNum)) return MatchOperand_NoMatch; SMLoc S = getLoc(); Parser.Lex(); // Eat register if (Parser.getTok().isNot(AsmToken::Comma)) { - Operands.push_back(ARM64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); + Operands.push_back( + AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); return MatchOperand_Success; } Parser.Lex(); // Eat comma. @@ -2866,13 +2881,14 @@ ARM64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back(ARM64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); + Operands.push_back( + AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx)); return MatchOperand_Success; } /// parseOperand - Parse a arm instruction operand. For now this parses the /// operand regardless of the mnemonic. -bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, +bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, bool invertCondCode) { // Check if the current operand has a custom associated parser, if so, try to // custom parse the operand, or fallback to the general approach. @@ -2895,13 +2911,13 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, return Error(S, "invalid operand"); SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); return false; } case AsmToken::LBrac: { SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateToken("[", false, Loc, - getContext())); + Operands.push_back(AArch64Operand::CreateToken("[", false, Loc, + getContext())); Parser.Lex(); // Eat '[' // There's no comma after a '[', so we can parse the next operand @@ -2933,7 +2949,7 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, return true; E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(IdVal, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(IdVal, S, E, getContext())); return false; } case AsmToken::Integer: @@ -2970,9 +2986,9 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, Parser.Lex(); // Eat the token. 
Operands.push_back( - ARM64Operand::CreateToken("#0", false, S, getContext())); + AArch64Operand::CreateToken("#0", false, S, getContext())); Operands.push_back( - ARM64Operand::CreateToken(".0", false, S, getContext())); + AArch64Operand::CreateToken(".0", false, S, getContext())); return false; } @@ -2981,17 +2997,17 @@ bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, return true; E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(ImmVal, S, E, getContext())); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext())); return false; } } } -/// ParseInstruction - Parse an ARM64 instruction mnemonic followed by its +/// ParseInstruction - Parse an AArch64 instruction mnemonic followed by its /// operands. -bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { +bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { Name = StringSwitch(Name.lower()) .Case("beq", "b.eq") .Case("bne", "b.ne") @@ -3026,7 +3042,7 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, } Operands.push_back( - ARM64Operand::CreateToken(Head, false, NameLoc, getContext())); + AArch64Operand::CreateToken(Head, false, NameLoc, getContext())); Mnemonic = Head; // Handle condition codes for a branch mnemonic @@ -3037,13 +3053,13 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + (Head.data() - Name.data())); - ARM64CC::CondCode CC = parseCondCodeString(Head); - if (CC == ARM64CC::Invalid) + AArch64CC::CondCode CC = parseCondCodeString(Head); + if (CC == AArch64CC::Invalid) return Error(SuffixLoc, "invalid condition code"); Operands.push_back( - ARM64Operand::CreateToken(".", true, SuffixLoc, getContext())); + AArch64Operand::CreateToken(".", true, SuffixLoc, getContext())); Operands.push_back( - ARM64Operand::CreateCondCode(CC, NameLoc, NameLoc, getContext())); + AArch64Operand::CreateCondCode(CC, NameLoc, NameLoc, getContext())); } // Add the remaining tokens in the mnemonic. @@ -3054,7 +3070,7 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + (Head.data() - Name.data()) + 1); Operands.push_back( - ARM64Operand::CreateToken(Head, true, SuffixLoc, getContext())); + AArch64Operand::CreateToken(Head, true, SuffixLoc, getContext())); } // Conditional compare instructions have a Condition Code operand, which needs @@ -3105,15 +3121,15 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, // in the given context! 
if (Parser.getTok().is(AsmToken::RBrac)) { SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateToken("]", false, Loc, - getContext())); + Operands.push_back(AArch64Operand::CreateToken("]", false, Loc, + getContext())); Parser.Lex(); } if (Parser.getTok().is(AsmToken::Exclaim)) { SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(ARM64Operand::CreateToken("!", false, Loc, - getContext())); + Operands.push_back(AArch64Operand::CreateToken("!", false, Loc, + getContext())); Parser.Lex(); } @@ -3134,18 +3150,18 @@ bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, // FIXME: This entire function is a giant hack to provide us with decent // operand range validation/diagnostics until TableGen/MC can be extended // to support autogeneration of this kind of validation. -bool ARM64AsmParser::validateInstruction(MCInst &Inst, +bool AArch64AsmParser::validateInstruction(MCInst &Inst, SmallVectorImpl &Loc) { const MCRegisterInfo *RI = getContext().getRegisterInfo(); // Check for indexed addressing modes w/ the base register being the // same as a destination/source register or pair load where // the Rt == Rt2. All of those are undefined behaviour. switch (Inst.getOpcode()) { - case ARM64::LDPSWpre: - case ARM64::LDPWpost: - case ARM64::LDPWpre: - case ARM64::LDPXpost: - case ARM64::LDPXpre: { + case AArch64::LDPSWpre: + case AArch64::LDPWpost: + case AArch64::LDPWpre: + case AArch64::LDPXpost: + case AArch64::LDPXpre: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rt2 = Inst.getOperand(2).getReg(); unsigned Rn = Inst.getOperand(3).getReg(); @@ -3157,41 +3173,41 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, "is also a destination"); // FALLTHROUGH } - case ARM64::LDPDi: - case ARM64::LDPQi: - case ARM64::LDPSi: - case ARM64::LDPSWi: - case ARM64::LDPWi: - case ARM64::LDPXi: { + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPWi: + case AArch64::LDPXi: { unsigned Rt = Inst.getOperand(0).getReg(); unsigned Rt2 = Inst.getOperand(1).getReg(); if (Rt == Rt2) return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); break; } - case ARM64::LDPDpost: - case ARM64::LDPDpre: - case ARM64::LDPQpost: - case ARM64::LDPQpre: - case ARM64::LDPSpost: - case ARM64::LDPSpre: - case ARM64::LDPSWpost: { + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPQpost: + case AArch64::LDPQpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: + case AArch64::LDPSWpost: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rt2 = Inst.getOperand(2).getReg(); if (Rt == Rt2) return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); break; } - case ARM64::STPDpost: - case ARM64::STPDpre: - case ARM64::STPQpost: - case ARM64::STPQpre: - case ARM64::STPSpost: - case ARM64::STPSpre: - case ARM64::STPWpost: - case ARM64::STPWpre: - case ARM64::STPXpost: - case ARM64::STPXpre: { + case AArch64::STPDpost: + case AArch64::STPDpre: + case AArch64::STPQpost: + case AArch64::STPQpre: + case AArch64::STPSpost: + case AArch64::STPSpre: + case AArch64::STPWpost: + case AArch64::STPWpre: + case AArch64::STPXpost: + case AArch64::STPXpre: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rt2 = Inst.getOperand(2).getReg(); unsigned Rn = Inst.getOperand(3).getReg(); @@ -3203,28 +3219,28 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, "is also a source"); break; } - case ARM64::LDRBBpre: - case ARM64::LDRBpre: - case ARM64::LDRHHpre: - case ARM64::LDRHpre: - case 
ARM64::LDRSBWpre: - case ARM64::LDRSBXpre: - case ARM64::LDRSHWpre: - case ARM64::LDRSHXpre: - case ARM64::LDRSWpre: - case ARM64::LDRWpre: - case ARM64::LDRXpre: - case ARM64::LDRBBpost: - case ARM64::LDRBpost: - case ARM64::LDRHHpost: - case ARM64::LDRHpost: - case ARM64::LDRSBWpost: - case ARM64::LDRSBXpost: - case ARM64::LDRSHWpost: - case ARM64::LDRSHXpost: - case ARM64::LDRSWpost: - case ARM64::LDRWpost: - case ARM64::LDRXpost: { + case AArch64::LDRBBpre: + case AArch64::LDRBpre: + case AArch64::LDRHHpre: + case AArch64::LDRHpre: + case AArch64::LDRSBWpre: + case AArch64::LDRSBXpre: + case AArch64::LDRSHWpre: + case AArch64::LDRSHXpre: + case AArch64::LDRSWpre: + case AArch64::LDRWpre: + case AArch64::LDRXpre: + case AArch64::LDRBBpost: + case AArch64::LDRBpost: + case AArch64::LDRHHpost: + case AArch64::LDRHpost: + case AArch64::LDRSBWpost: + case AArch64::LDRSBXpost: + case AArch64::LDRSHWpost: + case AArch64::LDRSHXpost: + case AArch64::LDRSWpost: + case AArch64::LDRWpost: + case AArch64::LDRXpost: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rn = Inst.getOperand(2).getReg(); if (RI->isSubRegisterEq(Rn, Rt)) @@ -3232,18 +3248,18 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, "is also a source"); break; } - case ARM64::STRBBpost: - case ARM64::STRBpost: - case ARM64::STRHHpost: - case ARM64::STRHpost: - case ARM64::STRWpost: - case ARM64::STRXpost: - case ARM64::STRBBpre: - case ARM64::STRBpre: - case ARM64::STRHHpre: - case ARM64::STRHpre: - case ARM64::STRWpre: - case ARM64::STRXpre: { + case AArch64::STRBBpost: + case AArch64::STRBpost: + case AArch64::STRHHpost: + case AArch64::STRHpost: + case AArch64::STRWpost: + case AArch64::STRXpost: + case AArch64::STRBBpre: + case AArch64::STRBpre: + case AArch64::STRHHpre: + case AArch64::STRHpre: + case AArch64::STRWpre: + case AArch64::STRXpre: { unsigned Rt = Inst.getOperand(1).getReg(); unsigned Rn = Inst.getOperand(2).getReg(); if (RI->isSubRegisterEq(Rn, Rt)) @@ -3257,19 +3273,19 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, // in the instructions being checked and this keeps the nested conditionals // to a minimum. switch (Inst.getOpcode()) { - case ARM64::ADDSWri: - case ARM64::ADDSXri: - case ARM64::ADDWri: - case ARM64::ADDXri: - case ARM64::SUBSWri: - case ARM64::SUBSXri: - case ARM64::SUBWri: - case ARM64::SUBXri: { + case AArch64::ADDSWri: + case AArch64::ADDSXri: + case AArch64::ADDWri: + case AArch64::ADDXri: + case AArch64::SUBSWri: + case AArch64::SUBSXri: + case AArch64::SUBWri: + case AArch64::SUBXri: { // Annoyingly we can't do this in the isAddSubImm predicate, so there is // some slight duplication here. if (Inst.getOperand(2).isExpr()) { const MCExpr *Expr = Inst.getOperand(2).getExpr(); - ARM64MCExpr::VariantKind ELFRefKind; + AArch64MCExpr::VariantKind ELFRefKind; MCSymbolRefExpr::VariantKind DarwinRefKind; int64_t Addend; if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { @@ -3279,20 +3295,20 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, // Only allow these with ADDXri. 
if ((DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) && - Inst.getOpcode() == ARM64::ADDXri) + Inst.getOpcode() == AArch64::ADDXri) return false; // Only allow these with ADDXri/ADDWri - if ((ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_HI12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_HI12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) && - (Inst.getOpcode() == ARM64::ADDXri || - Inst.getOpcode() == ARM64::ADDWri)) + if ((ELFRefKind == AArch64MCExpr::VK_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) && + (Inst.getOpcode() == AArch64::ADDXri || + Inst.getOpcode() == AArch64::ADDWri)) return false; // Don't allow expressions in the immediate field otherwise @@ -3305,7 +3321,7 @@ bool ARM64AsmParser::validateInstruction(MCInst &Inst, } } -bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { +bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { switch (ErrCode) { case Match_MissingFeature: return Error(Loc, @@ -3434,28 +3450,28 @@ bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { static const char *getSubtargetFeatureName(unsigned Val); -bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, - MCStreamer &Out, - unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + unsigned &ErrorInfo, + bool MatchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); - ARM64Operand *Op = static_cast(Operands[0]); + AArch64Operand *Op = static_cast(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); StringRef Tok = Op->getToken(); unsigned NumOperands = Operands.size(); if (NumOperands == 4 && Tok == "lsl") { - ARM64Operand *Op2 = static_cast(Operands[2]); - ARM64Operand *Op3 = static_cast(Operands[3]); + AArch64Operand *Op2 = static_cast(Operands[2]); + AArch64Operand *Op3 = static_cast(Operands[3]); if (Op2->isReg() && Op3->isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); if (Op3CE) { uint64_t Op3Val = Op3CE->getValue(); uint64_t NewOp3Val = 0; uint64_t NewOp4Val = 0; - if (ARM64MCRegisterClasses[ARM64::GPR32allRegClassID].contains( + if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( Op2->getReg())) { NewOp3Val = (32 - Op3Val) & 0x1f; NewOp4Val = 31 - Op3Val; @@ -3467,11 +3483,11 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, const MCExpr *NewOp3 = MCConstantExpr::Create(NewOp3Val, getContext()); const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "ubfm", false, Op->getStartLoc(), getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands.push_back(ARM64Operand::CreateImm( + Operands[3] = AArch64Operand::CreateImm(NewOp3, 
Op3->getStartLoc(), + Op3->getEndLoc(), getContext()); + Operands.push_back(AArch64Operand::CreateImm( NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext())); delete Op3; delete Op; @@ -3481,9 +3497,9 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and // UBFIZ -> UBFM aliases. if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") { - ARM64Operand *Op1 = static_cast(Operands[1]); - ARM64Operand *Op3 = static_cast(Operands[3]); - ARM64Operand *Op4 = static_cast(Operands[4]); + AArch64Operand *Op1 = static_cast(Operands[1]); + AArch64Operand *Op3 = static_cast(Operands[3]); + AArch64Operand *Op4 = static_cast(Operands[4]); if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); @@ -3494,7 +3510,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, uint64_t Op4Val = Op4CE->getValue(); uint64_t RegWidth = 0; - if (ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( + if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op1->getReg())) RegWidth = 64; else @@ -3508,7 +3524,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "expected integer in range [1, 32]"); uint64_t NewOp3Val = 0; - if (ARM64MCRegisterClasses[ARM64::GPR32allRegClassID].contains( + if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( Op1->getReg())) NewOp3Val = (32 - Op3Val) & 0x1f; else @@ -3524,18 +3540,18 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MCConstantExpr::Create(NewOp3Val, getContext()); const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands[4] = ARM64Operand::CreateImm(NewOp4, Op4->getStartLoc(), - Op4->getEndLoc(), getContext()); + Operands[3] = AArch64Operand::CreateImm( + NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext()); + Operands[4] = AArch64Operand::CreateImm( + NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); if (Tok == "bfi") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "bfm", false, Op->getStartLoc(), getContext()); else if (Tok == "sbfiz") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "sbfm", false, Op->getStartLoc(), getContext()); else if (Tok == "ubfiz") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "ubfm", false, Op->getStartLoc(), getContext()); else llvm_unreachable("No valid mnemonic for alias?"); @@ -3550,9 +3566,9 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // UBFX -> UBFM aliases. 
} else if (NumOperands == 5 && (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) { - ARM64Operand *Op1 = static_cast(Operands[1]); - ARM64Operand *Op3 = static_cast(Operands[3]); - ARM64Operand *Op4 = static_cast(Operands[4]); + AArch64Operand *Op1 = static_cast(Operands[1]); + AArch64Operand *Op3 = static_cast(Operands[3]); + AArch64Operand *Op4 = static_cast(Operands[4]); if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); @@ -3563,7 +3579,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, uint64_t Op4Val = Op4CE->getValue(); uint64_t RegWidth = 0; - if (ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( + if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op1->getReg())) RegWidth = 64; else @@ -3584,16 +3600,16 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[4] = ARM64Operand::CreateImm( + Operands[4] = AArch64Operand::CreateImm( NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); if (Tok == "bfxil") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "bfm", false, Op->getStartLoc(), getContext()); else if (Tok == "sbfx") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "sbfm", false, Op->getStartLoc(), getContext()); else if (Tok == "ubfx") - Operands[0] = ARM64Operand::CreateToken( + Operands[0] = AArch64Operand::CreateToken( "ubfm", false, Op->getStartLoc(), getContext()); else llvm_unreachable("No valid mnemonic for alias?"); @@ -3610,44 +3626,44 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) { // The source register can be Wn here, but the matcher expects a // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast(Operands[2]); + AArch64Operand *Op = static_cast(Operands[2]); if (Op->isReg()) { unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); + Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); delete Op; } } // FIXME: Likewise for sxt[bh] with a Xd dst operand else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) { - ARM64Operand *Op = static_cast(Operands[1]); + AArch64Operand *Op = static_cast(Operands[1]); if (Op->isReg() && - ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( + AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op->getReg())) { // The source register can be Wn here, but the matcher expects a // GPR64. Twiddle it here if necessary. 
- ARM64Operand *Op = static_cast(Operands[2]); + AArch64Operand *Op = static_cast(Operands[2]); if (Op->isReg()) { unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); + Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); delete Op; } } } // FIXME: Likewise for uxt[bh] with a Xd dst operand else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) { - ARM64Operand *Op = static_cast(Operands[1]); + AArch64Operand *Op = static_cast(Operands[1]); if (Op->isReg() && - ARM64MCRegisterClasses[ARM64::GPR64allRegClassID].contains( + AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op->getReg())) { // The source register can be Wn here, but the matcher expects a // GPR32. Twiddle it here if necessary. - ARM64Operand *Op = static_cast(Operands[1]); + AArch64Operand *Op = static_cast(Operands[1]); if (Op->isReg()) { unsigned Reg = getWRegFromXReg(Op->getReg()); - Operands[1] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); + Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); delete Op; } } @@ -3655,16 +3671,17 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. if (NumOperands == 3 && Tok == "fmov") { - ARM64Operand *RegOp = static_cast(Operands[1]); - ARM64Operand *ImmOp = static_cast(Operands[2]); + AArch64Operand *RegOp = static_cast(Operands[1]); + AArch64Operand *ImmOp = static_cast(Operands[2]); if (RegOp->isReg() && ImmOp->isFPImm() && ImmOp->getFPImm() == (unsigned)-1) { - unsigned zreg = ARM64MCRegisterClasses[ARM64::FPR32RegClassID].contains( - RegOp->getReg()) - ? ARM64::WZR - : ARM64::XZR; - Operands[2] = ARM64Operand::CreateReg(zreg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); + unsigned zreg = + AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains( + RegOp->getReg()) + ? AArch64::WZR + : AArch64::XZR; + Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); delete ImmOp; } } @@ -3718,14 +3735,14 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } // If the match failed on a suffix token operand, tweak the diagnostic // accordingly. - if (((ARM64Operand *)Operands[ErrorInfo])->isToken() && - ((ARM64Operand *)Operands[ErrorInfo])->isTokenSuffix()) + if (((AArch64Operand *)Operands[ErrorInfo])->isToken() && + ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix()) MatchResult = Match_InvalidSuffix; return showMatchError(ErrorLoc, MatchResult); @@ -3779,7 +3796,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_MRS: { // Any time we get here, there's nothing fancy to do. Just get the // operand SMLoc and display the diagnostic. 
- SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); + SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return showMatchError(ErrorLoc, MatchResult); @@ -3791,7 +3808,7 @@ bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } /// ParseDirective parses the arm specific directives -bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) { +bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); SMLoc Loc = DirectiveID.getLoc(); if (IDVal == ".hword") @@ -3808,7 +3825,7 @@ bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) { /// parseDirectiveWord /// ::= .word [ expression (, expression)* ] -bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { +bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; @@ -3833,17 +3850,17 @@ bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { // parseDirectiveTLSDescCall: // ::= .tlsdesccall symbol -bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { +bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { StringRef Name; if (getParser().parseIdentifier(Name)) return Error(L, "expected symbol after directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_TLSDESC, getContext()); + Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext()); MCInst Inst; - Inst.setOpcode(ARM64::TLSDESCCALL); + Inst.setOpcode(AArch64::TLSDESCCALL); Inst.addOperand(MCOperand::CreateExpr(Expr)); getParser().getStreamer().EmitInstruction(Inst, STI); @@ -3852,7 +3869,7 @@ bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { /// ::= .loh label1, ..., labelN /// The number of arguments depends on the loh identifier. -bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { +bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { if (IDVal != MCLOHDirectiveName()) return true; MCLOHType Kind; @@ -3904,15 +3921,15 @@ bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { } bool -ARM64AsmParser::classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, - MCSymbolRefExpr::VariantKind &DarwinRefKind, - int64_t &Addend) { - ELFRefKind = ARM64MCExpr::VK_INVALID; +AArch64AsmParser::classifySymbolRef(const MCExpr *Expr, + AArch64MCExpr::VariantKind &ELFRefKind, + MCSymbolRefExpr::VariantKind &DarwinRefKind, + int64_t &Addend) { + ELFRefKind = AArch64MCExpr::VK_INVALID; DarwinRefKind = MCSymbolRefExpr::VK_None; Addend = 0; - if (const ARM64MCExpr *AE = dyn_cast(Expr)) { + if (const AArch64MCExpr *AE = dyn_cast(Expr)) { ELFRefKind = AE->getKind(); Expr = AE->getSubExpr(); } @@ -3949,29 +3966,29 @@ ARM64AsmParser::classifySymbolRef(const MCExpr *Expr, // It's some symbol reference + a constant addend, but really // shouldn't use both Darwin and ELF syntax. - return ELFRefKind == ARM64MCExpr::VK_INVALID || + return ELFRefKind == AArch64MCExpr::VK_INVALID || DarwinRefKind == MCSymbolRefExpr::VK_None; } /// Force static initialization. 
-extern "C" void LLVMInitializeARM64AsmParser() { - RegisterMCAsmParser X(TheARM64leTarget); - RegisterMCAsmParser Y(TheARM64beTarget); +extern "C" void LLVMInitializeAArch64AsmParser() { + RegisterMCAsmParser X(TheAArch64leTarget); + RegisterMCAsmParser Y(TheAArch64beTarget); - RegisterMCAsmParser Z(TheAArch64leTarget); - RegisterMCAsmParser W(TheAArch64beTarget); + RegisterMCAsmParser Z(TheARM64leTarget); + RegisterMCAsmParser W(TheARM64beTarget); } #define GET_REGISTER_MATCHER #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION -#include "ARM64GenAsmMatcher.inc" +#include "AArch64GenAsmMatcher.inc" // Define this matcher function after the auto-generated include so we // have the match class enum definitions. -unsigned ARM64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, - unsigned Kind) { - ARM64Operand *Op = static_cast(AsmOp); +unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, + unsigned Kind) { + AArch64Operand *Op = static_cast(AsmOp); // If the kind is a token for a literal immediate, check if our asm // operand matches. This is for InstAliases which have a fixed-value // immediate in the syntax. diff --git a/lib/Target/ARM64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt similarity index 59% rename from lib/Target/ARM64/AsmParser/CMakeLists.txt rename to lib/Target/AArch64/AsmParser/CMakeLists.txt index 826158b1ed1..cc0a9d86a14 100644 --- a/lib/Target/ARM64/AsmParser/CMakeLists.txt +++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt @@ -1,6 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) -add_llvm_library(LLVMARM64AsmParser - ARM64AsmParser.cpp +add_llvm_library(LLVMAArch64AsmParser + AArch64AsmParser.cpp ) diff --git a/lib/Target/ARM64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt similarity index 70% rename from lib/Target/ARM64/AsmParser/LLVMBuild.txt rename to lib/Target/AArch64/AsmParser/LLVMBuild.txt index 9045283e919..11eb9d55f61 100644 --- a/lib/Target/ARM64/AsmParser/LLVMBuild.txt +++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===; +;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = ARM64AsmParser -parent = ARM64 -required_libraries = ARM64Desc ARM64Info ARM64Utils MC MCParser Support -add_to_library_groups = ARM64 +name = AArch64AsmParser +parent = AArch64 +required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCParser Support +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile similarity index 82% rename from lib/Target/ARM64/AsmParser/Makefile rename to lib/Target/AArch64/AsmParser/Makefile index d25c47f9af9..00268c76f8e 100644 --- a/lib/Target/ARM64/AsmParser/Makefile +++ b/lib/Target/AArch64/AsmParser/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## +##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmParser +LIBRARYNAME = LLVMAArch64AsmParser # Hack: we need to include 'main' ARM target directory to grab private headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. 
-I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt new file mode 100644 index 00000000000..789d549bb15 --- /dev/null +++ b/lib/Target/AArch64/CMakeLists.txt @@ -0,0 +1,51 @@ +set(LLVM_TARGET_DEFINITIONS AArch64.td) + +tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) +tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) +tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel) +tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) +tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) +add_public_tablegen_target(AArch64CommonTableGen) + +add_llvm_target(AArch64CodeGen + AArch64AddressTypePromotion.cpp + AArch64AdvSIMDScalarPass.cpp + AArch64AsmPrinter.cpp + AArch64BranchRelaxation.cpp + AArch64CleanupLocalDynamicTLSPass.cpp + AArch64CollectLOH.cpp + AArch64ConditionalCompares.cpp + AArch64DeadRegisterDefinitionsPass.cpp + AArch64ExpandPseudoInsts.cpp + AArch64FastISel.cpp + AArch64FrameLowering.cpp + AArch64ISelDAGToDAG.cpp + AArch64ISelLowering.cpp + AArch64InstrInfo.cpp + AArch64LoadStoreOptimizer.cpp + AArch64MCInstLower.cpp + AArch64PromoteConstant.cpp + AArch64RegisterInfo.cpp + AArch64SelectionDAGInfo.cpp + AArch64StorePairSuppress.cpp + AArch64Subtarget.cpp + AArch64TargetMachine.cpp + AArch64TargetObjectFile.cpp + AArch64TargetTransformInfo.cpp +) + +add_dependencies(LLVMAArch64CodeGen intrinsics_gen) + +add_subdirectory(TargetInfo) +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) +add_subdirectory(Utils) diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp similarity index 69% rename from lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp rename to lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index bb47b3a0982..6de27d6d51a 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1,4 +1,4 @@ -//===- ARM64Disassembler.cpp - Disassembler for ARM64 -----------*- C++ -*-===// +//===- AArch64Disassembler.cpp - Disassembler for AArch64 -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,11 +10,11 @@ // //===----------------------------------------------------------------------===// -#include "ARM64Disassembler.h" -#include "ARM64ExternalSymbolizer.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "Utils/ARM64BaseInfo.h" +#include "AArch64Disassembler.h" +#include "AArch64ExternalSymbolizer.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/Debug.h" @@ -24,7 +24,7 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-disassembler" +#define DEBUG_TYPE "aarch64-disassembler" // Pull DecodeStatus and its enum values into the global namespace. 
typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; @@ -186,20 +186,20 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { llvm_unreachable("Invalid DecodeStatus!"); } -#include "ARM64GenDisassemblerTables.inc" -#include "ARM64GenInstrInfo.inc" +#include "AArch64GenDisassemblerTables.inc" +#include "AArch64GenInstrInfo.inc" #define Success llvm::MCDisassembler::Success #define Fail llvm::MCDisassembler::Fail #define SoftFail llvm::MCDisassembler::SoftFail -static MCDisassembler *createARM64Disassembler(const Target &T, +static MCDisassembler *createAArch64Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new ARM64Disassembler(STI, Ctx); + return new AArch64Disassembler(STI, Ctx); } -DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, +DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, const MemoryObject &Region, uint64_t Address, raw_ostream &os, @@ -223,43 +223,44 @@ DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, } static MCSymbolizer * -createARM64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, +createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx, MCRelocationInfo *RelInfo) { - return new llvm::ARM64ExternalSymbolizer( + return new llvm::AArch64ExternalSymbolizer( *Ctx, std::unique_ptr(RelInfo), GetOpInfo, SymbolLookUp, DisInfo); } -extern "C" void LLVMInitializeARM64Disassembler() { - TargetRegistry::RegisterMCDisassembler(TheARM64leTarget, - createARM64Disassembler); - TargetRegistry::RegisterMCDisassembler(TheARM64beTarget, - createARM64Disassembler); - TargetRegistry::RegisterMCSymbolizer(TheARM64leTarget, - createARM64ExternalSymbolizer); - TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget, - createARM64ExternalSymbolizer); - +extern "C" void LLVMInitializeAArch64Disassembler() { TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget, - createARM64Disassembler); + createAArch64Disassembler); TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget, - createARM64Disassembler); + createAArch64Disassembler); TargetRegistry::RegisterMCSymbolizer(TheAArch64leTarget, - createARM64ExternalSymbolizer); + createAArch64ExternalSymbolizer); TargetRegistry::RegisterMCSymbolizer(TheAArch64beTarget, - createARM64ExternalSymbolizer); + createAArch64ExternalSymbolizer); + + TargetRegistry::RegisterMCDisassembler(TheARM64leTarget, + createAArch64Disassembler); + TargetRegistry::RegisterMCDisassembler(TheARM64beTarget, + createAArch64Disassembler); + TargetRegistry::RegisterMCSymbolizer(TheARM64leTarget, + createAArch64ExternalSymbolizer); + TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget, + createAArch64ExternalSymbolizer); } static const unsigned FPR128DecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, + AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9, + AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14, + AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19, + AArch64::Q20, AArch64::Q21, 
AArch64::Q22, AArch64::Q23, AArch64::Q24, + AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29, + AArch64::Q30, AArch64::Q31 }; static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, @@ -282,12 +283,13 @@ static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned FPR64DecoderTable[] = { - ARM64::D0, ARM64::D1, ARM64::D2, ARM64::D3, ARM64::D4, ARM64::D5, - ARM64::D6, ARM64::D7, ARM64::D8, ARM64::D9, ARM64::D10, ARM64::D11, - ARM64::D12, ARM64::D13, ARM64::D14, ARM64::D15, ARM64::D16, ARM64::D17, - ARM64::D18, ARM64::D19, ARM64::D20, ARM64::D21, ARM64::D22, ARM64::D23, - ARM64::D24, ARM64::D25, ARM64::D26, ARM64::D27, ARM64::D28, ARM64::D29, - ARM64::D30, ARM64::D31 + AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, + AArch64::D5, AArch64::D6, AArch64::D7, AArch64::D8, AArch64::D9, + AArch64::D10, AArch64::D11, AArch64::D12, AArch64::D13, AArch64::D14, + AArch64::D15, AArch64::D16, AArch64::D17, AArch64::D18, AArch64::D19, + AArch64::D20, AArch64::D21, AArch64::D22, AArch64::D23, AArch64::D24, + AArch64::D25, AArch64::D26, AArch64::D27, AArch64::D28, AArch64::D29, + AArch64::D30, AArch64::D31 }; static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, @@ -302,12 +304,13 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned FPR32DecoderTable[] = { - ARM64::S0, ARM64::S1, ARM64::S2, ARM64::S3, ARM64::S4, ARM64::S5, - ARM64::S6, ARM64::S7, ARM64::S8, ARM64::S9, ARM64::S10, ARM64::S11, - ARM64::S12, ARM64::S13, ARM64::S14, ARM64::S15, ARM64::S16, ARM64::S17, - ARM64::S18, ARM64::S19, ARM64::S20, ARM64::S21, ARM64::S22, ARM64::S23, - ARM64::S24, ARM64::S25, ARM64::S26, ARM64::S27, ARM64::S28, ARM64::S29, - ARM64::S30, ARM64::S31 + AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, + AArch64::S5, AArch64::S6, AArch64::S7, AArch64::S8, AArch64::S9, + AArch64::S10, AArch64::S11, AArch64::S12, AArch64::S13, AArch64::S14, + AArch64::S15, AArch64::S16, AArch64::S17, AArch64::S18, AArch64::S19, + AArch64::S20, AArch64::S21, AArch64::S22, AArch64::S23, AArch64::S24, + AArch64::S25, AArch64::S26, AArch64::S27, AArch64::S28, AArch64::S29, + AArch64::S30, AArch64::S31 }; static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, @@ -322,12 +325,13 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned FPR16DecoderTable[] = { - ARM64::H0, ARM64::H1, ARM64::H2, ARM64::H3, ARM64::H4, ARM64::H5, - ARM64::H6, ARM64::H7, ARM64::H8, ARM64::H9, ARM64::H10, ARM64::H11, - ARM64::H12, ARM64::H13, ARM64::H14, ARM64::H15, ARM64::H16, ARM64::H17, - ARM64::H18, ARM64::H19, ARM64::H20, ARM64::H21, ARM64::H22, ARM64::H23, - ARM64::H24, ARM64::H25, ARM64::H26, ARM64::H27, ARM64::H28, ARM64::H29, - ARM64::H30, ARM64::H31 + AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, + AArch64::H5, AArch64::H6, AArch64::H7, AArch64::H8, AArch64::H9, + AArch64::H10, AArch64::H11, AArch64::H12, AArch64::H13, AArch64::H14, + AArch64::H15, AArch64::H16, AArch64::H17, AArch64::H18, AArch64::H19, + AArch64::H20, AArch64::H21, AArch64::H22, AArch64::H23, AArch64::H24, + AArch64::H25, AArch64::H26, AArch64::H27, AArch64::H28, AArch64::H29, + AArch64::H30, AArch64::H31 }; static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, @@ -342,12 +346,13 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned FPR8DecoderTable[] = { - 
ARM64::B0, ARM64::B1, ARM64::B2, ARM64::B3, ARM64::B4, ARM64::B5, - ARM64::B6, ARM64::B7, ARM64::B8, ARM64::B9, ARM64::B10, ARM64::B11, - ARM64::B12, ARM64::B13, ARM64::B14, ARM64::B15, ARM64::B16, ARM64::B17, - ARM64::B18, ARM64::B19, ARM64::B20, ARM64::B21, ARM64::B22, ARM64::B23, - ARM64::B24, ARM64::B25, ARM64::B26, ARM64::B27, ARM64::B28, ARM64::B29, - ARM64::B30, ARM64::B31 + AArch64::B0, AArch64::B1, AArch64::B2, AArch64::B3, AArch64::B4, + AArch64::B5, AArch64::B6, AArch64::B7, AArch64::B8, AArch64::B9, + AArch64::B10, AArch64::B11, AArch64::B12, AArch64::B13, AArch64::B14, + AArch64::B15, AArch64::B16, AArch64::B17, AArch64::B18, AArch64::B19, + AArch64::B20, AArch64::B21, AArch64::B22, AArch64::B23, AArch64::B24, + AArch64::B25, AArch64::B26, AArch64::B27, AArch64::B28, AArch64::B29, + AArch64::B30, AArch64::B31 }; static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, @@ -362,12 +367,13 @@ static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned GPR64DecoderTable[] = { - ARM64::X0, ARM64::X1, ARM64::X2, ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7, ARM64::X8, ARM64::X9, ARM64::X10, ARM64::X11, - ARM64::X12, ARM64::X13, ARM64::X14, ARM64::X15, ARM64::X16, ARM64::X17, - ARM64::X18, ARM64::X19, ARM64::X20, ARM64::X21, ARM64::X22, ARM64::X23, - ARM64::X24, ARM64::X25, ARM64::X26, ARM64::X27, ARM64::X28, ARM64::FP, - ARM64::LR, ARM64::XZR + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, + AArch64::X5, AArch64::X6, AArch64::X7, AArch64::X8, AArch64::X9, + AArch64::X10, AArch64::X11, AArch64::X12, AArch64::X13, AArch64::X14, + AArch64::X15, AArch64::X16, AArch64::X17, AArch64::X18, AArch64::X19, + AArch64::X20, AArch64::X21, AArch64::X22, AArch64::X23, AArch64::X24, + AArch64::X25, AArch64::X26, AArch64::X27, AArch64::X28, AArch64::FP, + AArch64::LR, AArch64::XZR }; static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, @@ -387,19 +393,20 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return Fail; unsigned Register = GPR64DecoderTable[RegNo]; - if (Register == ARM64::XZR) - Register = ARM64::SP; + if (Register == AArch64::XZR) + Register = AArch64::SP; Inst.addOperand(MCOperand::CreateReg(Register)); return Success; } static const unsigned GPR32DecoderTable[] = { - ARM64::W0, ARM64::W1, ARM64::W2, ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7, ARM64::W8, ARM64::W9, ARM64::W10, ARM64::W11, - ARM64::W12, ARM64::W13, ARM64::W14, ARM64::W15, ARM64::W16, ARM64::W17, - ARM64::W18, ARM64::W19, ARM64::W20, ARM64::W21, ARM64::W22, ARM64::W23, - ARM64::W24, ARM64::W25, ARM64::W26, ARM64::W27, ARM64::W28, ARM64::W29, - ARM64::W30, ARM64::WZR + AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, + AArch64::W5, AArch64::W6, AArch64::W7, AArch64::W8, AArch64::W9, + AArch64::W10, AArch64::W11, AArch64::W12, AArch64::W13, AArch64::W14, + AArch64::W15, AArch64::W16, AArch64::W17, AArch64::W18, AArch64::W19, + AArch64::W20, AArch64::W21, AArch64::W22, AArch64::W23, AArch64::W24, + AArch64::W25, AArch64::W26, AArch64::W27, AArch64::W28, AArch64::W29, + AArch64::W30, AArch64::WZR }; static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, @@ -420,19 +427,20 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, return Fail; unsigned Register = GPR32DecoderTable[RegNo]; - if (Register == ARM64::WZR) - Register = ARM64::WSP; + if (Register == AArch64::WZR) + Register = AArch64::WSP; 
Inst.addOperand(MCOperand::CreateReg(Register)); return Success; } static const unsigned VectorDecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, + AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9, + AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14, + AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19, + AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24, + AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29, + AArch64::Q30, AArch64::Q31 }; static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, @@ -447,14 +455,14 @@ static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned QQDecoderTable[] = { - ARM64::Q0_Q1, ARM64::Q1_Q2, ARM64::Q2_Q3, ARM64::Q3_Q4, - ARM64::Q4_Q5, ARM64::Q5_Q6, ARM64::Q6_Q7, ARM64::Q7_Q8, - ARM64::Q8_Q9, ARM64::Q9_Q10, ARM64::Q10_Q11, ARM64::Q11_Q12, - ARM64::Q12_Q13, ARM64::Q13_Q14, ARM64::Q14_Q15, ARM64::Q15_Q16, - ARM64::Q16_Q17, ARM64::Q17_Q18, ARM64::Q18_Q19, ARM64::Q19_Q20, - ARM64::Q20_Q21, ARM64::Q21_Q22, ARM64::Q22_Q23, ARM64::Q23_Q24, - ARM64::Q24_Q25, ARM64::Q25_Q26, ARM64::Q26_Q27, ARM64::Q27_Q28, - ARM64::Q28_Q29, ARM64::Q29_Q30, ARM64::Q30_Q31, ARM64::Q31_Q0 + AArch64::Q0_Q1, AArch64::Q1_Q2, AArch64::Q2_Q3, AArch64::Q3_Q4, + AArch64::Q4_Q5, AArch64::Q5_Q6, AArch64::Q6_Q7, AArch64::Q7_Q8, + AArch64::Q8_Q9, AArch64::Q9_Q10, AArch64::Q10_Q11, AArch64::Q11_Q12, + AArch64::Q12_Q13, AArch64::Q13_Q14, AArch64::Q14_Q15, AArch64::Q15_Q16, + AArch64::Q16_Q17, AArch64::Q17_Q18, AArch64::Q18_Q19, AArch64::Q19_Q20, + AArch64::Q20_Q21, AArch64::Q21_Q22, AArch64::Q22_Q23, AArch64::Q23_Q24, + AArch64::Q24_Q25, AArch64::Q25_Q26, AArch64::Q26_Q27, AArch64::Q27_Q28, + AArch64::Q28_Q29, AArch64::Q29_Q30, AArch64::Q30_Q31, AArch64::Q31_Q0 }; static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, @@ -467,17 +475,17 @@ static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned QQQDecoderTable[] = { - ARM64::Q0_Q1_Q2, ARM64::Q1_Q2_Q3, ARM64::Q2_Q3_Q4, - ARM64::Q3_Q4_Q5, ARM64::Q4_Q5_Q6, ARM64::Q5_Q6_Q7, - ARM64::Q6_Q7_Q8, ARM64::Q7_Q8_Q9, ARM64::Q8_Q9_Q10, - ARM64::Q9_Q10_Q11, ARM64::Q10_Q11_Q12, ARM64::Q11_Q12_Q13, - ARM64::Q12_Q13_Q14, ARM64::Q13_Q14_Q15, ARM64::Q14_Q15_Q16, - ARM64::Q15_Q16_Q17, ARM64::Q16_Q17_Q18, ARM64::Q17_Q18_Q19, - ARM64::Q18_Q19_Q20, ARM64::Q19_Q20_Q21, ARM64::Q20_Q21_Q22, - ARM64::Q21_Q22_Q23, ARM64::Q22_Q23_Q24, ARM64::Q23_Q24_Q25, - ARM64::Q24_Q25_Q26, ARM64::Q25_Q26_Q27, ARM64::Q26_Q27_Q28, - ARM64::Q27_Q28_Q29, ARM64::Q28_Q29_Q30, ARM64::Q29_Q30_Q31, - ARM64::Q30_Q31_Q0, ARM64::Q31_Q0_Q1 + AArch64::Q0_Q1_Q2, AArch64::Q1_Q2_Q3, AArch64::Q2_Q3_Q4, + AArch64::Q3_Q4_Q5, AArch64::Q4_Q5_Q6, AArch64::Q5_Q6_Q7, + AArch64::Q6_Q7_Q8, AArch64::Q7_Q8_Q9, AArch64::Q8_Q9_Q10, + AArch64::Q9_Q10_Q11, AArch64::Q10_Q11_Q12, AArch64::Q11_Q12_Q13, + AArch64::Q12_Q13_Q14, AArch64::Q13_Q14_Q15, AArch64::Q14_Q15_Q16, + AArch64::Q15_Q16_Q17, AArch64::Q16_Q17_Q18, AArch64::Q17_Q18_Q19, + AArch64::Q18_Q19_Q20, AArch64::Q19_Q20_Q21, AArch64::Q20_Q21_Q22, + 
AArch64::Q21_Q22_Q23, AArch64::Q22_Q23_Q24, AArch64::Q23_Q24_Q25, + AArch64::Q24_Q25_Q26, AArch64::Q25_Q26_Q27, AArch64::Q26_Q27_Q28, + AArch64::Q27_Q28_Q29, AArch64::Q28_Q29_Q30, AArch64::Q29_Q30_Q31, + AArch64::Q30_Q31_Q0, AArch64::Q31_Q0_Q1 }; static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, @@ -490,17 +498,17 @@ static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned QQQQDecoderTable[] = { - ARM64::Q0_Q1_Q2_Q3, ARM64::Q1_Q2_Q3_Q4, ARM64::Q2_Q3_Q4_Q5, - ARM64::Q3_Q4_Q5_Q6, ARM64::Q4_Q5_Q6_Q7, ARM64::Q5_Q6_Q7_Q8, - ARM64::Q6_Q7_Q8_Q9, ARM64::Q7_Q8_Q9_Q10, ARM64::Q8_Q9_Q10_Q11, - ARM64::Q9_Q10_Q11_Q12, ARM64::Q10_Q11_Q12_Q13, ARM64::Q11_Q12_Q13_Q14, - ARM64::Q12_Q13_Q14_Q15, ARM64::Q13_Q14_Q15_Q16, ARM64::Q14_Q15_Q16_Q17, - ARM64::Q15_Q16_Q17_Q18, ARM64::Q16_Q17_Q18_Q19, ARM64::Q17_Q18_Q19_Q20, - ARM64::Q18_Q19_Q20_Q21, ARM64::Q19_Q20_Q21_Q22, ARM64::Q20_Q21_Q22_Q23, - ARM64::Q21_Q22_Q23_Q24, ARM64::Q22_Q23_Q24_Q25, ARM64::Q23_Q24_Q25_Q26, - ARM64::Q24_Q25_Q26_Q27, ARM64::Q25_Q26_Q27_Q28, ARM64::Q26_Q27_Q28_Q29, - ARM64::Q27_Q28_Q29_Q30, ARM64::Q28_Q29_Q30_Q31, ARM64::Q29_Q30_Q31_Q0, - ARM64::Q30_Q31_Q0_Q1, ARM64::Q31_Q0_Q1_Q2 + AArch64::Q0_Q1_Q2_Q3, AArch64::Q1_Q2_Q3_Q4, AArch64::Q2_Q3_Q4_Q5, + AArch64::Q3_Q4_Q5_Q6, AArch64::Q4_Q5_Q6_Q7, AArch64::Q5_Q6_Q7_Q8, + AArch64::Q6_Q7_Q8_Q9, AArch64::Q7_Q8_Q9_Q10, AArch64::Q8_Q9_Q10_Q11, + AArch64::Q9_Q10_Q11_Q12, AArch64::Q10_Q11_Q12_Q13, AArch64::Q11_Q12_Q13_Q14, + AArch64::Q12_Q13_Q14_Q15, AArch64::Q13_Q14_Q15_Q16, AArch64::Q14_Q15_Q16_Q17, + AArch64::Q15_Q16_Q17_Q18, AArch64::Q16_Q17_Q18_Q19, AArch64::Q17_Q18_Q19_Q20, + AArch64::Q18_Q19_Q20_Q21, AArch64::Q19_Q20_Q21_Q22, AArch64::Q20_Q21_Q22_Q23, + AArch64::Q21_Q22_Q23_Q24, AArch64::Q22_Q23_Q24_Q25, AArch64::Q23_Q24_Q25_Q26, + AArch64::Q24_Q25_Q26_Q27, AArch64::Q25_Q26_Q27_Q28, AArch64::Q26_Q27_Q28_Q29, + AArch64::Q27_Q28_Q29_Q30, AArch64::Q28_Q29_Q30_Q31, AArch64::Q29_Q30_Q31_Q0, + AArch64::Q30_Q31_Q0_Q1, AArch64::Q31_Q0_Q1_Q2 }; static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, @@ -514,14 +522,14 @@ static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned DDDecoderTable[] = { - ARM64::D0_D1, ARM64::D1_D2, ARM64::D2_D3, ARM64::D3_D4, - ARM64::D4_D5, ARM64::D5_D6, ARM64::D6_D7, ARM64::D7_D8, - ARM64::D8_D9, ARM64::D9_D10, ARM64::D10_D11, ARM64::D11_D12, - ARM64::D12_D13, ARM64::D13_D14, ARM64::D14_D15, ARM64::D15_D16, - ARM64::D16_D17, ARM64::D17_D18, ARM64::D18_D19, ARM64::D19_D20, - ARM64::D20_D21, ARM64::D21_D22, ARM64::D22_D23, ARM64::D23_D24, - ARM64::D24_D25, ARM64::D25_D26, ARM64::D26_D27, ARM64::D27_D28, - ARM64::D28_D29, ARM64::D29_D30, ARM64::D30_D31, ARM64::D31_D0 + AArch64::D0_D1, AArch64::D1_D2, AArch64::D2_D3, AArch64::D3_D4, + AArch64::D4_D5, AArch64::D5_D6, AArch64::D6_D7, AArch64::D7_D8, + AArch64::D8_D9, AArch64::D9_D10, AArch64::D10_D11, AArch64::D11_D12, + AArch64::D12_D13, AArch64::D13_D14, AArch64::D14_D15, AArch64::D15_D16, + AArch64::D16_D17, AArch64::D17_D18, AArch64::D18_D19, AArch64::D19_D20, + AArch64::D20_D21, AArch64::D21_D22, AArch64::D22_D23, AArch64::D23_D24, + AArch64::D24_D25, AArch64::D25_D26, AArch64::D26_D27, AArch64::D27_D28, + AArch64::D28_D29, AArch64::D29_D30, AArch64::D30_D31, AArch64::D31_D0 }; static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, @@ -534,17 +542,17 @@ static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned DDDDecoderTable[] = { - ARM64::D0_D1_D2, 
ARM64::D1_D2_D3, ARM64::D2_D3_D4, - ARM64::D3_D4_D5, ARM64::D4_D5_D6, ARM64::D5_D6_D7, - ARM64::D6_D7_D8, ARM64::D7_D8_D9, ARM64::D8_D9_D10, - ARM64::D9_D10_D11, ARM64::D10_D11_D12, ARM64::D11_D12_D13, - ARM64::D12_D13_D14, ARM64::D13_D14_D15, ARM64::D14_D15_D16, - ARM64::D15_D16_D17, ARM64::D16_D17_D18, ARM64::D17_D18_D19, - ARM64::D18_D19_D20, ARM64::D19_D20_D21, ARM64::D20_D21_D22, - ARM64::D21_D22_D23, ARM64::D22_D23_D24, ARM64::D23_D24_D25, - ARM64::D24_D25_D26, ARM64::D25_D26_D27, ARM64::D26_D27_D28, - ARM64::D27_D28_D29, ARM64::D28_D29_D30, ARM64::D29_D30_D31, - ARM64::D30_D31_D0, ARM64::D31_D0_D1 + AArch64::D0_D1_D2, AArch64::D1_D2_D3, AArch64::D2_D3_D4, + AArch64::D3_D4_D5, AArch64::D4_D5_D6, AArch64::D5_D6_D7, + AArch64::D6_D7_D8, AArch64::D7_D8_D9, AArch64::D8_D9_D10, + AArch64::D9_D10_D11, AArch64::D10_D11_D12, AArch64::D11_D12_D13, + AArch64::D12_D13_D14, AArch64::D13_D14_D15, AArch64::D14_D15_D16, + AArch64::D15_D16_D17, AArch64::D16_D17_D18, AArch64::D17_D18_D19, + AArch64::D18_D19_D20, AArch64::D19_D20_D21, AArch64::D20_D21_D22, + AArch64::D21_D22_D23, AArch64::D22_D23_D24, AArch64::D23_D24_D25, + AArch64::D24_D25_D26, AArch64::D25_D26_D27, AArch64::D26_D27_D28, + AArch64::D27_D28_D29, AArch64::D28_D29_D30, AArch64::D29_D30_D31, + AArch64::D30_D31_D0, AArch64::D31_D0_D1 }; static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, @@ -557,17 +565,17 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, } static const unsigned DDDDDecoderTable[] = { - ARM64::D0_D1_D2_D3, ARM64::D1_D2_D3_D4, ARM64::D2_D3_D4_D5, - ARM64::D3_D4_D5_D6, ARM64::D4_D5_D6_D7, ARM64::D5_D6_D7_D8, - ARM64::D6_D7_D8_D9, ARM64::D7_D8_D9_D10, ARM64::D8_D9_D10_D11, - ARM64::D9_D10_D11_D12, ARM64::D10_D11_D12_D13, ARM64::D11_D12_D13_D14, - ARM64::D12_D13_D14_D15, ARM64::D13_D14_D15_D16, ARM64::D14_D15_D16_D17, - ARM64::D15_D16_D17_D18, ARM64::D16_D17_D18_D19, ARM64::D17_D18_D19_D20, - ARM64::D18_D19_D20_D21, ARM64::D19_D20_D21_D22, ARM64::D20_D21_D22_D23, - ARM64::D21_D22_D23_D24, ARM64::D22_D23_D24_D25, ARM64::D23_D24_D25_D26, - ARM64::D24_D25_D26_D27, ARM64::D25_D26_D27_D28, ARM64::D26_D27_D28_D29, - ARM64::D27_D28_D29_D30, ARM64::D28_D29_D30_D31, ARM64::D29_D30_D31_D0, - ARM64::D30_D31_D0_D1, ARM64::D31_D0_D1_D2 + AArch64::D0_D1_D2_D3, AArch64::D1_D2_D3_D4, AArch64::D2_D3_D4_D5, + AArch64::D3_D4_D5_D6, AArch64::D4_D5_D6_D7, AArch64::D5_D6_D7_D8, + AArch64::D6_D7_D8_D9, AArch64::D7_D8_D9_D10, AArch64::D8_D9_D10_D11, + AArch64::D9_D10_D11_D12, AArch64::D10_D11_D12_D13, AArch64::D11_D12_D13_D14, + AArch64::D12_D13_D14_D15, AArch64::D13_D14_D15_D16, AArch64::D14_D15_D16_D17, + AArch64::D15_D16_D17_D18, AArch64::D16_D17_D18_D19, AArch64::D17_D18_D19_D20, + AArch64::D18_D19_D20_D21, AArch64::D19_D20_D21_D22, AArch64::D20_D21_D22_D23, + AArch64::D21_D22_D23_D24, AArch64::D22_D23_D24_D25, AArch64::D23_D24_D25_D26, + AArch64::D24_D25_D26_D27, AArch64::D25_D26_D27_D28, AArch64::D26_D27_D28_D29, + AArch64::D27_D28_D29_D30, AArch64::D28_D29_D30_D31, AArch64::D29_D30_D31_D0, + AArch64::D30_D31_D0_D1, AArch64::D31_D0_D1_D2 }; static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, @@ -599,15 +607,15 @@ static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm, static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder) { int64_t ImmVal = Imm; - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); // Sign-extend 19-bit immediate. 
if (ImmVal & (1 << (19 - 1))) ImmVal |= ~((1LL << 19) - 1); if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr, - Inst.getOpcode() != ARM64::LDRXl, 0, 4)) + Inst.getOpcode() != AArch64::LDRXl, 0, 4)) Inst.addOperand(MCOperand::CreateImm(ImmVal)); return Success; } @@ -622,15 +630,16 @@ static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm, static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); const MCSubtargetInfo &STI = Dis->getSubtargetInfo(); Imm |= 0x8000; Inst.addOperand(MCOperand::CreateImm(Imm)); bool ValidNamed; - (void)ARM64SysReg::MRSMapper(STI.getFeatureBits()).toString(Imm, ValidNamed); + (void)AArch64SysReg::MRSMapper(STI.getFeatureBits()) + .toString(Imm, ValidNamed); return ValidNamed ? Success : Fail; } @@ -638,15 +647,16 @@ static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); const MCSubtargetInfo &STI = Dis->getSubtargetInfo(); Imm |= 0x8000; Inst.addOperand(MCOperand::CreateImm(Imm)); bool ValidNamed; - (void)ARM64SysReg::MSRMapper(STI.getFeatureBits()).toString(Imm, ValidNamed); + (void)AArch64SysReg::MSRMapper(STI.getFeatureBits()) + .toString(Imm, ValidNamed); return ValidNamed ? Success : Fail; } @@ -756,22 +766,22 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, switch (Inst.getOpcode()) { default: return Fail; - case ARM64::ADDWrs: - case ARM64::ADDSWrs: - case ARM64::SUBWrs: - case ARM64::SUBSWrs: + case AArch64::ADDWrs: + case AArch64::ADDSWrs: + case AArch64::SUBWrs: + case AArch64::SUBSWrs: // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; // Deliberate fallthrough - case ARM64::ANDWrs: - case ARM64::ANDSWrs: - case ARM64::BICWrs: - case ARM64::BICSWrs: - case ARM64::ORRWrs: - case ARM64::ORNWrs: - case ARM64::EORWrs: - case ARM64::EONWrs: { + case AArch64::ANDWrs: + case AArch64::ANDSWrs: + case AArch64::BICWrs: + case AArch64::BICSWrs: + case AArch64::ORRWrs: + case AArch64::ORNWrs: + case AArch64::EORWrs: + case AArch64::EONWrs: { // if sf == '0' and imm6<5> == '1' then ReservedValue() if (shiftLo >> 5 == 1) return Fail; @@ -780,22 +790,22 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; } - case ARM64::ADDXrs: - case ARM64::ADDSXrs: - case ARM64::SUBXrs: - case ARM64::SUBSXrs: + case AArch64::ADDXrs: + case AArch64::ADDSXrs: + case AArch64::SUBXrs: + case AArch64::SUBSXrs: // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; // Deliberate fallthrough - case ARM64::ANDXrs: - case ARM64::ANDSXrs: - case ARM64::BICXrs: - case ARM64::BICSXrs: - case ARM64::ORRXrs: - case ARM64::ORNXrs: - case ARM64::EORXrs: - case ARM64::EONXrs: + case AArch64::ANDXrs: + case AArch64::ANDSXrs: + case AArch64::BICXrs: + case AArch64::BICSXrs: + case AArch64::ORRXrs: + case AArch64::ORNXrs: + case AArch64::EORXrs: + case AArch64::EONXrs: DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); @@ -816,21 +826,22 @@ static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, 
uint32_t insn, switch (Inst.getOpcode()) { default: return Fail; - case ARM64::MOVZWi: - case ARM64::MOVNWi: - case ARM64::MOVKWi: + case AArch64::MOVZWi: + case AArch64::MOVNWi: + case AArch64::MOVKWi: if (shift & (1U << 5)) return Fail; DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); break; - case ARM64::MOVZXi: - case ARM64::MOVNXi: - case ARM64::MOVKXi: + case AArch64::MOVZXi: + case AArch64::MOVNXi: + case AArch64::MOVKXi: DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); break; } - if (Inst.getOpcode() == ARM64::MOVKWi || Inst.getOpcode() == ARM64::MOVKXi) + if (Inst.getOpcode() == AArch64::MOVKWi || + Inst.getOpcode() == AArch64::MOVKXi) Inst.addOperand(Inst.getOperand(0)); Inst.addOperand(MCOperand::CreateImm(imm)); @@ -844,51 +855,51 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, unsigned Rt = fieldFromInstruction(insn, 0, 5); unsigned Rn = fieldFromInstruction(insn, 5, 5); unsigned offset = fieldFromInstruction(insn, 10, 12); - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); switch (Inst.getOpcode()) { default: return Fail; - case ARM64::PRFMui: + case AArch64::PRFMui: // Rt is an immediate in prefetch. Inst.addOperand(MCOperand::CreateImm(Rt)); break; - case ARM64::STRBBui: - case ARM64::LDRBBui: - case ARM64::LDRSBWui: - case ARM64::STRHHui: - case ARM64::LDRHHui: - case ARM64::LDRSHWui: - case ARM64::STRWui: - case ARM64::LDRWui: + case AArch64::STRBBui: + case AArch64::LDRBBui: + case AArch64::LDRSBWui: + case AArch64::STRHHui: + case AArch64::LDRHHui: + case AArch64::LDRSHWui: + case AArch64::STRWui: + case AArch64::LDRWui: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRSBXui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::STRXui: - case ARM64::LDRXui: + case AArch64::LDRSBXui: + case AArch64::LDRSHXui: + case AArch64::LDRSWui: + case AArch64::STRXui: + case AArch64::LDRXui: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRQui: - case ARM64::STRQui: + case AArch64::LDRQui: + case AArch64::STRQui: DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRDui: - case ARM64::STRDui: + case AArch64::LDRDui: + case AArch64::STRDui: DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRSui: - case ARM64::STRSui: + case AArch64::LDRSui: + case AArch64::STRSui: DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRHui: - case ARM64::STRHui: + case AArch64::LDRHui: + case AArch64::STRHui: DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDRBui: - case ARM64::STRBui: + case AArch64::LDRBui: + case AArch64::STRBui: DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); break; } @@ -915,52 +926,52 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, switch (Inst.getOpcode()) { default: break; - case ARM64::LDRSBWpre: - case ARM64::LDRSHWpre: - case ARM64::STRBBpre: - case ARM64::LDRBBpre: - case ARM64::STRHHpre: - case ARM64::LDRHHpre: - case ARM64::STRWpre: - case ARM64::LDRWpre: - case ARM64::LDRSBWpost: - case ARM64::LDRSHWpost: - case ARM64::STRBBpost: - case ARM64::LDRBBpost: - case ARM64::STRHHpost: - case ARM64::LDRHHpost: - case ARM64::STRWpost: - case ARM64::LDRWpost: - case ARM64::LDRSBXpre: - case ARM64::LDRSHXpre: - case ARM64::STRXpre: - case ARM64::LDRSWpre: - case ARM64::LDRXpre: - case ARM64::LDRSBXpost: - case ARM64::LDRSHXpost: - case ARM64::STRXpost: - case ARM64::LDRSWpost: - case ARM64::LDRXpost: - case 
ARM64::LDRQpre: - case ARM64::STRQpre: - case ARM64::LDRQpost: - case ARM64::STRQpost: - case ARM64::LDRDpre: - case ARM64::STRDpre: - case ARM64::LDRDpost: - case ARM64::STRDpost: - case ARM64::LDRSpre: - case ARM64::STRSpre: - case ARM64::LDRSpost: - case ARM64::STRSpost: - case ARM64::LDRHpre: - case ARM64::STRHpre: - case ARM64::LDRHpost: - case ARM64::STRHpost: - case ARM64::LDRBpre: - case ARM64::STRBpre: - case ARM64::LDRBpost: - case ARM64::STRBpost: + case AArch64::LDRSBWpre: + case AArch64::LDRSHWpre: + case AArch64::STRBBpre: + case AArch64::LDRBBpre: + case AArch64::STRHHpre: + case AArch64::LDRHHpre: + case AArch64::STRWpre: + case AArch64::LDRWpre: + case AArch64::LDRSBWpost: + case AArch64::LDRSHWpost: + case AArch64::STRBBpost: + case AArch64::LDRBBpost: + case AArch64::STRHHpost: + case AArch64::LDRHHpost: + case AArch64::STRWpost: + case AArch64::LDRWpost: + case AArch64::LDRSBXpre: + case AArch64::LDRSHXpre: + case AArch64::STRXpre: + case AArch64::LDRSWpre: + case AArch64::LDRXpre: + case AArch64::LDRSBXpost: + case AArch64::LDRSHXpost: + case AArch64::STRXpost: + case AArch64::LDRSWpost: + case AArch64::LDRXpost: + case AArch64::LDRQpre: + case AArch64::STRQpre: + case AArch64::LDRQpost: + case AArch64::STRQpost: + case AArch64::LDRDpre: + case AArch64::STRDpre: + case AArch64::LDRDpost: + case AArch64::STRDpost: + case AArch64::LDRSpre: + case AArch64::STRSpre: + case AArch64::LDRSpost: + case AArch64::STRSpost: + case AArch64::LDRHpre: + case AArch64::STRHpre: + case AArch64::LDRHpost: + case AArch64::STRHpost: + case AArch64::LDRBpre: + case AArch64::STRBpre: + case AArch64::LDRBpost: + case AArch64::STRBpost: DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); break; } @@ -968,104 +979,104 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, switch (Inst.getOpcode()) { default: return Fail; - case ARM64::PRFUMi: + case AArch64::PRFUMi: // Rt is an immediate in prefetch. 
Inst.addOperand(MCOperand::CreateImm(Rt)); break; - case ARM64::STURBBi: - case ARM64::LDURBBi: - case ARM64::LDURSBWi: - case ARM64::STURHHi: - case ARM64::LDURHHi: - case ARM64::LDURSHWi: - case ARM64::STURWi: - case ARM64::LDURWi: - case ARM64::LDTRSBWi: - case ARM64::LDTRSHWi: - case ARM64::STTRWi: - case ARM64::LDTRWi: - case ARM64::STTRHi: - case ARM64::LDTRHi: - case ARM64::LDTRBi: - case ARM64::STTRBi: - case ARM64::LDRSBWpre: - case ARM64::LDRSHWpre: - case ARM64::STRBBpre: - case ARM64::LDRBBpre: - case ARM64::STRHHpre: - case ARM64::LDRHHpre: - case ARM64::STRWpre: - case ARM64::LDRWpre: - case ARM64::LDRSBWpost: - case ARM64::LDRSHWpost: - case ARM64::STRBBpost: - case ARM64::LDRBBpost: - case ARM64::STRHHpost: - case ARM64::LDRHHpost: - case ARM64::STRWpost: - case ARM64::LDRWpost: + case AArch64::STURBBi: + case AArch64::LDURBBi: + case AArch64::LDURSBWi: + case AArch64::STURHHi: + case AArch64::LDURHHi: + case AArch64::LDURSHWi: + case AArch64::STURWi: + case AArch64::LDURWi: + case AArch64::LDTRSBWi: + case AArch64::LDTRSHWi: + case AArch64::STTRWi: + case AArch64::LDTRWi: + case AArch64::STTRHi: + case AArch64::LDTRHi: + case AArch64::LDTRBi: + case AArch64::STTRBi: + case AArch64::LDRSBWpre: + case AArch64::LDRSHWpre: + case AArch64::STRBBpre: + case AArch64::LDRBBpre: + case AArch64::STRHHpre: + case AArch64::LDRHHpre: + case AArch64::STRWpre: + case AArch64::LDRWpre: + case AArch64::LDRSBWpost: + case AArch64::LDRSHWpost: + case AArch64::STRBBpost: + case AArch64::LDRBBpost: + case AArch64::STRHHpost: + case AArch64::LDRHHpost: + case AArch64::STRWpost: + case AArch64::LDRWpost: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURSBXi: - case ARM64::LDURSHXi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::LDURXi: - case ARM64::LDTRSBXi: - case ARM64::LDTRSHXi: - case ARM64::LDTRSWi: - case ARM64::STTRXi: - case ARM64::LDTRXi: - case ARM64::LDRSBXpre: - case ARM64::LDRSHXpre: - case ARM64::STRXpre: - case ARM64::LDRSWpre: - case ARM64::LDRXpre: - case ARM64::LDRSBXpost: - case ARM64::LDRSHXpost: - case ARM64::STRXpost: - case ARM64::LDRSWpost: - case ARM64::LDRXpost: + case AArch64::LDURSBXi: + case AArch64::LDURSHXi: + case AArch64::LDURSWi: + case AArch64::STURXi: + case AArch64::LDURXi: + case AArch64::LDTRSBXi: + case AArch64::LDTRSHXi: + case AArch64::LDTRSWi: + case AArch64::STTRXi: + case AArch64::LDTRXi: + case AArch64::LDRSBXpre: + case AArch64::LDRSHXpre: + case AArch64::STRXpre: + case AArch64::LDRSWpre: + case AArch64::LDRXpre: + case AArch64::LDRSBXpost: + case AArch64::LDRSHXpost: + case AArch64::STRXpost: + case AArch64::LDRSWpost: + case AArch64::LDRXpost: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURQi: - case ARM64::STURQi: - case ARM64::LDRQpre: - case ARM64::STRQpre: - case ARM64::LDRQpost: - case ARM64::STRQpost: + case AArch64::LDURQi: + case AArch64::STURQi: + case AArch64::LDRQpre: + case AArch64::STRQpre: + case AArch64::LDRQpost: + case AArch64::STRQpost: DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURDi: - case ARM64::STURDi: - case ARM64::LDRDpre: - case ARM64::STRDpre: - case ARM64::LDRDpost: - case ARM64::STRDpost: + case AArch64::LDURDi: + case AArch64::STURDi: + case AArch64::LDRDpre: + case AArch64::STRDpre: + case AArch64::LDRDpost: + case AArch64::STRDpost: DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURSi: - case ARM64::STURSi: - case ARM64::LDRSpre: - case ARM64::STRSpre: - case ARM64::LDRSpost: - case 
ARM64::STRSpost: + case AArch64::LDURSi: + case AArch64::STURSi: + case AArch64::LDRSpre: + case AArch64::STRSpre: + case AArch64::LDRSpost: + case AArch64::STRSpost: DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURHi: - case ARM64::STURHi: - case ARM64::LDRHpre: - case ARM64::STRHpre: - case ARM64::LDRHpost: - case ARM64::STRHpost: + case AArch64::LDURHi: + case AArch64::STURHi: + case AArch64::LDRHpre: + case AArch64::STRHpre: + case AArch64::LDRHpost: + case AArch64::STRHpost: DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::LDURBi: - case ARM64::STURBi: - case ARM64::LDRBpre: - case ARM64::STRBpre: - case ARM64::LDRBpost: - case ARM64::STRBpost: + case AArch64::LDURBi: + case AArch64::STURBi: + case AArch64::LDRBpre: + case AArch64::STRBpre: + case AArch64::LDRBpost: + case AArch64::STRBpost: DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); break; } @@ -1096,53 +1107,53 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, switch (Opcode) { default: return Fail; - case ARM64::STLXRW: - case ARM64::STLXRB: - case ARM64::STLXRH: - case ARM64::STXRW: - case ARM64::STXRB: - case ARM64::STXRH: + case AArch64::STLXRW: + case AArch64::STLXRB: + case AArch64::STLXRH: + case AArch64::STXRW: + case AArch64::STXRB: + case AArch64::STXRH: DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); // FALLTHROUGH - case ARM64::LDARW: - case ARM64::LDARB: - case ARM64::LDARH: - case ARM64::LDAXRW: - case ARM64::LDAXRB: - case ARM64::LDAXRH: - case ARM64::LDXRW: - case ARM64::LDXRB: - case ARM64::LDXRH: - case ARM64::STLRW: - case ARM64::STLRB: - case ARM64::STLRH: + case AArch64::LDARW: + case AArch64::LDARB: + case AArch64::LDARH: + case AArch64::LDAXRW: + case AArch64::LDAXRB: + case AArch64::LDAXRH: + case AArch64::LDXRW: + case AArch64::LDXRB: + case AArch64::LDXRH: + case AArch64::STLRW: + case AArch64::STLRB: + case AArch64::STLRH: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::STLXRX: - case ARM64::STXRX: + case AArch64::STLXRX: + case AArch64::STXRX: DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); // FALLTHROUGH - case ARM64::LDARX: - case ARM64::LDAXRX: - case ARM64::LDXRX: - case ARM64::STLRX: + case AArch64::LDARX: + case AArch64::LDAXRX: + case AArch64::LDXRX: + case AArch64::STLRX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - case ARM64::STLXPW: - case ARM64::STXPW: + case AArch64::STLXPW: + case AArch64::STXPW: DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); // FALLTHROUGH - case ARM64::LDAXPW: - case ARM64::LDXPW: + case AArch64::LDAXPW: + case AArch64::LDXPW: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::STLXPX: - case ARM64::STXPX: + case AArch64::STLXPX: + case AArch64::STXPX: DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); // FALLTHROUGH - case ARM64::LDAXPX: - case ARM64::LDXPX: + case AArch64::LDAXPX: + case AArch64::LDXPX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); break; @@ -1151,8 +1162,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); // You shouldn't load to the same register twice in an instruction... 
- if ((Opcode == ARM64::LDAXPW || Opcode == ARM64::LDXPW || - Opcode == ARM64::LDAXPX || Opcode == ARM64::LDXPX) && + if ((Opcode == AArch64::LDAXPW || Opcode == AArch64::LDXPW || + Opcode == AArch64::LDAXPX || Opcode == AArch64::LDXPX) && Rt == Rt2) return SoftFail; @@ -1180,28 +1191,28 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, switch (Opcode) { default: break; - case ARM64::LDPXpost: - case ARM64::STPXpost: - case ARM64::LDPSWpost: - case ARM64::LDPXpre: - case ARM64::STPXpre: - case ARM64::LDPSWpre: - case ARM64::LDPWpost: - case ARM64::STPWpost: - case ARM64::LDPWpre: - case ARM64::STPWpre: - case ARM64::LDPQpost: - case ARM64::STPQpost: - case ARM64::LDPQpre: - case ARM64::STPQpre: - case ARM64::LDPDpost: - case ARM64::STPDpost: - case ARM64::LDPDpre: - case ARM64::STPDpre: - case ARM64::LDPSpost: - case ARM64::STPSpost: - case ARM64::LDPSpre: - case ARM64::STPSpre: + case AArch64::LDPXpost: + case AArch64::STPXpost: + case AArch64::LDPSWpost: + case AArch64::LDPXpre: + case AArch64::STPXpre: + case AArch64::LDPSWpre: + case AArch64::LDPWpost: + case AArch64::STPWpost: + case AArch64::LDPWpre: + case AArch64::STPWpre: + case AArch64::LDPQpost: + case AArch64::STPQpost: + case AArch64::LDPQpre: + case AArch64::STPQpre: + case AArch64::LDPDpost: + case AArch64::STPDpost: + case AArch64::LDPDpre: + case AArch64::STPDpre: + case AArch64::LDPSpost: + case AArch64::STPSpost: + case AArch64::LDPSpre: + case AArch64::STPSpre: DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); break; } @@ -1209,65 +1220,65 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, switch (Opcode) { default: return Fail; - case ARM64::LDPXpost: - case ARM64::STPXpost: - case ARM64::LDPSWpost: - case ARM64::LDPXpre: - case ARM64::STPXpre: - case ARM64::LDPSWpre: + case AArch64::LDPXpost: + case AArch64::STPXpost: + case AArch64::LDPSWpost: + case AArch64::LDPXpre: + case AArch64::STPXpre: + case AArch64::LDPSWpre: NeedsDisjointWritebackTransfer = true; // Fallthrough - case ARM64::LDNPXi: - case ARM64::STNPXi: - case ARM64::LDPXi: - case ARM64::STPXi: - case ARM64::LDPSWi: + case AArch64::LDNPXi: + case AArch64::STNPXi: + case AArch64::LDPXi: + case AArch64::STPXi: + case AArch64::LDPSWi: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::LDPWpost: - case ARM64::STPWpost: - case ARM64::LDPWpre: - case ARM64::STPWpre: + case AArch64::LDPWpost: + case AArch64::STPWpost: + case AArch64::LDPWpre: + case AArch64::STPWpre: NeedsDisjointWritebackTransfer = true; // Fallthrough - case ARM64::LDNPWi: - case ARM64::STNPWi: - case ARM64::LDPWi: - case ARM64::STPWi: + case AArch64::LDNPWi: + case AArch64::STNPWi: + case AArch64::LDPWi: + case AArch64::STPWi: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::LDNPQi: - case ARM64::STNPQi: - case ARM64::LDPQpost: - case ARM64::STPQpost: - case ARM64::LDPQi: - case ARM64::STPQi: - case ARM64::LDPQpre: - case ARM64::STPQpre: + case AArch64::LDNPQi: + case AArch64::STNPQi: + case AArch64::LDPQpost: + case AArch64::STPQpost: + case AArch64::LDPQi: + case AArch64::STPQi: + case AArch64::LDPQpre: + case AArch64::STPQpre: DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::LDNPDi: - case ARM64::STNPDi: - case ARM64::LDPDpost: - case ARM64::STPDpost: - case ARM64::LDPDi: - case ARM64::STPDi: - case 
ARM64::LDPDpre: - case ARM64::STPDpre: + case AArch64::LDNPDi: + case AArch64::STNPDi: + case AArch64::LDPDpost: + case AArch64::STPDpost: + case AArch64::LDPDi: + case AArch64::STPDi: + case AArch64::LDPDpre: + case AArch64::STPDpre: DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder); break; - case ARM64::LDNPSi: - case ARM64::STNPSi: - case ARM64::LDPSpost: - case ARM64::STPSpost: - case ARM64::LDPSi: - case ARM64::STPSi: - case ARM64::LDPSpre: - case ARM64::STPSpre: + case AArch64::LDNPSi: + case AArch64::STNPSi: + case AArch64::LDPSpost: + case AArch64::STPSpost: + case AArch64::LDPSi: + case AArch64::STPSi: + case AArch64::LDPSpre: + case AArch64::STPSpre: DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; @@ -1303,38 +1314,38 @@ static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, switch (Inst.getOpcode()) { default: return Fail; - case ARM64::ADDWrx: - case ARM64::SUBWrx: + case AArch64::ADDWrx: + case AArch64::SUBWrx: DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::ADDSWrx: - case ARM64::SUBSWrx: + case AArch64::ADDSWrx: + case AArch64::SUBSWrx: DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::ADDXrx: - case ARM64::SUBXrx: + case AArch64::ADDXrx: + case AArch64::SUBXrx: DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::ADDSXrx: - case ARM64::SUBSXrx: + case AArch64::ADDSXrx: + case AArch64::SUBSXrx: DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::ADDXrx64: - case ARM64::SUBXrx64: + case AArch64::ADDXrx64: + case AArch64::SUBXrx64: DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); break; - case ARM64::SUBSXrx64: - case ARM64::ADDSXrx64: + case AArch64::SUBSXrx64: + case AArch64::ADDSXrx64: DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); @@ -1354,22 +1365,22 @@ static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, unsigned imm; if (Datasize) { - if (Inst.getOpcode() == ARM64::ANDSXri) + if (Inst.getOpcode() == AArch64::ANDSXri) DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); else DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); imm = fieldFromInstruction(insn, 10, 13); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 64)) + if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 64)) return Fail; } else { - if (Inst.getOpcode() == ARM64::ANDSWri) + if (Inst.getOpcode() == AArch64::ANDSWri) DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); else DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); imm = fieldFromInstruction(insn, 10, 12); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 32)) + if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 32)) return Fail; } Inst.addOperand(MCOperand::CreateImm(imm)); @@ -1384,7 +1395,7 
@@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; imm |= fieldFromInstruction(insn, 5, 5); - if (Inst.getOpcode() == ARM64::MOVID) + if (Inst.getOpcode() == AArch64::MOVID) DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); else DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); @@ -1394,20 +1405,20 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, switch (Inst.getOpcode()) { default: break; - case ARM64::MOVIv4i16: - case ARM64::MOVIv8i16: - case ARM64::MVNIv4i16: - case ARM64::MVNIv8i16: - case ARM64::MOVIv2i32: - case ARM64::MOVIv4i32: - case ARM64::MVNIv2i32: - case ARM64::MVNIv4i32: + case AArch64::MOVIv4i16: + case AArch64::MOVIv8i16: + case AArch64::MVNIv4i16: + case AArch64::MVNIv8i16: + case AArch64::MOVIv2i32: + case AArch64::MOVIv4i32: + case AArch64::MVNIv2i32: + case AArch64::MVNIv4i32: Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); break; - case ARM64::MOVIv2s_msl: - case ARM64::MOVIv4s_msl: - case ARM64::MVNIv2s_msl: - case ARM64::MVNIv4s_msl: + case AArch64::MOVIv2s_msl: + case AArch64::MOVIv4s_msl: + case AArch64::MVNIv2s_msl: + case AArch64::MVNIv4s_msl: Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108)); break; } @@ -1438,8 +1449,8 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, unsigned Rd = fieldFromInstruction(insn, 0, 5); int64_t imm = fieldFromInstruction(insn, 5, 19) << 2; imm |= fieldFromInstruction(insn, 29, 2); - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); // Sign-extend the 21-bit immediate. if (imm & (1 << (21 - 1))) @@ -1462,8 +1473,8 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, unsigned ShifterVal = (Imm >> 12) & 3; unsigned ImmVal = Imm & 0xFFF; - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); if (ShifterVal != 0 && ShifterVal != 1) return Fail; @@ -1492,8 +1503,8 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, uint64_t Addr, const void *Decoder) { int64_t imm = fieldFromInstruction(insn, 0, 26); - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); // Sign-extend the 26-bit immediate. if (imm & (1 << (26 - 1))) @@ -1518,7 +1529,7 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, Inst.addOperand(MCOperand::CreateImm(crm)); bool ValidNamed; - (void)ARM64PState::PStateMapper().toString(pstate_field, ValidNamed); + (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed); return ValidNamed ? Success : Fail; } @@ -1529,8 +1540,8 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5; bit |= fieldFromInstruction(insn, 19, 5); int64_t dst = fieldFromInstruction(insn, 5, 14); - const ARM64Disassembler *Dis = - static_cast(Decoder); + const AArch64Disassembler *Dis = + static_cast(Decoder); // Sign-extend 14-bit immediate. 
if (dst & (1 << (14 - 1))) diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h similarity index 75% rename from lib/Target/ARM64/Disassembler/ARM64Disassembler.h rename to lib/Target/AArch64/Disassembler/AArch64Disassembler.h index 8989925f36b..68d4867977b 100644 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h @@ -1,4 +1,4 @@ -//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===// +//===- AArch64Disassembler.h - Disassembler for AArch64 ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM64DISASSEMBLER_H -#define ARM64DISASSEMBLER_H +#ifndef AArch64DISASSEMBLER_H +#define AArch64DISASSEMBLER_H #include "llvm/MC/MCDisassembler.h" @@ -21,12 +21,12 @@ class MCInst; class MemoryObject; class raw_ostream; -class ARM64Disassembler : public MCDisassembler { +class AArch64Disassembler : public MCDisassembler { public: - ARM64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - ~ARM64Disassembler() {} + ~AArch64Disassembler() {} /// getInstruction - See MCDisassembler. MCDisassembler::DecodeStatus diff --git a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp similarity index 86% rename from lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp rename to lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 2f8e516d185..24663684a3f 100644 --- a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -1,4 +1,4 @@ -//===- ARM64ExternalSymbolizer.cpp - Symbolizer for ARM64 -------*- C++ -*-===// +//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "ARM64ExternalSymbolizer.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "Utils/ARM64BaseInfo.h" +#include "AArch64ExternalSymbolizer.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -19,7 +19,7 @@ using namespace llvm; -#define DEBUG_TYPE "arm64-disassembler" +#define DEBUG_TYPE "aarch64-disassembler" static MCSymbolRefExpr::VariantKind getVariant(uint64_t LLVMDisassembler_VariantKind) { @@ -58,14 +58,9 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) { /// a symbol look up is done to see it is returns a specific reference type /// to add to the comment stream. This function returns Success if it adds /// an operand to the MCInst and Fail otherwise. -bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( - MCInst &MI, - raw_ostream &CommentStream, - int64_t Value, - uint64_t Address, - bool IsBranch, - uint64_t Offset, - uint64_t InstSize) { +bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( + MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, uint64_t InstSize) { // FIXME: This method shares a lot of code with // MCExternalSymbolizer::tryAddingSymbolicOperand. 
It may be possible // refactor the MCExternalSymbolizer interface to allow more of this @@ -94,7 +89,7 @@ bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( else if (ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) CommentStream << "Objc message: " << ReferenceName; - } else if (MI.getOpcode() == ARM64::ADRP) { + } else if (MI.getOpcode() == AArch64::ADRP) { ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; // otool expects the fully encoded ADRP instruction to be passed in as // the value here, so reconstruct it: @@ -107,19 +102,19 @@ bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( &ReferenceName); CommentStream << format("0x%llx", 0xfffffffffffff000LL & (Address + Value)); - } else if (MI.getOpcode() == ARM64::ADDXri || - MI.getOpcode() == ARM64::LDRXui || - MI.getOpcode() == ARM64::LDRXl || - MI.getOpcode() == ARM64::ADR) { - if (MI.getOpcode() == ARM64::ADDXri) + } else if (MI.getOpcode() == AArch64::ADDXri || + MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRXl || + MI.getOpcode() == AArch64::ADR) { + if (MI.getOpcode() == AArch64::ADDXri) ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; - else if (MI.getOpcode() == ARM64::LDRXui) + else if (MI.getOpcode() == AArch64::LDRXui) ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; - if (MI.getOpcode() == ARM64::LDRXl) { + if (MI.getOpcode() == AArch64::LDRXl) { ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, &ReferenceName); - } else if (MI.getOpcode() == ARM64::ADR) { + } else if (MI.getOpcode() == AArch64::ADR) { ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, &ReferenceName); @@ -128,7 +123,7 @@ bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand( // otool expects the fully encoded ADD/LDR instruction to be passed in // as the value here, so reconstruct it: unsigned EncodedInst = - MI.getOpcode() == ARM64::ADDXri ? 0x91000000: 0xF9400000; + MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000; EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] EncodedInst |= MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn diff --git a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h similarity index 50% rename from lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h rename to lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h index 45f07a5e258..171d31c48cd 100644 --- a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h @@ -1,4 +1,4 @@ -//===- ARM64ExternalSymbolizer.h - Symbolizer for ARM64 ---------*- C++ -*-===// +//===- AArch64ExternalSymbolizer.h - Symbolizer for AArch64 -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,25 +7,26 @@ // //===----------------------------------------------------------------------===// // -// Symbolize ARM64 assembly code during disassembly using callbacks. +// Symbolize AArch64 assembly code during disassembly using callbacks. 
// //===----------------------------------------------------------------------===// -#ifndef ARM64EXTERNALSYMBOLIZER_H -#define ARM64EXTERNALSYMBOLIZER_H +#ifndef AArch64EXTERNALSYMBOLIZER_H +#define AArch64EXTERNALSYMBOLIZER_H #include "llvm/MC/MCExternalSymbolizer.h" namespace llvm { -class ARM64ExternalSymbolizer : public MCExternalSymbolizer { +class AArch64ExternalSymbolizer : public MCExternalSymbolizer { public: - ARM64ExternalSymbolizer(MCContext &Ctx, - std::unique_ptr<MCRelocationInfo> RelInfo, - LLVMOpInfoCallback GetOpInfo, - LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo) - : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp, - DisInfo) {} + AArch64ExternalSymbolizer(MCContext &Ctx, + std::unique_ptr<MCRelocationInfo> RelInfo, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo) + : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp, + DisInfo) {} bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, diff --git a/lib/Target/ARM64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt similarity index 66% rename from lib/Target/ARM64/Disassembler/CMakeLists.txt rename to lib/Target/AArch64/Disassembler/CMakeLists.txt index 43ade66be14..be4ccad6d1b 100644 --- a/lib/Target/ARM64/Disassembler/CMakeLists.txt +++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt @@ -1,8 +1,8 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) -add_llvm_library(LLVMARM64Disassembler - ARM64Disassembler.cpp - ARM64ExternalSymbolizer.cpp +add_llvm_library(LLVMAArch64Disassembler + AArch64Disassembler.cpp + AArch64ExternalSymbolizer.cpp ) # workaround for hanging compilation on MSVC8, 9 and 10 #if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) @@ -11,4 +11,4 @@ add_llvm_library(LLVMARM64Disassembler # PROPERTY COMPILE_FLAGS "/Od" # ) #endif() -add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen) +add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen) diff --git a/lib/Target/ARM64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt similarity index 71% rename from lib/Target/ARM64/Disassembler/LLVMBuild.txt rename to lib/Target/AArch64/Disassembler/LLVMBuild.txt index 5bbe88ddb49..a4224f4a2f5 100644 --- a/lib/Target/ARM64/Disassembler/LLVMBuild.txt +++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===; +;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = ARM64Disassembler -parent = ARM64 -required_libraries = ARM64Info ARM64Utils MC Support -add_to_library_groups = ARM64 +name = AArch64Disassembler +parent = AArch64 +required_libraries = AArch64Info AArch64Utils MC Support +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile similarity index 81% rename from lib/Target/ARM64/Disassembler/Makefile rename to lib/Target/AArch64/Disassembler/Makefile index 479d00c2494..741bb817a63 100644 --- a/lib/Target/ARM64/Disassembler/Makefile +++ b/lib/Target/AArch64/Disassembler/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===## +##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===## # # The LLVM Compiler
Infrastructure # @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Disassembler +LIBRARYNAME = LLVMAArch64Disassembler # Hack: we need to include 'main' arm target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp new file mode 100644 index 00000000000..f484a5b1bdc --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -0,0 +1,1316 @@ +//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#include "AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "AArch64GenAsmWriter.inc" +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "AArch64GenAsmWriter1.inc" + +AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) + : MCInstPrinter(MAI, MII, MRI) { + // Initialize the set of available features. + setAvailableFeatures(STI.getFeatureBits()); +} + +AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) + : AArch64InstPrinter(MAI, MII, MRI, STI) {} + +void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + // This is for .cfi directives. + OS << getRegisterName(RegNo); +} + +void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + // Check for special encodings and print the canonical alias instead. + + unsigned Opcode = MI->getOpcode(); + + if (Opcode == AArch64::SYSxt) + if (printSysAlias(MI, O)) { + printAnnotation(O, Annot); + return; + } + + // SBFM/UBFM should print to a nicer aliased form if possible. 
+ if (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri || + Opcode == AArch64::UBFMXri || Opcode == AArch64::UBFMWri) { + const MCOperand &Op0 = MI->getOperand(0); + const MCOperand &Op1 = MI->getOperand(1); + const MCOperand &Op2 = MI->getOperand(2); + const MCOperand &Op3 = MI->getOperand(3); + + bool IsSigned = (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri); + bool Is64Bit = (Opcode == AArch64::SBFMXri || Opcode == AArch64::UBFMXri); + if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) { + const char *AsmMnemonic = nullptr; + + switch (Op3.getImm()) { + default: + break; + case 7: + if (IsSigned) + AsmMnemonic = "sxtb"; + else if (!Is64Bit) + AsmMnemonic = "uxtb"; + break; + case 15: + if (IsSigned) + AsmMnemonic = "sxth"; + else if (!Is64Bit) + AsmMnemonic = "uxth"; + break; + case 31: + // *xtw is only valid for signed 64-bit operations. + if (Is64Bit && IsSigned) + AsmMnemonic = "sxtw"; + break; + } + + if (AsmMnemonic) { + O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) + << ", " << getRegisterName(getWRegFromXReg(Op1.getReg())); + printAnnotation(O, Annot); + return; + } + } + + // All immediate shifts are aliases, implemented using the Bitfield + // instruction. In all cases the immediate shift amount shift must be in + // the range 0 to (reg.size -1). + if (Op2.isImm() && Op3.isImm()) { + const char *AsmMnemonic = nullptr; + int shift = 0; + int64_t immr = Op2.getImm(); + int64_t imms = Op3.getImm(); + if (Opcode == AArch64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) { + AsmMnemonic = "lsl"; + shift = 31 - imms; + } else if (Opcode == AArch64::UBFMXri && imms != 0x3f && + ((imms + 1 == immr))) { + AsmMnemonic = "lsl"; + shift = 63 - imms; + } else if (Opcode == AArch64::UBFMWri && imms == 0x1f) { + AsmMnemonic = "lsr"; + shift = immr; + } else if (Opcode == AArch64::UBFMXri && imms == 0x3f) { + AsmMnemonic = "lsr"; + shift = immr; + } else if (Opcode == AArch64::SBFMWri && imms == 0x1f) { + AsmMnemonic = "asr"; + shift = immr; + } else if (Opcode == AArch64::SBFMXri && imms == 0x3f) { + AsmMnemonic = "asr"; + shift = immr; + } + if (AsmMnemonic) { + O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) + << ", " << getRegisterName(Op1.getReg()) << ", #" << shift; + printAnnotation(O, Annot); + return; + } + } + + // SBFIZ/UBFIZ aliases + if (Op2.getImm() > Op3.getImm()) { + O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t' + << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) + << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1; + printAnnotation(O, Annot); + return; + } + + // Otherwise SBFX/UBFX is the preferred form + O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t' + << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) + << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1; + printAnnotation(O, Annot); + return; + } + + if (Opcode == AArch64::BFMXri || Opcode == AArch64::BFMWri) { + const MCOperand &Op0 = MI->getOperand(0); // Op1 == Op0 + const MCOperand &Op2 = MI->getOperand(2); + int ImmR = MI->getOperand(3).getImm(); + int ImmS = MI->getOperand(4).getImm(); + + // BFI alias + if (ImmS < ImmR) { + int BitWidth = Opcode == AArch64::BFMXri ? 
64 : 32; + int LSB = (BitWidth - ImmR) % BitWidth; + int Width = ImmS + 1; + O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", " + << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width; + printAnnotation(O, Annot); + return; + } + + int LSB = ImmR; + int Width = ImmS - ImmR + 1; + // Otherwise BFXIL the preferred form + O << "\tbfxil\t" + << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg()) + << ", #" << LSB << ", #" << Width; + printAnnotation(O, Annot); + return; + } + + // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift + // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be + // printed. + if ((Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi || + Opcode == AArch64::MOVNXi || Opcode == AArch64::MOVNWi) && + MI->getOperand(1).isExpr()) { + if (Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi) + O << "\tmovz\t"; + else + O << "\tmovn\t"; + + O << getRegisterName(MI->getOperand(0).getReg()) << ", #" + << *MI->getOperand(1).getExpr(); + return; + } + + if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) && + MI->getOperand(2).isExpr()) { + O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" + << *MI->getOperand(2).getExpr(); + return; + } + + if (!printAliasInstr(MI, O)) + printInstruction(MI, O); + + printAnnotation(O, Annot); +} + +static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout, + bool &IsTbx) { + switch (Opcode) { + case AArch64::TBXv8i8One: + case AArch64::TBXv8i8Two: + case AArch64::TBXv8i8Three: + case AArch64::TBXv8i8Four: + IsTbx = true; + Layout = ".8b"; + return true; + case AArch64::TBLv8i8One: + case AArch64::TBLv8i8Two: + case AArch64::TBLv8i8Three: + case AArch64::TBLv8i8Four: + IsTbx = false; + Layout = ".8b"; + return true; + case AArch64::TBXv16i8One: + case AArch64::TBXv16i8Two: + case AArch64::TBXv16i8Three: + case AArch64::TBXv16i8Four: + IsTbx = true; + Layout = ".16b"; + return true; + case AArch64::TBLv16i8One: + case AArch64::TBLv16i8Two: + case AArch64::TBLv16i8Three: + case AArch64::TBLv16i8Four: + IsTbx = false; + Layout = ".16b"; + return true; + default: + return false; + } +} + +struct LdStNInstrDesc { + unsigned Opcode; + const char *Mnemonic; + const char *Layout; + int ListOperand; + bool HasLane; + int NaturalOffset; +}; + +static LdStNInstrDesc LdStNInstInfo[] = { + { AArch64::LD1i8, "ld1", ".b", 1, true, 0 }, + { AArch64::LD1i16, "ld1", ".h", 1, true, 0 }, + { AArch64::LD1i32, "ld1", ".s", 1, true, 0 }, + { AArch64::LD1i64, "ld1", ".d", 1, true, 0 }, + { AArch64::LD1i8_POST, "ld1", ".b", 2, true, 1 }, + { AArch64::LD1i16_POST, "ld1", ".h", 2, true, 2 }, + { AArch64::LD1i32_POST, "ld1", ".s", 2, true, 4 }, + { AArch64::LD1i64_POST, "ld1", ".d", 2, true, 8 }, + { AArch64::LD1Rv16b, "ld1r", ".16b", 0, false, 0 }, + { AArch64::LD1Rv8h, "ld1r", ".8h", 0, false, 0 }, + { AArch64::LD1Rv4s, "ld1r", ".4s", 0, false, 0 }, + { AArch64::LD1Rv2d, "ld1r", ".2d", 0, false, 0 }, + { AArch64::LD1Rv8b, "ld1r", ".8b", 0, false, 0 }, + { AArch64::LD1Rv4h, "ld1r", ".4h", 0, false, 0 }, + { AArch64::LD1Rv2s, "ld1r", ".2s", 0, false, 0 }, + { AArch64::LD1Rv1d, "ld1r", ".1d", 0, false, 0 }, + { AArch64::LD1Rv16b_POST, "ld1r", ".16b", 1, false, 1 }, + { AArch64::LD1Rv8h_POST, "ld1r", ".8h", 1, false, 2 }, + { AArch64::LD1Rv4s_POST, "ld1r", ".4s", 1, false, 4 }, + { AArch64::LD1Rv2d_POST, "ld1r", ".2d", 1, false, 8 }, + { AArch64::LD1Rv8b_POST, "ld1r", ".8b", 1, false, 1 }, + { AArch64::LD1Rv4h_POST, "ld1r", ".4h", 1, false, 2 }, + { 
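The bitfield-alias logic above is dense; as a sanity check, here is a small sketch (not part of the patch) that reproduces just the 32-bit lsl/lsr/asr cases together with a worked example of the immr/imms arithmetic.

#include <cstdint>
#include <cstdio>

// Mirror of the 32-bit shift-alias checks in printInst: returns the alias
// mnemonic and shift amount, or nullptr when only sbfx/ubfx applies.
static const char *shiftAlias32(bool IsSigned, int64_t immr, int64_t imms,
                                int &Amount) {
  if (!IsSigned && imms != 0x1f && imms + 1 == immr) {
    Amount = 31 - imms; // lsl #n is ubfm #((32 - n) % 32), #(31 - n)
    return "lsl";
  }
  if (imms == 0x1f) {
    Amount = immr;      // shift right by immr down to bit 31
    return IsSigned ? "asr" : "lsr";
  }
  return nullptr;
}

int main() {
  int Amt;
  // ubfm w0, w1, #28, #27 prints as "lsl w0, w1, #4": 27 + 1 == 28, 31 - 27 == 4.
  if (const char *M = shiftAlias32(false, 28, 27, Amt))
    std::printf("%s w0, w1, #%d\n", M, Amt);
}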
AArch64::LD1Rv2s_POST, "ld1r", ".2s", 1, false, 4 }, + { AArch64::LD1Rv1d_POST, "ld1r", ".1d", 1, false, 8 }, + { AArch64::LD1Onev16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Onev8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Onev4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Onev2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Onev8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Onev4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Onev2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Onev1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Onev16b_POST, "ld1", ".16b", 1, false, 16 }, + { AArch64::LD1Onev8h_POST, "ld1", ".8h", 1, false, 16 }, + { AArch64::LD1Onev4s_POST, "ld1", ".4s", 1, false, 16 }, + { AArch64::LD1Onev2d_POST, "ld1", ".2d", 1, false, 16 }, + { AArch64::LD1Onev8b_POST, "ld1", ".8b", 1, false, 8 }, + { AArch64::LD1Onev4h_POST, "ld1", ".4h", 1, false, 8 }, + { AArch64::LD1Onev2s_POST, "ld1", ".2s", 1, false, 8 }, + { AArch64::LD1Onev1d_POST, "ld1", ".1d", 1, false, 8 }, + { AArch64::LD1Twov16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Twov8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Twov4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Twov2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Twov8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Twov4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Twov2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Twov1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Twov16b_POST, "ld1", ".16b", 1, false, 32 }, + { AArch64::LD1Twov8h_POST, "ld1", ".8h", 1, false, 32 }, + { AArch64::LD1Twov4s_POST, "ld1", ".4s", 1, false, 32 }, + { AArch64::LD1Twov2d_POST, "ld1", ".2d", 1, false, 32 }, + { AArch64::LD1Twov8b_POST, "ld1", ".8b", 1, false, 16 }, + { AArch64::LD1Twov4h_POST, "ld1", ".4h", 1, false, 16 }, + { AArch64::LD1Twov2s_POST, "ld1", ".2s", 1, false, 16 }, + { AArch64::LD1Twov1d_POST, "ld1", ".1d", 1, false, 16 }, + { AArch64::LD1Threev16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Threev8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Threev4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Threev2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Threev8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Threev4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Threev2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Threev1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Threev16b_POST, "ld1", ".16b", 1, false, 48 }, + { AArch64::LD1Threev8h_POST, "ld1", ".8h", 1, false, 48 }, + { AArch64::LD1Threev4s_POST, "ld1", ".4s", 1, false, 48 }, + { AArch64::LD1Threev2d_POST, "ld1", ".2d", 1, false, 48 }, + { AArch64::LD1Threev8b_POST, "ld1", ".8b", 1, false, 24 }, + { AArch64::LD1Threev4h_POST, "ld1", ".4h", 1, false, 24 }, + { AArch64::LD1Threev2s_POST, "ld1", ".2s", 1, false, 24 }, + { AArch64::LD1Threev1d_POST, "ld1", ".1d", 1, false, 24 }, + { AArch64::LD1Fourv16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Fourv8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Fourv4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Fourv2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Fourv8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Fourv4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Fourv2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Fourv1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Fourv16b_POST, "ld1", ".16b", 1, false, 64 }, + { AArch64::LD1Fourv8h_POST, "ld1", ".8h", 1, false, 64 }, + { AArch64::LD1Fourv4s_POST, "ld1", ".4s", 1, false, 64 }, + { AArch64::LD1Fourv2d_POST, "ld1", ".2d", 1, false, 64 }, + { AArch64::LD1Fourv8b_POST, "ld1", 
".8b", 1, false, 32 }, + { AArch64::LD1Fourv4h_POST, "ld1", ".4h", 1, false, 32 }, + { AArch64::LD1Fourv2s_POST, "ld1", ".2s", 1, false, 32 }, + { AArch64::LD1Fourv1d_POST, "ld1", ".1d", 1, false, 32 }, + { AArch64::LD2i8, "ld2", ".b", 1, true, 0 }, + { AArch64::LD2i16, "ld2", ".h", 1, true, 0 }, + { AArch64::LD2i32, "ld2", ".s", 1, true, 0 }, + { AArch64::LD2i64, "ld2", ".d", 1, true, 0 }, + { AArch64::LD2i8_POST, "ld2", ".b", 2, true, 2 }, + { AArch64::LD2i16_POST, "ld2", ".h", 2, true, 4 }, + { AArch64::LD2i32_POST, "ld2", ".s", 2, true, 8 }, + { AArch64::LD2i64_POST, "ld2", ".d", 2, true, 16 }, + { AArch64::LD2Rv16b, "ld2r", ".16b", 0, false, 0 }, + { AArch64::LD2Rv8h, "ld2r", ".8h", 0, false, 0 }, + { AArch64::LD2Rv4s, "ld2r", ".4s", 0, false, 0 }, + { AArch64::LD2Rv2d, "ld2r", ".2d", 0, false, 0 }, + { AArch64::LD2Rv8b, "ld2r", ".8b", 0, false, 0 }, + { AArch64::LD2Rv4h, "ld2r", ".4h", 0, false, 0 }, + { AArch64::LD2Rv2s, "ld2r", ".2s", 0, false, 0 }, + { AArch64::LD2Rv1d, "ld2r", ".1d", 0, false, 0 }, + { AArch64::LD2Rv16b_POST, "ld2r", ".16b", 1, false, 2 }, + { AArch64::LD2Rv8h_POST, "ld2r", ".8h", 1, false, 4 }, + { AArch64::LD2Rv4s_POST, "ld2r", ".4s", 1, false, 8 }, + { AArch64::LD2Rv2d_POST, "ld2r", ".2d", 1, false, 16 }, + { AArch64::LD2Rv8b_POST, "ld2r", ".8b", 1, false, 2 }, + { AArch64::LD2Rv4h_POST, "ld2r", ".4h", 1, false, 4 }, + { AArch64::LD2Rv2s_POST, "ld2r", ".2s", 1, false, 8 }, + { AArch64::LD2Rv1d_POST, "ld2r", ".1d", 1, false, 16 }, + { AArch64::LD2Twov16b, "ld2", ".16b", 0, false, 0 }, + { AArch64::LD2Twov8h, "ld2", ".8h", 0, false, 0 }, + { AArch64::LD2Twov4s, "ld2", ".4s", 0, false, 0 }, + { AArch64::LD2Twov2d, "ld2", ".2d", 0, false, 0 }, + { AArch64::LD2Twov8b, "ld2", ".8b", 0, false, 0 }, + { AArch64::LD2Twov4h, "ld2", ".4h", 0, false, 0 }, + { AArch64::LD2Twov2s, "ld2", ".2s", 0, false, 0 }, + { AArch64::LD2Twov16b_POST, "ld2", ".16b", 1, false, 32 }, + { AArch64::LD2Twov8h_POST, "ld2", ".8h", 1, false, 32 }, + { AArch64::LD2Twov4s_POST, "ld2", ".4s", 1, false, 32 }, + { AArch64::LD2Twov2d_POST, "ld2", ".2d", 1, false, 32 }, + { AArch64::LD2Twov8b_POST, "ld2", ".8b", 1, false, 16 }, + { AArch64::LD2Twov4h_POST, "ld2", ".4h", 1, false, 16 }, + { AArch64::LD2Twov2s_POST, "ld2", ".2s", 1, false, 16 }, + { AArch64::LD3i8, "ld3", ".b", 1, true, 0 }, + { AArch64::LD3i16, "ld3", ".h", 1, true, 0 }, + { AArch64::LD3i32, "ld3", ".s", 1, true, 0 }, + { AArch64::LD3i64, "ld3", ".d", 1, true, 0 }, + { AArch64::LD3i8_POST, "ld3", ".b", 2, true, 3 }, + { AArch64::LD3i16_POST, "ld3", ".h", 2, true, 6 }, + { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 }, + { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 }, + { AArch64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 }, + { AArch64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 }, + { AArch64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 }, + { AArch64::LD3Rv2d, "ld3r", ".2d", 0, false, 0 }, + { AArch64::LD3Rv8b, "ld3r", ".8b", 0, false, 0 }, + { AArch64::LD3Rv4h, "ld3r", ".4h", 0, false, 0 }, + { AArch64::LD3Rv2s, "ld3r", ".2s", 0, false, 0 }, + { AArch64::LD3Rv1d, "ld3r", ".1d", 0, false, 0 }, + { AArch64::LD3Rv16b_POST, "ld3r", ".16b", 1, false, 3 }, + { AArch64::LD3Rv8h_POST, "ld3r", ".8h", 1, false, 6 }, + { AArch64::LD3Rv4s_POST, "ld3r", ".4s", 1, false, 12 }, + { AArch64::LD3Rv2d_POST, "ld3r", ".2d", 1, false, 24 }, + { AArch64::LD3Rv8b_POST, "ld3r", ".8b", 1, false, 3 }, + { AArch64::LD3Rv4h_POST, "ld3r", ".4h", 1, false, 6 }, + { AArch64::LD3Rv2s_POST, "ld3r", ".2s", 1, false, 12 }, + { AArch64::LD3Rv1d_POST, "ld3r", ".1d", 1, false, 24 
}, + { AArch64::LD3Threev16b, "ld3", ".16b", 0, false, 0 }, + { AArch64::LD3Threev8h, "ld3", ".8h", 0, false, 0 }, + { AArch64::LD3Threev4s, "ld3", ".4s", 0, false, 0 }, + { AArch64::LD3Threev2d, "ld3", ".2d", 0, false, 0 }, + { AArch64::LD3Threev8b, "ld3", ".8b", 0, false, 0 }, + { AArch64::LD3Threev4h, "ld3", ".4h", 0, false, 0 }, + { AArch64::LD3Threev2s, "ld3", ".2s", 0, false, 0 }, + { AArch64::LD3Threev16b_POST, "ld3", ".16b", 1, false, 48 }, + { AArch64::LD3Threev8h_POST, "ld3", ".8h", 1, false, 48 }, + { AArch64::LD3Threev4s_POST, "ld3", ".4s", 1, false, 48 }, + { AArch64::LD3Threev2d_POST, "ld3", ".2d", 1, false, 48 }, + { AArch64::LD3Threev8b_POST, "ld3", ".8b", 1, false, 24 }, + { AArch64::LD3Threev4h_POST, "ld3", ".4h", 1, false, 24 }, + { AArch64::LD3Threev2s_POST, "ld3", ".2s", 1, false, 24 }, + { AArch64::LD4i8, "ld4", ".b", 1, true, 0 }, + { AArch64::LD4i16, "ld4", ".h", 1, true, 0 }, + { AArch64::LD4i32, "ld4", ".s", 1, true, 0 }, + { AArch64::LD4i64, "ld4", ".d", 1, true, 0 }, + { AArch64::LD4i8_POST, "ld4", ".b", 2, true, 4 }, + { AArch64::LD4i16_POST, "ld4", ".h", 2, true, 8 }, + { AArch64::LD4i32_POST, "ld4", ".s", 2, true, 16 }, + { AArch64::LD4i64_POST, "ld4", ".d", 2, true, 32 }, + { AArch64::LD4Rv16b, "ld4r", ".16b", 0, false, 0 }, + { AArch64::LD4Rv8h, "ld4r", ".8h", 0, false, 0 }, + { AArch64::LD4Rv4s, "ld4r", ".4s", 0, false, 0 }, + { AArch64::LD4Rv2d, "ld4r", ".2d", 0, false, 0 }, + { AArch64::LD4Rv8b, "ld4r", ".8b", 0, false, 0 }, + { AArch64::LD4Rv4h, "ld4r", ".4h", 0, false, 0 }, + { AArch64::LD4Rv2s, "ld4r", ".2s", 0, false, 0 }, + { AArch64::LD4Rv1d, "ld4r", ".1d", 0, false, 0 }, + { AArch64::LD4Rv16b_POST, "ld4r", ".16b", 1, false, 4 }, + { AArch64::LD4Rv8h_POST, "ld4r", ".8h", 1, false, 8 }, + { AArch64::LD4Rv4s_POST, "ld4r", ".4s", 1, false, 16 }, + { AArch64::LD4Rv2d_POST, "ld4r", ".2d", 1, false, 32 }, + { AArch64::LD4Rv8b_POST, "ld4r", ".8b", 1, false, 4 }, + { AArch64::LD4Rv4h_POST, "ld4r", ".4h", 1, false, 8 }, + { AArch64::LD4Rv2s_POST, "ld4r", ".2s", 1, false, 16 }, + { AArch64::LD4Rv1d_POST, "ld4r", ".1d", 1, false, 32 }, + { AArch64::LD4Fourv16b, "ld4", ".16b", 0, false, 0 }, + { AArch64::LD4Fourv8h, "ld4", ".8h", 0, false, 0 }, + { AArch64::LD4Fourv4s, "ld4", ".4s", 0, false, 0 }, + { AArch64::LD4Fourv2d, "ld4", ".2d", 0, false, 0 }, + { AArch64::LD4Fourv8b, "ld4", ".8b", 0, false, 0 }, + { AArch64::LD4Fourv4h, "ld4", ".4h", 0, false, 0 }, + { AArch64::LD4Fourv2s, "ld4", ".2s", 0, false, 0 }, + { AArch64::LD4Fourv16b_POST, "ld4", ".16b", 1, false, 64 }, + { AArch64::LD4Fourv8h_POST, "ld4", ".8h", 1, false, 64 }, + { AArch64::LD4Fourv4s_POST, "ld4", ".4s", 1, false, 64 }, + { AArch64::LD4Fourv2d_POST, "ld4", ".2d", 1, false, 64 }, + { AArch64::LD4Fourv8b_POST, "ld4", ".8b", 1, false, 32 }, + { AArch64::LD4Fourv4h_POST, "ld4", ".4h", 1, false, 32 }, + { AArch64::LD4Fourv2s_POST, "ld4", ".2s", 1, false, 32 }, + { AArch64::ST1i8, "st1", ".b", 0, true, 0 }, + { AArch64::ST1i16, "st1", ".h", 0, true, 0 }, + { AArch64::ST1i32, "st1", ".s", 0, true, 0 }, + { AArch64::ST1i64, "st1", ".d", 0, true, 0 }, + { AArch64::ST1i8_POST, "st1", ".b", 1, true, 1 }, + { AArch64::ST1i16_POST, "st1", ".h", 1, true, 2 }, + { AArch64::ST1i32_POST, "st1", ".s", 1, true, 4 }, + { AArch64::ST1i64_POST, "st1", ".d", 1, true, 8 }, + { AArch64::ST1Onev16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Onev8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Onev4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Onev2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Onev8b, "st1", 
".8b", 0, false, 0 }, + { AArch64::ST1Onev4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Onev2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Onev1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Onev16b_POST, "st1", ".16b", 1, false, 16 }, + { AArch64::ST1Onev8h_POST, "st1", ".8h", 1, false, 16 }, + { AArch64::ST1Onev4s_POST, "st1", ".4s", 1, false, 16 }, + { AArch64::ST1Onev2d_POST, "st1", ".2d", 1, false, 16 }, + { AArch64::ST1Onev8b_POST, "st1", ".8b", 1, false, 8 }, + { AArch64::ST1Onev4h_POST, "st1", ".4h", 1, false, 8 }, + { AArch64::ST1Onev2s_POST, "st1", ".2s", 1, false, 8 }, + { AArch64::ST1Onev1d_POST, "st1", ".1d", 1, false, 8 }, + { AArch64::ST1Twov16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Twov8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Twov4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Twov2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Twov8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Twov4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Twov2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Twov1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Twov16b_POST, "st1", ".16b", 1, false, 32 }, + { AArch64::ST1Twov8h_POST, "st1", ".8h", 1, false, 32 }, + { AArch64::ST1Twov4s_POST, "st1", ".4s", 1, false, 32 }, + { AArch64::ST1Twov2d_POST, "st1", ".2d", 1, false, 32 }, + { AArch64::ST1Twov8b_POST, "st1", ".8b", 1, false, 16 }, + { AArch64::ST1Twov4h_POST, "st1", ".4h", 1, false, 16 }, + { AArch64::ST1Twov2s_POST, "st1", ".2s", 1, false, 16 }, + { AArch64::ST1Twov1d_POST, "st1", ".1d", 1, false, 16 }, + { AArch64::ST1Threev16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Threev8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Threev4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Threev2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Threev8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Threev4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Threev2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Threev1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Threev16b_POST, "st1", ".16b", 1, false, 48 }, + { AArch64::ST1Threev8h_POST, "st1", ".8h", 1, false, 48 }, + { AArch64::ST1Threev4s_POST, "st1", ".4s", 1, false, 48 }, + { AArch64::ST1Threev2d_POST, "st1", ".2d", 1, false, 48 }, + { AArch64::ST1Threev8b_POST, "st1", ".8b", 1, false, 24 }, + { AArch64::ST1Threev4h_POST, "st1", ".4h", 1, false, 24 }, + { AArch64::ST1Threev2s_POST, "st1", ".2s", 1, false, 24 }, + { AArch64::ST1Threev1d_POST, "st1", ".1d", 1, false, 24 }, + { AArch64::ST1Fourv16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Fourv8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Fourv4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Fourv2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Fourv8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Fourv4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Fourv2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Fourv1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Fourv16b_POST, "st1", ".16b", 1, false, 64 }, + { AArch64::ST1Fourv8h_POST, "st1", ".8h", 1, false, 64 }, + { AArch64::ST1Fourv4s_POST, "st1", ".4s", 1, false, 64 }, + { AArch64::ST1Fourv2d_POST, "st1", ".2d", 1, false, 64 }, + { AArch64::ST1Fourv8b_POST, "st1", ".8b", 1, false, 32 }, + { AArch64::ST1Fourv4h_POST, "st1", ".4h", 1, false, 32 }, + { AArch64::ST1Fourv2s_POST, "st1", ".2s", 1, false, 32 }, + { AArch64::ST1Fourv1d_POST, "st1", ".1d", 1, false, 32 }, + { AArch64::ST2i8, "st2", ".b", 0, true, 0 }, + { AArch64::ST2i16, "st2", ".h", 0, true, 0 }, + { AArch64::ST2i32, "st2", ".s", 0, true, 0 }, + { 
AArch64::ST2i64, "st2", ".d", 0, true, 0 }, + { AArch64::ST2i8_POST, "st2", ".b", 1, true, 2 }, + { AArch64::ST2i16_POST, "st2", ".h", 1, true, 4 }, + { AArch64::ST2i32_POST, "st2", ".s", 1, true, 8 }, + { AArch64::ST2i64_POST, "st2", ".d", 1, true, 16 }, + { AArch64::ST2Twov16b, "st2", ".16b", 0, false, 0 }, + { AArch64::ST2Twov8h, "st2", ".8h", 0, false, 0 }, + { AArch64::ST2Twov4s, "st2", ".4s", 0, false, 0 }, + { AArch64::ST2Twov2d, "st2", ".2d", 0, false, 0 }, + { AArch64::ST2Twov8b, "st2", ".8b", 0, false, 0 }, + { AArch64::ST2Twov4h, "st2", ".4h", 0, false, 0 }, + { AArch64::ST2Twov2s, "st2", ".2s", 0, false, 0 }, + { AArch64::ST2Twov16b_POST, "st2", ".16b", 1, false, 32 }, + { AArch64::ST2Twov8h_POST, "st2", ".8h", 1, false, 32 }, + { AArch64::ST2Twov4s_POST, "st2", ".4s", 1, false, 32 }, + { AArch64::ST2Twov2d_POST, "st2", ".2d", 1, false, 32 }, + { AArch64::ST2Twov8b_POST, "st2", ".8b", 1, false, 16 }, + { AArch64::ST2Twov4h_POST, "st2", ".4h", 1, false, 16 }, + { AArch64::ST2Twov2s_POST, "st2", ".2s", 1, false, 16 }, + { AArch64::ST3i8, "st3", ".b", 0, true, 0 }, + { AArch64::ST3i16, "st3", ".h", 0, true, 0 }, + { AArch64::ST3i32, "st3", ".s", 0, true, 0 }, + { AArch64::ST3i64, "st3", ".d", 0, true, 0 }, + { AArch64::ST3i8_POST, "st3", ".b", 1, true, 3 }, + { AArch64::ST3i16_POST, "st3", ".h", 1, true, 6 }, + { AArch64::ST3i32_POST, "st3", ".s", 1, true, 12 }, + { AArch64::ST3i64_POST, "st3", ".d", 1, true, 24 }, + { AArch64::ST3Threev16b, "st3", ".16b", 0, false, 0 }, + { AArch64::ST3Threev8h, "st3", ".8h", 0, false, 0 }, + { AArch64::ST3Threev4s, "st3", ".4s", 0, false, 0 }, + { AArch64::ST3Threev2d, "st3", ".2d", 0, false, 0 }, + { AArch64::ST3Threev8b, "st3", ".8b", 0, false, 0 }, + { AArch64::ST3Threev4h, "st3", ".4h", 0, false, 0 }, + { AArch64::ST3Threev2s, "st3", ".2s", 0, false, 0 }, + { AArch64::ST3Threev16b_POST, "st3", ".16b", 1, false, 48 }, + { AArch64::ST3Threev8h_POST, "st3", ".8h", 1, false, 48 }, + { AArch64::ST3Threev4s_POST, "st3", ".4s", 1, false, 48 }, + { AArch64::ST3Threev2d_POST, "st3", ".2d", 1, false, 48 }, + { AArch64::ST3Threev8b_POST, "st3", ".8b", 1, false, 24 }, + { AArch64::ST3Threev4h_POST, "st3", ".4h", 1, false, 24 }, + { AArch64::ST3Threev2s_POST, "st3", ".2s", 1, false, 24 }, + { AArch64::ST4i8, "st4", ".b", 0, true, 0 }, + { AArch64::ST4i16, "st4", ".h", 0, true, 0 }, + { AArch64::ST4i32, "st4", ".s", 0, true, 0 }, + { AArch64::ST4i64, "st4", ".d", 0, true, 0 }, + { AArch64::ST4i8_POST, "st4", ".b", 1, true, 4 }, + { AArch64::ST4i16_POST, "st4", ".h", 1, true, 8 }, + { AArch64::ST4i32_POST, "st4", ".s", 1, true, 16 }, + { AArch64::ST4i64_POST, "st4", ".d", 1, true, 32 }, + { AArch64::ST4Fourv16b, "st4", ".16b", 0, false, 0 }, + { AArch64::ST4Fourv8h, "st4", ".8h", 0, false, 0 }, + { AArch64::ST4Fourv4s, "st4", ".4s", 0, false, 0 }, + { AArch64::ST4Fourv2d, "st4", ".2d", 0, false, 0 }, + { AArch64::ST4Fourv8b, "st4", ".8b", 0, false, 0 }, + { AArch64::ST4Fourv4h, "st4", ".4h", 0, false, 0 }, + { AArch64::ST4Fourv2s, "st4", ".2s", 0, false, 0 }, + { AArch64::ST4Fourv16b_POST, "st4", ".16b", 1, false, 64 }, + { AArch64::ST4Fourv8h_POST, "st4", ".8h", 1, false, 64 }, + { AArch64::ST4Fourv4s_POST, "st4", ".4s", 1, false, 64 }, + { AArch64::ST4Fourv2d_POST, "st4", ".2d", 1, false, 64 }, + { AArch64::ST4Fourv8b_POST, "st4", ".8b", 1, false, 32 }, + { AArch64::ST4Fourv4h_POST, "st4", ".4h", 1, false, 32 }, + { AArch64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 }, +}; + +static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { + unsigned Idx; + 
for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) + if (LdStNInstInfo[Idx].Opcode == Opcode) + return &LdStNInstInfo[Idx]; + + return nullptr; +} + +void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + unsigned Opcode = MI->getOpcode(); + StringRef Layout, Mnemonic; + + bool IsTbx; + if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { + O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t' + << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", "; + + unsigned ListOpNum = IsTbx ? 2 : 1; + printVectorList(MI, ListOpNum, O, ""); + + O << ", " + << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg); + printAnnotation(O, Annot); + return; + } + + if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { + O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; + + // Now onto the operands: first a vector list with possible lane + // specifier. E.g. { v0 }[2] + int OpNum = LdStDesc->ListOperand; + printVectorList(MI, OpNum++, O, ""); + + if (LdStDesc->HasLane) + O << '[' << MI->getOperand(OpNum++).getImm() << ']'; + + // Next the address: [xN] + unsigned AddrReg = MI->getOperand(OpNum++).getReg(); + O << ", [" << getRegisterName(AddrReg) << ']'; + + // Finally, there might be a post-indexed offset. + if (LdStDesc->NaturalOffset != 0) { + unsigned Reg = MI->getOperand(OpNum++).getReg(); + if (Reg != AArch64::XZR) + O << ", " << getRegisterName(Reg); + else { + assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?"); + O << ", #" << LdStDesc->NaturalOffset; + } + } + + printAnnotation(O, Annot); + return; + } + + AArch64InstPrinter::printInst(MI, O, Annot); +} + +bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { +#ifndef NDEBUG + unsigned Opcode = MI->getOpcode(); + assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!"); +#endif + + const char *Asm = nullptr; + const MCOperand &Op1 = MI->getOperand(0); + const MCOperand &Cn = MI->getOperand(1); + const MCOperand &Cm = MI->getOperand(2); + const MCOperand &Op2 = MI->getOperand(3); + + unsigned Op1Val = Op1.getImm(); + unsigned CnVal = Cn.getImm(); + unsigned CmVal = Cm.getImm(); + unsigned Op2Val = Op2.getImm(); + + if (CnVal == 7) { + switch (CmVal) { + default: + break; + + // IC aliases + case 1: + if (Op1Val == 0 && Op2Val == 0) + Asm = "ic\tialluis"; + break; + case 5: + if (Op1Val == 0 && Op2Val == 0) + Asm = "ic\tiallu"; + else if (Op1Val == 3 && Op2Val == 1) + Asm = "ic\tivau"; + break; + + // DC aliases + case 4: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tzva"; + break; + case 6: + if (Op1Val == 0 && Op2Val == 1) + Asm = "dc\tivac"; + if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tisw"; + break; + case 10: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tcvac"; + else if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tcsw"; + break; + case 11: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tcvau"; + break; + case 14: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tcivac"; + else if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tcisw"; + break; + + // AT aliases + case 8: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e1r"; break; + case 1: Asm = "at\ts1e1w"; break; + case 2: Asm = "at\ts1e0r"; break; + case 3: Asm = "at\ts1e0w"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e2r"; break; + case 1: Asm = "at\ts1e2w"; break; + case 4: Asm = "at\ts12e1r"; break; + case 5: Asm = "at\ts12e1w"; 
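A note on the descriptor table consumed above: getLdStNInstrDesc does a plain linear scan, and the Apple printer uses NaturalOffset only when the post-index register is XZR, i.e. the immediate post-increment form. A tiny sketch of the same pattern follows, with stand-in opcode numbers (the real table is keyed by AArch64::* opcodes).

#include <cstdio>

struct MiniLdStDesc {
  unsigned Opcode;   // stand-in value, not a real AArch64::* enumerator
  const char *Mnemonic;
  const char *Layout;
  int NaturalOffset; // bytes moved; printed when the offset reg is XZR
};

static const MiniLdStDesc MiniTable[] = {
    {1, "ld1", ".16b", 16}, // think AArch64::LD1Onev16b_POST
    {2, "ld2", ".4s", 32},  // think AArch64::LD2Twov4s_POST
};

static const MiniLdStDesc *miniLookup(unsigned Opcode) {
  for (const MiniLdStDesc &D : MiniTable)
    if (D.Opcode == Opcode)
      return &D;
  return nullptr; // unknown: fall back to the generic printer
}

int main() {
  if (const MiniLdStDesc *D = miniLookup(2))
    std::printf("%s%s { ... }, [x0], #%d\n", D->Mnemonic, D->Layout,
                D->NaturalOffset); // "ld2.4s { ... }, [x0], #32"
}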
break; + case 6: Asm = "at\ts12e0r"; break; + case 7: Asm = "at\ts12e0w"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e3r"; break; + case 1: Asm = "at\ts1e3w"; break; + } + break; + } + break; + } + } else if (CnVal == 8) { + // TLBI aliases + switch (CmVal) { + default: + break; + case 3: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\tvmalle1is"; break; + case 1: Asm = "tlbi\tvae1is"; break; + case 2: Asm = "tlbi\taside1is"; break; + case 3: Asm = "tlbi\tvaae1is"; break; + case 5: Asm = "tlbi\tvale1is"; break; + case 7: Asm = "tlbi\tvaale1is"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle2is"; break; + case 1: Asm = "tlbi\tvae2is"; break; + case 4: Asm = "tlbi\talle1is"; break; + case 5: Asm = "tlbi\tvale2is"; break; + case 6: Asm = "tlbi\tvmalls12e1is"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle3is"; break; + case 1: Asm = "tlbi\tvae3is"; break; + case 5: Asm = "tlbi\tvale3is"; break; + } + break; + } + break; + case 0: + switch (Op1Val) { + default: + break; + case 4: + switch (Op2Val) { + default: + break; + case 1: Asm = "tlbi\tipas2e1is"; break; + case 5: Asm = "tlbi\tipas2le1is"; break; + } + break; + } + break; + case 4: + switch (Op1Val) { + default: + break; + case 4: + switch (Op2Val) { + default: + break; + case 1: Asm = "tlbi\tipas2e1"; break; + case 5: Asm = "tlbi\tipas2le1"; break; + } + break; + } + break; + case 7: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\tvmalle1"; break; + case 1: Asm = "tlbi\tvae1"; break; + case 2: Asm = "tlbi\taside1"; break; + case 3: Asm = "tlbi\tvaae1"; break; + case 5: Asm = "tlbi\tvale1"; break; + case 7: Asm = "tlbi\tvaale1"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle2"; break; + case 1: Asm = "tlbi\tvae2"; break; + case 4: Asm = "tlbi\talle1"; break; + case 5: Asm = "tlbi\tvale2"; break; + case 6: Asm = "tlbi\tvmalls12e1"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle3"; break; + case 1: Asm = "tlbi\tvae3"; break; + case 5: Asm = "tlbi\tvale3"; break; + } + break; + } + break; + } + } + + if (Asm) { + unsigned Reg = MI->getOperand(4).getReg(); + + O << '\t' << Asm; + if (StringRef(Asm).lower().find("all") == StringRef::npos) + O << ", " << getRegisterName(Reg); + } + + return Asm != nullptr; +} + +void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << *Op.getExpr(); + } +} + +void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + O << format("#%#llx", Op.getImm()); +} + +void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, + unsigned Imm, raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Reg == AArch64::XZR) + O << "#" << Imm; + else + O << getRegisterName(Reg); + } else + assert(0 && "unknown operand kind in printPostIncOperand64"); +} + +void 
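The nested switches above encode a map from (op1, CRn, CRm, op2) to an ic/dc/at/tlbi alias, with the register operand suppressed whenever the alias name contains "all". The same mapping could be expressed as a flat table; the entries below are copied from a few of the cases above, and the rest is an illustrative sketch rather than a proposed change.

#include <cstdint>
#include <cstring>

struct SysAliasEntry { uint8_t Op1, CRn, CRm, Op2; const char *Text; };

static const SysAliasEntry SysAliases[] = {
    {0, 7, 1, 0, "ic\tialluis"}, {0, 7, 5, 0, "ic\tiallu"},
    {3, 7, 5, 1, "ic\tivau"},    {3, 7, 4, 1, "dc\tzva"},
    {0, 8, 7, 0, "tlbi\tvmalle1"},
};

static const char *findSysAlias(uint8_t Op1, uint8_t CRn, uint8_t CRm,
                                uint8_t Op2, bool &WantsXtReg) {
  for (const SysAliasEntry &A : SysAliases)
    if (A.Op1 == Op1 && A.CRn == CRn && A.CRm == CRm && A.Op2 == Op2) {
      WantsXtReg = std::strstr(A.Text, "all") == nullptr; // "...all..." => no Xt
      return A.Text;
    }
  return nullptr; // no alias: print the raw sys form
}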
AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert(Op.isReg() && "Non-register vreg operand!"); + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg, AArch64::vreg); +} + +void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); + O << "c" << Op.getImm(); +} + +void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.isImm()) { + unsigned Val = (MO.getImm() & 0xfff); + assert(Val == MO.getImm() && "Add/sub immediate out of range!"); + unsigned Shift = + AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); + O << '#' << Val; + if (Shift != 0) + printShifter(MI, OpNum + 1, O); + + if (CommentStream) + *CommentStream << '=' << (Val << Shift) << '\n'; + } else { + assert(MO.isExpr() && "Unexpected operand type!"); + O << *MO.getExpr(); + printShifter(MI, OpNum + 1, O); + } +} + +void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + uint64_t Val = MI->getOperand(OpNum).getImm(); + O << "#0x"; + O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 32)); +} + +void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + uint64_t Val = MI->getOperand(OpNum).getImm(); + O << "#0x"; + O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 64)); +} + +void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNum).getImm(); + // LSL #0 should not be printed. + if (AArch64_AM::getShiftType(Val) == AArch64_AM::LSL && + AArch64_AM::getShiftValue(Val) == 0) + return; + O << ", " << AArch64_AM::getShiftExtendName(AArch64_AM::getShiftType(Val)) + << " #" << AArch64_AM::getShiftValue(Val); +} + +void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << getRegisterName(MI->getOperand(OpNum).getReg()); + printShifter(MI, OpNum + 1, O); +} + +void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << getRegisterName(MI->getOperand(OpNum).getReg()); + printArithExtend(MI, OpNum + 1, O); +} + +void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNum).getImm(); + AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val); + unsigned ShiftVal = AArch64_AM::getArithShiftValue(Val); + + // If the destination or first source register operand is [W]SP, print + // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at + // all. 
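A worked example of printAddSubImm above, since the shifter and the asm-comment value interact: for add x0, x1, #0x123, lsl #12 the operand carries Val = 0x123 with a shift of 12, so the printer emits "#291, lsl #12" and the comment stream records the composed constant.

#include <cstdio>

int main() {
  unsigned Val = 0x123, Shift = 12;
  std::printf("#%u, lsl #%u\n", Val, Shift); // printed operand text
  std::printf("=%u\n", Val << Shift);        // what CommentStream gets: "=1191936"
}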
+ if (ExtType == AArch64_AM::UXTW || ExtType == AArch64_AM::UXTX) { + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src1 = MI->getOperand(1).getReg(); + if ( ((Dest == AArch64::SP || Src1 == AArch64::SP) && + ExtType == AArch64_AM::UXTX) || + ((Dest == AArch64::WSP || Src1 == AArch64::WSP) && + ExtType == AArch64_AM::UXTW) ) { + if (ShiftVal != 0) + O << ", lsl #" << ShiftVal; + return; + } + } + O << ", " << AArch64_AM::getShiftExtendName(ExtType); + if (ShiftVal != 0) + O << " #" << ShiftVal; +} + +void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, + raw_ostream &O, char SrcRegKind, + unsigned Width) { + unsigned SignExtend = MI->getOperand(OpNum).getImm(); + unsigned DoShift = MI->getOperand(OpNum + 1).getImm(); + + // sxtw, sxtx, uxtw or lsl (== uxtx) + bool IsLSL = !SignExtend && SrcRegKind == 'x'; + if (IsLSL) + O << "lsl"; + else + O << (SignExtend ? 's' : 'u') << "xt" << SrcRegKind; + + if (DoShift || IsLSL) + O << " #" << Log2_32(Width / 8); +} + +void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); + O << AArch64CC::getCondCodeName(CC); +} + +void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); + O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC)); +} + +void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; +} + +template +void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << '#' << Scale * MI->getOperand(OpNum).getImm(); +} + +void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum, + unsigned Scale, raw_ostream &O) { + const MCOperand MO = MI->getOperand(OpNum); + if (MO.isImm()) { + O << "#" << (MO.getImm() * Scale); + } else { + assert(MO.isExpr() && "Unexpected operand type!"); + O << *MO.getExpr(); + } +} + +void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, + unsigned Scale, raw_ostream &O) { + const MCOperand MO1 = MI->getOperand(OpNum + 1); + O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()); + if (MO1.isImm()) { + O << ", #" << (MO1.getImm() * Scale); + } else { + assert(MO1.isExpr() && "Unexpected operand type!"); + O << ", " << *MO1.getExpr(); + } + O << ']'; +} + +void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned prfop = MI->getOperand(OpNum).getImm(); + bool Valid; + StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid); + if (Valid) + O << Name; + else + O << '#' << prfop; +} + +void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + float FPImm = + MO.isFPImm() ? MO.getFPImm() : AArch64_AM::getFPImmFloat(MO.getImm()); + + // 8 decimal places are enough to perfectly represent permitted floats. 
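The SP/WSP special case in printArithExtend deserves a gloss: next to the stack pointer, uxtx (64-bit) or uxtw (32-bit) is just how "lsl" is spelt, and a zero shift is dropped entirely; printMemExtend applies the analogous rule for load/store offsets, where the implicit shift is log2 of the access size in bytes. A reduced sketch of the decision, assuming the width check has already been done:

#include <cstdio>

// SPMatched: destination or first source is SP/WSP and the extend width
// matches it (the real code pairs UXTX with SP and UXTW with WSP).
static void printArithExtendSketch(bool SPMatched, bool IsUxtwOrUxtx,
                                   const char *Name, unsigned Shift) {
  if (SPMatched && IsUxtwOrUxtx) {
    if (Shift != 0)
      std::printf(", lsl #%u", Shift); // e.g. "add x0, sp, x1, lsl #2"
    return;                            // zero shift: print nothing at all
  }
  std::printf(", %s", Name);
  if (Shift != 0)
    std::printf(" #%u", Shift);
}

int main() {
  printArithExtendSketch(true, true, "uxtx", 0);   // prints nothing
  printArithExtendSketch(false, false, "sxtw", 2); // ", sxtw #2"
  std::printf("\n");
}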
+ O << format("#%.8f", FPImm); +} + +static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { + while (Stride--) { + switch (Reg) { + default: + assert(0 && "Vector register expected!"); + case AArch64::Q0: Reg = AArch64::Q1; break; + case AArch64::Q1: Reg = AArch64::Q2; break; + case AArch64::Q2: Reg = AArch64::Q3; break; + case AArch64::Q3: Reg = AArch64::Q4; break; + case AArch64::Q4: Reg = AArch64::Q5; break; + case AArch64::Q5: Reg = AArch64::Q6; break; + case AArch64::Q6: Reg = AArch64::Q7; break; + case AArch64::Q7: Reg = AArch64::Q8; break; + case AArch64::Q8: Reg = AArch64::Q9; break; + case AArch64::Q9: Reg = AArch64::Q10; break; + case AArch64::Q10: Reg = AArch64::Q11; break; + case AArch64::Q11: Reg = AArch64::Q12; break; + case AArch64::Q12: Reg = AArch64::Q13; break; + case AArch64::Q13: Reg = AArch64::Q14; break; + case AArch64::Q14: Reg = AArch64::Q15; break; + case AArch64::Q15: Reg = AArch64::Q16; break; + case AArch64::Q16: Reg = AArch64::Q17; break; + case AArch64::Q17: Reg = AArch64::Q18; break; + case AArch64::Q18: Reg = AArch64::Q19; break; + case AArch64::Q19: Reg = AArch64::Q20; break; + case AArch64::Q20: Reg = AArch64::Q21; break; + case AArch64::Q21: Reg = AArch64::Q22; break; + case AArch64::Q22: Reg = AArch64::Q23; break; + case AArch64::Q23: Reg = AArch64::Q24; break; + case AArch64::Q24: Reg = AArch64::Q25; break; + case AArch64::Q25: Reg = AArch64::Q26; break; + case AArch64::Q26: Reg = AArch64::Q27; break; + case AArch64::Q27: Reg = AArch64::Q28; break; + case AArch64::Q28: Reg = AArch64::Q29; break; + case AArch64::Q29: Reg = AArch64::Q30; break; + case AArch64::Q30: Reg = AArch64::Q31; break; + // Vector lists can wrap around. + case AArch64::Q31: + Reg = AArch64::Q0; + break; + } + } + return Reg; +} + +void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, + raw_ostream &O, + StringRef LayoutSuffix) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + + O << "{ "; + + // Work out how many registers there are in the list (if there is an actual + // list). + unsigned NumRegs = 1; + if (MRI.getRegClass(AArch64::DDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQRegClassID).contains(Reg)) + NumRegs = 2; + else if (MRI.getRegClass(AArch64::DDDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQQRegClassID).contains(Reg)) + NumRegs = 3; + else if (MRI.getRegClass(AArch64::DDDDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg)) + NumRegs = 4; + + // Now forget about the list and find out what the first register is. + if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::dsub0)) + Reg = FirstReg; + else if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::qsub0)) + Reg = FirstReg; + + // If it's a D-reg, we need to promote it to the equivalent Q-reg before + // printing (otherwise getRegisterName fails). 
+ if (MRI.getRegClass(AArch64::FPR64RegClassID).contains(Reg)) { + const MCRegisterClass &FPR128RC = + MRI.getRegClass(AArch64::FPR128RegClassID); + Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC); + } + + for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { + O << getRegisterName(Reg, AArch64::vreg) << LayoutSuffix; + if (i + 1 != NumRegs) + O << ", "; + } + + O << " }"; +} + +void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + printVectorList(MI, OpNum, O, ""); +} + +template +void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + std::string Suffix("."); + if (NumLanes) + Suffix += itostr(NumLanes) + LaneKind; + else + Suffix += LaneKind; + + printVectorList(MI, OpNum, O, Suffix); +} + +void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "[" << MI->getOperand(OpNum).getImm() << "]"; +} + +void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + + // If the label has already been resolved to an immediate offset (say, when + // we're running the disassembler), just print the immediate. + if (Op.isImm()) { + O << "#" << (Op.getImm() << 2); + return; + } + + // If the branch target is simply an address then print it in hex. + const MCConstantExpr *BranchTarget = + dyn_cast(MI->getOperand(OpNum).getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } else { + // Otherwise, just print the expression. + O << *MI->getOperand(OpNum).getExpr(); + } +} + +void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + + // If the label has already been resolved to an immediate offset (say, when + // we're running the disassembler), just print the immediate. + if (Op.isImm()) { + O << "#" << (Op.getImm() << 12); + return; + } + + // Otherwise, just print the expression. 
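printAlignedLabel's "Op.getImm() << 2" reflects that A64 branch and label offsets are encoded in 4-byte units, so a resolved immediate of 5 prints as a byte offset of 20. Trivial check, for reference:

#include <cstdio>

int main() {
  long long Imm = 5;                // word offset from the decoder
  std::printf("#%lld\n", Imm << 2); // "#20": byte offset shown to the reader
}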
+ O << *MI->getOperand(OpNum).getExpr(); +} + +void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + unsigned Opcode = MI->getOpcode(); + + bool Valid; + StringRef Name; + if (Opcode == AArch64::ISB) + Name = AArch64ISB::ISBMapper().toString(Val, Valid); + else + Name = AArch64DB::DBarrierMapper().toString(Val, Valid); + if (Valid) + O << Name; + else + O << "#" << Val; +} + +void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + + bool Valid; + auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); + std::string Name = Mapper.toString(Val, Valid); + + if (Valid) + O << StringRef(Name).upper(); +} + +void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + + bool Valid; + auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); + std::string Name = Mapper.toString(Val, Valid); + + if (Valid) + O << StringRef(Name).upper(); +} + +void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + + bool Valid; + StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid); + if (Valid) + O << StringRef(Name.str()).upper(); + else + O << "#" << Val; +} + +void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned RawVal = MI->getOperand(OpNo).getImm(); + uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal); + O << format("#%#016llx", Val); +} diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h similarity index 88% rename from lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h rename to lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 0fd6f100712..fe7666e5cad 100644 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -1,4 +1,4 @@ -//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===// +//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This class prints an ARM64 MCInst to a .s file. +// This class prints an AArch64 MCInst to a .s file. 
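Similarly, printAdrpLabel scales by 1 << 12 because adrp addresses 4 KiB pages: the target is the page containing the instruction plus the immediate number of pages. A small sketch of that arithmetic (names and sample addresses illustrative):

#include <cstdint>
#include <cstdio>

static uint64_t adrpTarget(uint64_t PC, int64_t Imm) {
  return (PC & ~UINT64_C(0xfff)) + (static_cast<uint64_t>(Imm) << 12);
}

int main() {
  // adrp x0, <sym> at 0x100003a8 with an immediate of 2 points at 0x10002000.
  std::printf("0x%llx\n", (unsigned long long)adrpTarget(0x100003a8, 2));
}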
// //===----------------------------------------------------------------------===// -#ifndef ARM64INSTPRINTER_H -#define ARM64INSTPRINTER_H +#ifndef AArch64INSTPRINTER_H +#define AArch64INSTPRINTER_H -#include "MCTargetDesc/ARM64MCTargetDesc.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -23,10 +23,10 @@ namespace llvm { class MCOperand; -class ARM64InstPrinter : public MCInstPrinter { +class AArch64InstPrinter : public MCInstPrinter { public: - ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; @@ -40,7 +40,7 @@ public: return getRegisterName(RegNo); } static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); + unsigned AltIdx = AArch64::NoRegAltName); protected: bool printSysAlias(const MCInst *MI, raw_ostream &O); @@ -118,9 +118,9 @@ protected: void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; -class ARM64AppleInstPrinter : public ARM64InstPrinter { +class AArch64AppleInstPrinter : public AArch64InstPrinter { public: - ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; @@ -133,7 +133,7 @@ public: return getRegisterName(RegNo); } static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); + unsigned AltIdx = AArch64::NoRegAltName); }; } diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt new file mode 100644 index 00000000000..363f50258d7 --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMAArch64AsmPrinter + AArch64InstPrinter.cpp + ) + +add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen) diff --git a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt similarity index 73% rename from lib/Target/ARM64/InstPrinter/LLVMBuild.txt rename to lib/Target/AArch64/InstPrinter/LLVMBuild.txt index 7ab43924921..a13e842cdd3 100644 --- a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt +++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; +;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,8 +17,8 @@ [component_0] type = Library -name = ARM64AsmPrinter -parent = ARM64 -required_libraries = ARM64Utils MC Support -add_to_library_groups = ARM64 +name = AArch64AsmPrinter +parent = AArch64 +required_libraries = AArch64Utils MC Support +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile similarity index 82% rename from lib/Target/ARM64/InstPrinter/Makefile rename to lib/Target/AArch64/InstPrinter/Makefile index a59efb08465..b17e8d08011 100644 --- a/lib/Target/ARM64/InstPrinter/Makefile +++ b/lib/Target/AArch64/InstPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/AsmPrinter/Makefile ----------------*- Makefile -*-===## +##===- lib/Target/AArch64/AsmPrinter/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmPrinter +LIBRARYNAME = LLVMAArch64AsmPrinter # Hack: we need to include 'main' arm target directory to grab private headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
diff --git a/lib/Target/ARM64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt similarity index 70% rename from lib/Target/ARM64/LLVMBuild.txt rename to lib/Target/AArch64/LLVMBuild.txt index 3d1e56e7ca6..642c18394a6 100644 --- a/lib/Target/ARM64/LLVMBuild.txt +++ b/lib/Target/AArch64/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===; +;===- ./lib/Target/AArch64/LLVMBuild.txt -------------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -20,7 +20,7 @@ subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Util [component_0] type = TargetGroup -name = ARM64 +name = AArch64 parent = Target has_asmparser = 1 has_asmprinter = 1 @@ -29,7 +29,7 @@ has_jit = 1 [component_1] type = Library -name = ARM64CodeGen -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info ARM64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target -add_to_library_groups = ARM64 +name = AArch64CodeGen +parent = AArch64 +required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h similarity index 89% rename from lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h rename to lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h index 53bd3545a59..8b1e44e26e9 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h @@ -1,4 +1,4 @@ -//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===// +//===- AArch64AddressingModes.h - AArch64 Addressing Modes ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file contains the ARM64 addressing mode implementation stuff. +// This file contains the AArch64 addressing mode implementation stuff. // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H -#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H +#ifndef LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H +#define LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -22,8 +22,8 @@ namespace llvm { -/// ARM64_AM - ARM64 Addressing Mode Stuff -namespace ARM64_AM { +/// AArch64_AM - AArch64 Addressing Mode Stuff +namespace AArch64_AM { //===----------------------------------------------------------------------===// // Shifts @@ -49,35 +49,35 @@ enum ShiftExtendType { }; /// getShiftName - Get the string encoding for the shift type. 
-static inline const char *getShiftExtendName(ARM64_AM::ShiftExtendType ST) { +static inline const char *getShiftExtendName(AArch64_AM::ShiftExtendType ST) { switch (ST) { default: assert(false && "unhandled shift type!"); - case ARM64_AM::LSL: return "lsl"; - case ARM64_AM::LSR: return "lsr"; - case ARM64_AM::ASR: return "asr"; - case ARM64_AM::ROR: return "ror"; - case ARM64_AM::MSL: return "msl"; - case ARM64_AM::UXTB: return "uxtb"; - case ARM64_AM::UXTH: return "uxth"; - case ARM64_AM::UXTW: return "uxtw"; - case ARM64_AM::UXTX: return "uxtx"; - case ARM64_AM::SXTB: return "sxtb"; - case ARM64_AM::SXTH: return "sxth"; - case ARM64_AM::SXTW: return "sxtw"; - case ARM64_AM::SXTX: return "sxtx"; + case AArch64_AM::LSL: return "lsl"; + case AArch64_AM::LSR: return "lsr"; + case AArch64_AM::ASR: return "asr"; + case AArch64_AM::ROR: return "ror"; + case AArch64_AM::MSL: return "msl"; + case AArch64_AM::UXTB: return "uxtb"; + case AArch64_AM::UXTH: return "uxth"; + case AArch64_AM::UXTW: return "uxtw"; + case AArch64_AM::UXTX: return "uxtx"; + case AArch64_AM::SXTB: return "sxtb"; + case AArch64_AM::SXTH: return "sxth"; + case AArch64_AM::SXTW: return "sxtw"; + case AArch64_AM::SXTX: return "sxtx"; } return nullptr; } /// getShiftType - Extract the shift type. -static inline ARM64_AM::ShiftExtendType getShiftType(unsigned Imm) { +static inline AArch64_AM::ShiftExtendType getShiftType(unsigned Imm) { switch ((Imm >> 6) & 0x7) { - default: return ARM64_AM::InvalidShiftExtend; - case 0: return ARM64_AM::LSL; - case 1: return ARM64_AM::LSR; - case 2: return ARM64_AM::ASR; - case 3: return ARM64_AM::ROR; - case 4: return ARM64_AM::MSL; + default: return AArch64_AM::InvalidShiftExtend; + case 0: return AArch64_AM::LSL; + case 1: return AArch64_AM::LSR; + case 2: return AArch64_AM::ASR; + case 3: return AArch64_AM::ROR; + case 4: return AArch64_AM::MSL; } } @@ -95,17 +95,17 @@ static inline unsigned getShiftValue(unsigned Imm) { /// 100 ==> msl /// {8-6} = shifter /// {5-0} = imm -static inline unsigned getShifterImm(ARM64_AM::ShiftExtendType ST, +static inline unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm) { assert((Imm & 0x3f) == Imm && "Illegal shifted immedate value!"); unsigned STEnc = 0; switch (ST) { default: llvm_unreachable("Invalid shift requested"); - case ARM64_AM::LSL: STEnc = 0; break; - case ARM64_AM::LSR: STEnc = 1; break; - case ARM64_AM::ASR: STEnc = 2; break; - case ARM64_AM::ROR: STEnc = 3; break; - case ARM64_AM::MSL: STEnc = 4; break; + case AArch64_AM::LSL: STEnc = 0; break; + case AArch64_AM::LSR: STEnc = 1; break; + case AArch64_AM::ASR: STEnc = 2; break; + case AArch64_AM::ROR: STEnc = 3; break; + case AArch64_AM::MSL: STEnc = 4; break; } return (STEnc << 6) | (Imm & 0x3f); } @@ -120,22 +120,22 @@ static inline unsigned getArithShiftValue(unsigned Imm) { } /// getExtendType - Extract the extend type for operands of arithmetic ops. 
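The shifter-immediate helpers above pack the operand as {8-6} = shift kind, {5-0} = amount. A round-trip sketch of that packing, using the same numeric encodings as getShifterImm/getShiftType/getShiftValue:

#include <cassert>

enum MiniShiftKind { kLSL = 0, kLSR = 1, kASR = 2, kROR = 3, kMSL = 4 };

static unsigned packShift(MiniShiftKind K, unsigned Amount) {
  assert((Amount & 0x3f) == Amount && "amount must fit in 6 bits");
  return (unsigned(K) << 6) | Amount;
}

int main() {
  unsigned Imm = packShift(kLSR, 17);
  assert(((Imm >> 6) & 0x7) == kLSR); // getShiftType
  assert((Imm & 0x3f) == 17);         // getShiftValue
  return 0;
}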
-static inline ARM64_AM::ShiftExtendType getExtendType(unsigned Imm) { +static inline AArch64_AM::ShiftExtendType getExtendType(unsigned Imm) { assert((Imm & 0x7) == Imm && "invalid immediate!"); switch (Imm) { default: llvm_unreachable("Compiler bug!"); - case 0: return ARM64_AM::UXTB; - case 1: return ARM64_AM::UXTH; - case 2: return ARM64_AM::UXTW; - case 3: return ARM64_AM::UXTX; - case 4: return ARM64_AM::SXTB; - case 5: return ARM64_AM::SXTH; - case 6: return ARM64_AM::SXTW; - case 7: return ARM64_AM::SXTX; + case 0: return AArch64_AM::UXTB; + case 1: return AArch64_AM::UXTH; + case 2: return AArch64_AM::UXTW; + case 3: return AArch64_AM::UXTX; + case 4: return AArch64_AM::SXTB; + case 5: return AArch64_AM::SXTH; + case 6: return AArch64_AM::SXTW; + case 7: return AArch64_AM::SXTX; } } -static inline ARM64_AM::ShiftExtendType getArithExtendType(unsigned Imm) { +static inline AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm) { return getExtendType((Imm >> 3) & 0x7); } @@ -148,17 +148,17 @@ static inline ARM64_AM::ShiftExtendType getArithExtendType(unsigned Imm) { /// 101 ==> sxth /// 110 ==> sxtw /// 111 ==> sxtx -inline unsigned getExtendEncoding(ARM64_AM::ShiftExtendType ET) { +inline unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET) { switch (ET) { default: llvm_unreachable("Invalid extend type requested"); - case ARM64_AM::UXTB: return 0; break; - case ARM64_AM::UXTH: return 1; break; - case ARM64_AM::UXTW: return 2; break; - case ARM64_AM::UXTX: return 3; break; - case ARM64_AM::SXTB: return 4; break; - case ARM64_AM::SXTH: return 5; break; - case ARM64_AM::SXTW: return 6; break; - case ARM64_AM::SXTX: return 7; break; + case AArch64_AM::UXTB: return 0; break; + case AArch64_AM::UXTH: return 1; break; + case AArch64_AM::UXTW: return 2; break; + case AArch64_AM::UXTX: return 3; break; + case AArch64_AM::SXTB: return 4; break; + case AArch64_AM::SXTH: return 5; break; + case AArch64_AM::SXTW: return 6; break; + case AArch64_AM::SXTX: return 7; break; } } @@ -167,7 +167,7 @@ inline unsigned getExtendEncoding(ARM64_AM::ShiftExtendType ET) { /// imm: 3-bit extend amount /// {5-3} = shifter /// {2-0} = imm3 -static inline unsigned getArithExtendImm(ARM64_AM::ShiftExtendType ET, +static inline unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm) { assert((Imm & 0x7) == Imm && "Illegal shifted immedate value!"); return (getExtendEncoding(ET) << 3) | (Imm & 0x7); @@ -181,7 +181,7 @@ static inline bool getMemDoShift(unsigned Imm) { /// getExtendType - Extract the extend type for the offset operand of /// loads/stores. 
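The arithmetic extend operand built by getArithExtendImm above follows the same pattern: {5-3} is the extend kind (uxtb=0 through sxtx=7, exactly the table in getExtendEncoding) and {2-0} is the left-shift amount. A standalone round trip, with illustrative names rather than the patch's API, before the load/store extend helpers that follow:

// Illustrative only: packing/unpacking of the 6-bit arithmetic extend operand.
#include <cassert>

enum Extend { UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX };

static unsigned packArithExtend(Extend E, unsigned ShiftAmt) {
  assert(ShiftAmt <= 7 && "the shift amount is a 3-bit field");
  return (unsigned(E) << 3) | (ShiftAmt & 0x7);  // {5-3} = kind, {2-0} = imm3
}

static Extend unpackArithExtendType(unsigned Imm) {
  return Extend((Imm >> 3) & 0x7);
}

int main() {
  unsigned Enc = packArithExtend(SXTW, 2);   // e.g. add x0, x1, w2, sxtw #2
  assert(unpackArithExtendType(Enc) == SXTW && (Enc & 0x7) == 2);
  return 0;
}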
-static inline ARM64_AM::ShiftExtendType getMemExtendType(unsigned Imm) { +static inline AArch64_AM::ShiftExtendType getMemExtendType(unsigned Imm) { return getExtendType((Imm >> 1) & 0x7); } @@ -197,7 +197,7 @@ static inline ARM64_AM::ShiftExtendType getMemExtendType(unsigned Imm) { /// 111 ==> sxtx /// {3-1} = shifter /// {0} = doshift -static inline unsigned getMemExtendImm(ARM64_AM::ShiftExtendType ET, +static inline unsigned getMemExtendImm(AArch64_AM::ShiftExtendType ET, bool DoShift) { return (getExtendEncoding(ET) << 1) | unsigned(DoShift); } @@ -731,7 +731,7 @@ static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) { return (EncVal << 32) | EncVal; } -} // end namespace ARM64_AM +} // end namespace AArch64_AM } // end namespace llvm diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp similarity index 66% rename from lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index ba5025ab620..d8900d4fceb 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -1,4 +1,4 @@ -//===-- ARM64AsmBackend.cpp - ARM64 Assembler Backend ---------------------===// +//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===// // // The LLVM Compiler Infrastructure // @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "MCTargetDesc/ARM64FixupKinds.h" +#include "AArch64.h" +#include "AArch64RegisterInfo.h" +#include "MCTargetDesc/AArch64FixupKinds.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCDirectives.h" @@ -23,38 +23,38 @@ using namespace llvm; namespace { -class ARM64AsmBackend : public MCAsmBackend { +class AArch64AsmBackend : public MCAsmBackend { static const unsigned PCRelFlagVal = MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel; public: - ARM64AsmBackend(const Target &T) : MCAsmBackend() {} + AArch64AsmBackend(const Target &T) : MCAsmBackend() {} unsigned getNumFixupKinds() const override { - return ARM64::NumTargetFixupKinds; + return AArch64::NumTargetFixupKinds; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { - const static MCFixupKindInfo Infos[ARM64::NumTargetFixupKinds] = { + const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in - // ARM64FixupKinds.h. + // AArch64FixupKinds.h. 
// // Name Offset (bits) Size (bits) Flags - { "fixup_arm64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_add_imm12", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale1", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale2", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale4", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale8", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale16", 10, 12, 0 }, - { "fixup_arm64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal }, - { "fixup_arm64_movw", 5, 16, 0 }, - { "fixup_arm64_pcrel_branch14", 5, 14, PCRelFlagVal }, - { "fixup_arm64_pcrel_branch19", 5, 19, PCRelFlagVal }, - { "fixup_arm64_pcrel_branch26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_pcrel_call26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_tlsdesc_call", 0, 0, 0 } + { "fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, + { "fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, + { "fixup_aarch64_add_imm12", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale1", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale2", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale4", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale8", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale16", 10, 12, 0 }, + { "fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal }, + { "fixup_aarch64_movw", 5, 16, 0 }, + { "fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal }, + { "fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal }, + { "fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal }, + { "fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal }, + { "fixup_aarch64_tlsdesc_call", 0, 0, 0 } }; if (Kind < FirstTargetFixupKind) @@ -88,31 +88,31 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { default: assert(0 && "Unknown fixup kind!"); - case ARM64::fixup_arm64_tlsdesc_call: + case AArch64::fixup_aarch64_tlsdesc_call: return 0; case FK_Data_1: return 1; case FK_Data_2: - case ARM64::fixup_arm64_movw: + case AArch64::fixup_aarch64_movw: return 2; - case ARM64::fixup_arm64_pcrel_branch14: - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: - case ARM64::fixup_arm64_ldr_pcrel_imm19: - case ARM64::fixup_arm64_pcrel_branch19: + case AArch64::fixup_aarch64_pcrel_branch14: + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale16: + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + case AArch64::fixup_aarch64_pcrel_branch19: return 3; - case ARM64::fixup_arm64_pcrel_adr_imm21: - case ARM64::fixup_arm64_pcrel_adrp_imm21: - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: + case AArch64::fixup_aarch64_pcrel_adr_imm21: + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: case FK_Data_4: return 4; @@ -132,49 +132,49 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: assert(false && "Unknown fixup kind!"); - case ARM64::fixup_arm64_pcrel_adr_imm21: + case AArch64::fixup_aarch64_pcrel_adr_imm21: if (SignedValue > 2097151 || SignedValue < -2097152) report_fatal_error("fixup value out of range"); return 
AdrImmBits(Value & 0x1fffffULL); - case ARM64::fixup_arm64_pcrel_adrp_imm21: + case AArch64::fixup_aarch64_pcrel_adrp_imm21: return AdrImmBits((Value & 0x1fffff000ULL) >> 12); - case ARM64::fixup_arm64_ldr_pcrel_imm19: - case ARM64::fixup_arm64_pcrel_branch19: + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + case AArch64::fixup_aarch64_pcrel_branch19: // Signed 21-bit immediate if (SignedValue > 2097151 || SignedValue < -2097152) report_fatal_error("fixup value out of range"); // Low two bits are not encoded. return (Value >> 2) & 0x7ffff; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: // Unsigned 12-bit immediate if (Value >= 0x1000) report_fatal_error("invalid imm12 fixup value"); return Value; - case ARM64::fixup_arm64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale2: // Unsigned 12-bit immediate which gets multiplied by 2 if (Value & 1 || Value >= 0x2000) report_fatal_error("invalid imm12 fixup value"); return Value >> 1; - case ARM64::fixup_arm64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale4: // Unsigned 12-bit immediate which gets multiplied by 4 if (Value & 3 || Value >= 0x4000) report_fatal_error("invalid imm12 fixup value"); return Value >> 2; - case ARM64::fixup_arm64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale8: // Unsigned 12-bit immediate which gets multiplied by 8 if (Value & 7 || Value >= 0x8000) report_fatal_error("invalid imm12 fixup value"); return Value >> 3; - case ARM64::fixup_arm64_ldst_imm12_scale16: + case AArch64::fixup_aarch64_ldst_imm12_scale16: // Unsigned 12-bit immediate which gets multiplied by 16 if (Value & 15 || Value >= 0x10000) report_fatal_error("invalid imm12 fixup value"); return Value >> 4; - case ARM64::fixup_arm64_movw: + case AArch64::fixup_aarch64_movw: report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet"); return Value; - case ARM64::fixup_arm64_pcrel_branch14: + case AArch64::fixup_aarch64_pcrel_branch14: // Signed 16-bit immediate if (SignedValue > 32767 || SignedValue < -32768) report_fatal_error("fixup value out of range"); @@ -182,8 +182,8 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { if (Value & 0x3) report_fatal_error("fixup not sufficiently aligned"); return (Value >> 2) & 0x3fff; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: // Signed 28-bit immediate if (SignedValue > 134217727 || SignedValue < -134217728) report_fatal_error("fixup value out of range"); @@ -199,9 +199,9 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { } } -void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel) const { +void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); if (!Value) return; // Doesn't change encoding. 
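The ldst imm12 cases of adjustFixupValue above all do the same thing: the byte offset must be a multiple of the access size and must still fit in 12 bits once the implied low bits are dropped, otherwise the assembler bails out. A standalone model of that check (the real code calls report_fatal_error; a failure value stands in for it here, and the function name is mine):

// Illustrative only: scaling of a load/store imm12 fixup value.
#include <cstdint>
#include <optional>

// Scale is the access size in bytes: 1, 2, 4, 8 or 16.
static std::optional<uint64_t> adjustLdStImm12(uint64_t Value, unsigned Scale) {
  if (Value % Scale != 0)           // misaligned offset: cannot be encoded
    return std::nullopt;
  uint64_t Encoded = Value / Scale; // drop the implied low bits
  if (Encoded >= 0x1000)            // must fit in an unsigned 12-bit field
    return std::nullopt;
  return Encoded;
}

int main() {
  // ldr x0, [x1, #4088] uses scale 8: 4088 / 8 = 511 fits in 12 bits.
  return adjustLdStImm12(4088, 8) == uint64_t(511) ? 0 : 1;
}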
@@ -221,25 +221,27 @@ void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } -bool ARM64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { +bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { return false; } -bool ARM64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { - // FIXME: This isn't correct for ARM64. Just moving the "generic" logic +bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME: This isn't correct for AArch64. Just moving the "generic" logic // into the targets for now. // // Relax if the value is too big for a (signed) i8. return int64_t(Value) != int64_t(int8_t(Value)); } -void ARM64AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(false && "ARM64AsmBackend::relaxInstruction() unimplemented"); +void AArch64AsmBackend::relaxInstruction(const MCInst &Inst, + MCInst &Res) const { + assert(false && "AArch64AsmBackend::relaxInstruction() unimplemented"); } -bool ARM64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { // If the count is not 4-byte aligned, we must be writing data into the text // section (otherwise we have unaligned instructions, and thus have far // bigger problems), so just write zeros instead. @@ -263,14 +265,14 @@ namespace CU { enum CompactUnwindEncodings { /// \brief A "frameless" leaf function, where no non-volatile registers are /// saved. The return remains in LR throughout the function. - UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, + UNWIND_AArch64_MODE_FRAMELESS = 0x02000000, /// \brief No compact unwind encoding available. Instead the low 23-bits of /// the compact unwind encoding is the offset of the DWARF FDE in the /// __eh_frame section. This mode is never used in object files. It is only /// generated by the linker in final linked images, which have only DWARF info /// for a function. - UNWIND_ARM64_MODE_DWARF = 0x03000000, + UNWIND_AArch64_MODE_DWARF = 0x03000000, /// \brief This is a standard arm64 prologue where FP/LR are immediately /// pushed on the stack, then SP is copied to FP. If there are any @@ -278,40 +280,40 @@ enum CompactUnwindEncodings { /// in a contiguous ranger right below the saved FP/LR pair. Any subset of the /// five X pairs and four D pairs can be saved, but the memory layout must be /// in register number order. - UNWIND_ARM64_MODE_FRAME = 0x04000000, + UNWIND_AArch64_MODE_FRAME = 0x04000000, /// \brief Frame register pair encodings. 
- UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, - UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, - UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, - UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, - UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, - UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, - UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, - UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, - UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800 + UNWIND_AArch64_FRAME_X19_X20_PAIR = 0x00000001, + UNWIND_AArch64_FRAME_X21_X22_PAIR = 0x00000002, + UNWIND_AArch64_FRAME_X23_X24_PAIR = 0x00000004, + UNWIND_AArch64_FRAME_X25_X26_PAIR = 0x00000008, + UNWIND_AArch64_FRAME_X27_X28_PAIR = 0x00000010, + UNWIND_AArch64_FRAME_D8_D9_PAIR = 0x00000100, + UNWIND_AArch64_FRAME_D10_D11_PAIR = 0x00000200, + UNWIND_AArch64_FRAME_D12_D13_PAIR = 0x00000400, + UNWIND_AArch64_FRAME_D14_D15_PAIR = 0x00000800 }; } // end CU namespace // FIXME: This should be in a separate file. -class DarwinARM64AsmBackend : public ARM64AsmBackend { +class DarwinAArch64AsmBackend : public AArch64AsmBackend { const MCRegisterInfo &MRI; /// \brief Encode compact unwind stack adjustment for frameless functions. - /// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h. + /// See UNWIND_AArch64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h. /// The stack size always needs to be 16 byte aligned. uint32_t encodeStackAdjustment(uint32_t StackSize) const { return (StackSize / 16) << 12; } public: - DarwinARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI) - : ARM64AsmBackend(T), MRI(MRI) {} + DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI) + : AArch64AsmBackend(T), MRI(MRI) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARM64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, - MachO::CPU_SUBTYPE_ARM64_ALL); + return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, + MachO::CPU_SUBTYPE_ARM64_ALL); } bool doesSectionRequireSymbols(const MCSection &Section) const override { @@ -354,7 +356,7 @@ public: uint32_t generateCompactUnwindEncoding( ArrayRef Instrs) const override { if (Instrs.empty()) - return CU::UNWIND_ARM64_MODE_FRAMELESS; + return CU::UNWIND_AArch64_MODE_FRAMELESS; bool HasFP = false; unsigned StackSize = 0; @@ -366,11 +368,11 @@ public: switch (Inst.getOperation()) { default: // Cannot handle this directive: bail out. - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; case MCCFIInstruction::OpDefCfa: { // Defines a frame pointer. assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) == - ARM64::FP && + AArch64::FP && "Invalid frame pointer!"); assert(i + 2 < e && "Insufficient CFI instructions to define a frame!"); @@ -387,11 +389,11 @@ public: LRReg = getXRegFromWReg(LRReg); FPReg = getXRegFromWReg(FPReg); - assert(LRReg == ARM64::LR && FPReg == ARM64::FP && + assert(LRReg == AArch64::LR && FPReg == AArch64::FP && "Pushing invalid registers for frame!"); // Indicate that the function has a frame. - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME; + CompactUnwindEncoding |= CU::UNWIND_AArch64_MODE_FRAME; HasFP = true; break; } @@ -405,11 +407,11 @@ public: // `.cfi_offset' instructions with the appropriate registers specified. 
unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true); if (i + 1 == e) - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; const MCCFIInstruction &Inst2 = Instrs[++i]; if (Inst2.getOperation() != MCCFIInstruction::OpOffset) - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true); // N.B. The encodings must be in register number order, and the X @@ -423,21 +425,21 @@ public: Reg1 = getXRegFromWReg(Reg1); Reg2 = getXRegFromWReg(Reg2); - if (Reg1 == ARM64::X19 && Reg2 == ARM64::X20 && + if (Reg1 == AArch64::X19 && Reg2 == AArch64::X20 && (CompactUnwindEncoding & 0xF1E) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X19_X20_PAIR; - else if (Reg1 == ARM64::X21 && Reg2 == ARM64::X22 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X19_X20_PAIR; + else if (Reg1 == AArch64::X21 && Reg2 == AArch64::X22 && (CompactUnwindEncoding & 0xF1C) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X21_X22_PAIR; - else if (Reg1 == ARM64::X23 && Reg2 == ARM64::X24 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X21_X22_PAIR; + else if (Reg1 == AArch64::X23 && Reg2 == AArch64::X24 && (CompactUnwindEncoding & 0xF18) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X23_X24_PAIR; - else if (Reg1 == ARM64::X25 && Reg2 == ARM64::X26 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X23_X24_PAIR; + else if (Reg1 == AArch64::X25 && Reg2 == AArch64::X26 && (CompactUnwindEncoding & 0xF10) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X25_X26_PAIR; - else if (Reg1 == ARM64::X27 && Reg2 == ARM64::X28 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X25_X26_PAIR; + else if (Reg1 == AArch64::X27 && Reg2 == AArch64::X28 && (CompactUnwindEncoding & 0xF00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X27_X28_PAIR; + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X27_X28_PAIR; else { Reg1 = getDRegFromBReg(Reg1); Reg2 = getDRegFromBReg(Reg2); @@ -446,20 +448,20 @@ public: // D10/D11 pair = 0x00000200, // D12/D13 pair = 0x00000400, // D14/D15 pair = 0x00000800 - if (Reg1 == ARM64::D8 && Reg2 == ARM64::D9 && + if (Reg1 == AArch64::D8 && Reg2 == AArch64::D9 && (CompactUnwindEncoding & 0xE00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D8_D9_PAIR; - else if (Reg1 == ARM64::D10 && Reg2 == ARM64::D11 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D8_D9_PAIR; + else if (Reg1 == AArch64::D10 && Reg2 == AArch64::D11 && (CompactUnwindEncoding & 0xC00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D10_D11_PAIR; - else if (Reg1 == ARM64::D12 && Reg2 == ARM64::D13 && + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D10_D11_PAIR; + else if (Reg1 == AArch64::D12 && Reg2 == AArch64::D13 && (CompactUnwindEncoding & 0x800) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D12_D13_PAIR; - else if (Reg1 == ARM64::D14 && Reg2 == ARM64::D15) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D14_D15_PAIR; + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D12_D13_PAIR; + else if (Reg1 == AArch64::D14 && Reg2 == AArch64::D15) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D14_D15_PAIR; else // A pair was pushed which we cannot handle. - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; } break; @@ -471,9 +473,9 @@ public: // With compact unwind info we can only represent stack adjustments of up // to 65520 bytes. 
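That 65520-byte ceiling comes from the frameless layout visible above: the stack adjustment is stored in 16-byte units starting at bit 12 of the compact unwind word (encodeStackAdjustment), on top of UNWIND_AArch64_MODE_FRAMELESS. A standalone sketch of just that assembly step, under the assumption that anything unencodable falls back to DWARF as the surrounding code does:

// Illustrative only: compact unwind word for a frameless leaf function.
#include <cstdint>
#include <optional>

static const uint32_t MODE_FRAMELESS = 0x02000000; // UNWIND_AArch64_MODE_FRAMELESS

static std::optional<uint32_t> framelessEncoding(uint32_t StackSize) {
  if (StackSize % 16 != 0 || StackSize > 65520)
    return std::nullopt;                         // delegate to DWARF instead
  return MODE_FRAMELESS | ((StackSize / 16) << 12);
}

int main() {
  // A 64-byte frame: 64 / 16 = 4, placed at bit 12 of the encoding.
  return framelessEncoding(64) == uint32_t(0x02004000) ? 0 : 1;
}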
if (StackSize > 65520) - return CU::UNWIND_ARM64_MODE_DWARF; + return CU::UNWIND_AArch64_MODE_DWARF; - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAMELESS; + CompactUnwindEncoding |= CU::UNWIND_AArch64_MODE_FRAMELESS; CompactUnwindEncoding |= encodeStackAdjustment(StackSize); } @@ -485,16 +487,16 @@ public: namespace { -class ELFARM64AsmBackend : public ARM64AsmBackend { +class ELFAArch64AsmBackend : public AArch64AsmBackend { public: uint8_t OSABI; bool IsLittleEndian; - ELFARM64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian) - : ARM64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {} + ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian) + : AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARM64ELFObjectWriter(OS, OSABI, IsLittleEndian); + return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian); } void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, @@ -506,12 +508,10 @@ public: uint64_t Value, bool IsPCRel) const override; }; -void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - uint64_t &Value, bool &IsResolved) { +void ELFAArch64AsmBackend::processFixupValue( + const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, + const MCFragment *DF, const MCValue &Target, uint64_t &Value, + bool &IsResolved) { // The ADRP instruction adds some multiple of 0x1000 to the current PC & // ~0xfff. This means that the required offset to reach a symbol can vary by // up to one step depending on where the ADRP is in memory. For example: @@ -524,13 +524,13 @@ void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm, // same page as the ADRP and the instruction should encode 0x0. Assuming the // section isn't 0x1000-aligned, we therefore need to delegate this decision // to the linker -- a relocation! 
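The arithmetic behind that comment is easy to see in isolation: the immediate an ADRP needs is the difference between the 4 KiB page of the target and the page containing the ADRP itself, so the same symbol distance can require different immediates depending on where the instruction lands. That is why the check that follows leaves the adrp fixup unresolved for the linker. A small standalone illustration (function name is mine):

// Illustrative only: page-delta computation for ADRP.
#include <cassert>
#include <cstdint>

static int64_t adrpPageDelta(uint64_t AdrpAddr, uint64_t TargetAddr) {
  return int64_t(TargetAddr & ~0xfffULL) - int64_t(AdrpAddr & ~0xfffULL);
}

int main() {
  // Both calls cover the same 0x800-byte distance to the symbol:
  assert(adrpPageDelta(0x10000F00, 0x10001700) == 0x1000); // crosses a page
  assert(adrpPageDelta(0x10000100, 0x10000900) == 0);      // same page
  return 0;
}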
- if ((uint32_t)Fixup.getKind() == ARM64::fixup_arm64_pcrel_adrp_imm21) + if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21) IsResolved = false; } -void ELFARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel) const { +void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { // store fixups in .eh_frame section in big endian order if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) { const MCSection *Sec = Fixup.getValue()->FindAssociatedSection(); @@ -538,27 +538,29 @@ void ELFARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, if (SecELF->getSectionName() == ".eh_frame") Value = ByteSwap_32(unsigned(Value)); } - ARM64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel); + AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel); } } -MCAsmBackend *llvm::createARM64leAsmBackend(const Target &T, +MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) - return new DarwinARM64AsmBackend(T, MRI); + return new DarwinAArch64AsmBackend(T, MRI); assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); - return new ELFARM64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/true); + return new ELFAArch64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/true); } -MCAsmBackend *llvm::createARM64beAsmBackend(const Target &T, +MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU) { Triple TheTriple(TT); - assert(TheTriple.isOSBinFormatELF() && "Big endian is only supported for ELF targets!"); - return new ELFARM64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/false); + assert(TheTriple.isOSBinFormatELF() && + "Big endian is only supported for ELF targets!"); + return new ELFAArch64AsmBackend(T, TheTriple.getOS(), + /*IsLittleEndian=*/false); } diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp similarity index 56% rename from lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 0990a701bc8..e05191eaf3e 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -1,4 +1,4 @@ -//===-- ARM64ELFObjectWriter.cpp - ARM64 ELF Writer -----------------------===// +//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" @@ -22,11 +22,11 @@ using namespace llvm; namespace { -class ARM64ELFObjectWriter : public MCELFObjectTargetWriter { +class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { public: - ARM64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); + AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); - virtual ~ARM64ELFObjectWriter(); + 
virtual ~AArch64ELFObjectWriter(); protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, @@ -36,19 +36,20 @@ private: }; } -ARM64ELFObjectWriter::ARM64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian) +AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, + bool IsLittleEndian) : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, /*HasRelocationAddend*/ true) {} -ARM64ELFObjectWriter::~ARM64ELFObjectWriter() {} +AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {} -unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, +unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - ARM64MCExpr::VariantKind RefKind = - static_cast(Target.getRefKind()); - ARM64MCExpr::VariantKind SymLoc = ARM64MCExpr::getSymbolLoc(RefKind); - bool IsNC = ARM64MCExpr::isNotChecked(RefKind); + AArch64MCExpr::VariantKind RefKind = + static_cast(Target.getRefKind()); + AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind); + bool IsNC = AArch64MCExpr::isNotChecked(RefKind); assert((!Target.getSymA() || Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) && @@ -66,30 +67,30 @@ unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_AARCH64_PREL32; case FK_Data_8: return ELF::R_AARCH64_PREL64; - case ARM64::fixup_arm64_pcrel_adr_imm21: - assert(SymLoc == ARM64MCExpr::VK_NONE && "unexpected ADR relocation"); + case AArch64::fixup_aarch64_pcrel_adr_imm21: + assert(SymLoc == AArch64MCExpr::VK_NONE && "unexpected ADR relocation"); return ELF::R_AARCH64_ADR_PREL_LO21; - case ARM64::fixup_arm64_pcrel_adrp_imm21: - if (SymLoc == ARM64MCExpr::VK_ABS && !IsNC) + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC) return ELF::R_AARCH64_ADR_PREL_PG_HI21; - if (SymLoc == ARM64MCExpr::VK_GOT && !IsNC) + if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC) return ELF::R_AARCH64_ADR_GOT_PAGE; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC) return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC) return ELF::R_AARCH64_TLSDESC_ADR_PAGE; llvm_unreachable("invalid symbol kind for ADRP relocation"); - case ARM64::fixup_arm64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_branch26: return ELF::R_AARCH64_JUMP26; - case ARM64::fixup_arm64_pcrel_call26: + case AArch64::fixup_aarch64_pcrel_call26: return ELF::R_AARCH64_CALL26; - case ARM64::fixup_arm64_ldr_pcrel_imm19: - if (SymLoc == ARM64MCExpr::VK_GOTTPREL) + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + if (SymLoc == AArch64MCExpr::VK_GOTTPREL) return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; return ELF::R_AARCH64_LD_PREL_LO19; - case ARM64::fixup_arm64_pcrel_branch14: + case AArch64::fixup_aarch64_pcrel_branch14: return ELF::R_AARCH64_TSTBR14; - case ARM64::fixup_arm64_pcrel_branch19: + case AArch64::fixup_aarch64_pcrel_branch19: return ELF::R_AARCH64_CONDBR19; default: llvm_unreachable("Unsupported pc-relative fixup kind"); @@ -102,142 +103,142 @@ unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_AARCH64_ABS32; case FK_Data_8: return ELF::R_AARCH64_ABS64; - case ARM64::fixup_arm64_add_imm12: - if (RefKind == ARM64MCExpr::VK_DTPREL_HI12) + case AArch64::fixup_aarch64_add_imm12: + if (RefKind == AArch64MCExpr::VK_DTPREL_HI12) return ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; - if (RefKind == 
ARM64MCExpr::VK_TPREL_HI12) + if (RefKind == AArch64MCExpr::VK_TPREL_HI12) return ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; - if (RefKind == ARM64MCExpr::VK_DTPREL_LO12_NC) + if (RefKind == AArch64MCExpr::VK_DTPREL_LO12_NC) return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_LO12) + if (RefKind == AArch64MCExpr::VK_DTPREL_LO12) return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; - if (RefKind == ARM64MCExpr::VK_TPREL_LO12_NC) + if (RefKind == AArch64MCExpr::VK_TPREL_LO12_NC) return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_LO12) + if (RefKind == AArch64MCExpr::VK_TPREL_LO12) return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; - if (RefKind == ARM64MCExpr::VK_TLSDESC_LO12) + if (RefKind == AArch64MCExpr::VK_TLSDESC_LO12) return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_ADD_ABS_LO12_NC; report_fatal_error("invalid fixup for add (uimm12) instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale1: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale1: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST8_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; report_fatal_error("invalid fixup for 8-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale2: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale2: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST16_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; report_fatal_error("invalid fixup for 16-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale4: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale4: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST32_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) + if (SymLoc == 
AArch64MCExpr::VK_TPREL && IsNC) return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; report_fatal_error("invalid fixup for 32-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale8: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale8: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST64_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOT && IsNC) + if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) return ELF::R_AARCH64_LD64_GOT_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && IsNC) + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC) + if (SymLoc == AArch64MCExpr::VK_TLSDESC && IsNC) return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; report_fatal_error("invalid fixup for 64-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_ldst_imm12_scale16: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) + case AArch64::fixup_aarch64_ldst_imm12_scale16: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return ELF::R_AARCH64_LDST128_ABS_LO12_NC; report_fatal_error("invalid fixup for 128-bit load/store instruction"); return 0; - case ARM64::fixup_arm64_movw: - if (RefKind == ARM64MCExpr::VK_ABS_G3) + case AArch64::fixup_aarch64_movw: + if (RefKind == AArch64MCExpr::VK_ABS_G3) return ELF::R_AARCH64_MOVW_UABS_G3; - if (RefKind == ARM64MCExpr::VK_ABS_G2) + if (RefKind == AArch64MCExpr::VK_ABS_G2) return ELF::R_AARCH64_MOVW_UABS_G2; - if (RefKind == ARM64MCExpr::VK_ABS_G2_S) + if (RefKind == AArch64MCExpr::VK_ABS_G2_S) return ELF::R_AARCH64_MOVW_SABS_G2; - if (RefKind == ARM64MCExpr::VK_ABS_G2_NC) + if (RefKind == AArch64MCExpr::VK_ABS_G2_NC) return ELF::R_AARCH64_MOVW_UABS_G2_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G1) + if (RefKind == AArch64MCExpr::VK_ABS_G1) return ELF::R_AARCH64_MOVW_UABS_G1; - if (RefKind == ARM64MCExpr::VK_ABS_G1_S) + if (RefKind == AArch64MCExpr::VK_ABS_G1_S) return ELF::R_AARCH64_MOVW_SABS_G1; - if (RefKind == ARM64MCExpr::VK_ABS_G1_NC) + if (RefKind == AArch64MCExpr::VK_ABS_G1_NC) return ELF::R_AARCH64_MOVW_UABS_G1_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G0) + if (RefKind == AArch64MCExpr::VK_ABS_G0) return ELF::R_AARCH64_MOVW_UABS_G0; - if (RefKind == ARM64MCExpr::VK_ABS_G0_S) + if (RefKind == AArch64MCExpr::VK_ABS_G0_S) return ELF::R_AARCH64_MOVW_SABS_G0; - if (RefKind == ARM64MCExpr::VK_ABS_G0_NC) + if (RefKind == AArch64MCExpr::VK_ABS_G0_NC) return ELF::R_AARCH64_MOVW_UABS_G0_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_G2) + if (RefKind == AArch64MCExpr::VK_DTPREL_G2) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1) + if (RefKind == AArch64MCExpr::VK_DTPREL_G1) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1_NC) + if (RefKind == AArch64MCExpr::VK_DTPREL_G1_NC) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; - if (RefKind == 
ARM64MCExpr::VK_DTPREL_G0) + if (RefKind == AArch64MCExpr::VK_DTPREL_G0) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0_NC) + if (RefKind == AArch64MCExpr::VK_DTPREL_G0_NC) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G2) + if (RefKind == AArch64MCExpr::VK_TPREL_G2) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; - if (RefKind == ARM64MCExpr::VK_TPREL_G1) + if (RefKind == AArch64MCExpr::VK_TPREL_G1) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; - if (RefKind == ARM64MCExpr::VK_TPREL_G1_NC) + if (RefKind == AArch64MCExpr::VK_TPREL_G1_NC) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G0) + if (RefKind == AArch64MCExpr::VK_TPREL_G0) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; - if (RefKind == ARM64MCExpr::VK_TPREL_G0_NC) + if (RefKind == AArch64MCExpr::VK_TPREL_G0_NC) return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G1) + if (RefKind == AArch64MCExpr::VK_GOTTPREL_G1) return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G0_NC) + if (RefKind == AArch64MCExpr::VK_GOTTPREL_G0_NC) return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; report_fatal_error("invalid fixup for movz/movk instruction"); return 0; - case ARM64::fixup_arm64_tlsdesc_call: + case AArch64::fixup_aarch64_tlsdesc_call: return ELF::R_AARCH64_TLSDESC_CALL; default: llvm_unreachable("Unknown ELF relocation type"); @@ -247,9 +248,10 @@ unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, llvm_unreachable("Unimplemented fixup -> relocation"); } -MCObjectWriter *llvm::createARM64ELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new ARM64ELFObjectWriter(OSABI, IsLittleEndian); + MCELFObjectTargetWriter *MOTW = + new AArch64ELFObjectWriter(OSABI, IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp similarity index 86% rename from lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index adbf8307972..a79406d9d1f 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -1,4 +1,4 @@ -//===- lib/MC/ARM64ELFStreamer.cpp - ELF Object Output for ARM64 ----------===// +//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===// // // The LLVM Compiler Infrastructure // @@ -53,14 +53,14 @@ namespace { /// /// As a result this system is orthogonal to the DataRegion infrastructure used /// by MachO. Beware! 
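The streamer defined below exists to keep those ELF mapping symbols correct: a new "$d"-style symbol (with a counter suffix from MappingSymbolCounter) is only needed when emission switches between instructions and data, which is what the LastEMS state tracks. A standalone model of that transition logic, with names and output format of my own choosing rather than the streamer's API:

// Illustrative only: emit a mapping symbol only when the emission kind changes.
#include <iostream>

enum EmissionKind { EMS_None, EMS_Text, EMS_Data };

struct MappingTracker {
  EmissionKind Last = EMS_None;
  int Counter = 0;
  void willEmit(EmissionKind Kind) {
    if (Kind == Last)
      return;                                    // no transition, no symbol
    std::cout << (Kind == EMS_Text ? "$x" : "$d") << "." << Counter++ << "\n";
    Last = Kind;
  }
};

int main() {
  MappingTracker T;
  T.willEmit(EMS_Text);  // first instruction    -> $x.0
  T.willEmit(EMS_Text);  // still code           -> nothing
  T.willEmit(EMS_Data);  // inline literal data  -> $d.1
  T.willEmit(EMS_Text);  // instructions again   -> $x.2
  return 0;
}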
-class ARM64ELFStreamer : public MCELFStreamer { +class AArch64ELFStreamer : public MCELFStreamer { public: - ARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), LastEMS(EMS_None) {} - ~ARM64ELFStreamer() {} + ~AArch64ELFStreamer() {} void ChangeSection(const MCSection *Section, const MCExpr *Subsection) override { @@ -83,7 +83,7 @@ public: } /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. void EmitBytes(StringRef Data) override { EmitDataMappingSymbol(); @@ -91,7 +91,7 @@ public: } /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. void EmitValueImpl(const MCExpr *Value, unsigned Size, const SMLoc &Loc) override { @@ -147,10 +147,10 @@ private: } namespace llvm { -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack) { - ARM64ELFStreamer *S = new ARM64ELFStreamer(Context, TAB, OS, Emitter); +MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); if (RelaxAll) S->getAssembler().setRelaxAll(true); if (NoExecStack) diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h similarity index 55% rename from lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h rename to lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index 72dadbc50aa..bc6973bd5f8 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -1,4 +1,4 @@ -//===-- ARM64ELFStreamer.h - ELF Streamer for ARM64 -------------*- C++ -*-===// +//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements ELF streamer information for the ARM64 backend. +// This file implements ELF streamer information for the AArch64 backend. 
// //===----------------------------------------------------------------------===// @@ -18,9 +18,9 @@ namespace llvm { -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack); +MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); } -#endif // ARM64_ELF_STREAMER_H +#endif // AArch64_ELF_STREAMER_H diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h new file mode 100644 index 00000000000..bf405fbac77 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -0,0 +1,76 @@ +//===-- AArch64FixupKinds.h - AArch64 Specific Fixup Entries ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AArch64FIXUPKINDS_H +#define LLVM_AArch64FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace AArch64 { + +enum Fixups { + // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into + // an ADR instruction. + fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind, + + // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into + // an ADRP instruction. + fixup_aarch64_pcrel_adrp_imm21, + + // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions. + // No alignment adjustment. All value bits are encoded. + fixup_aarch64_add_imm12, + + // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and + // store instructions. + fixup_aarch64_ldst_imm12_scale1, + fixup_aarch64_ldst_imm12_scale2, + fixup_aarch64_ldst_imm12_scale4, + fixup_aarch64_ldst_imm12_scale8, + fixup_aarch64_ldst_imm12_scale16, + + // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative + // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is used by + // pc-relative loads and generates relocations directly when necessary. + fixup_aarch64_ldr_pcrel_imm19, + + // FIXME: comment + fixup_aarch64_movw, + + // fixup_aarch64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative + // immediate. + fixup_aarch64_pcrel_branch14, + + // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative + // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is use by + // b.cc and generates relocations directly when necessary. + fixup_aarch64_pcrel_branch19, + + // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative + // immediate. + fixup_aarch64_pcrel_branch26, + + // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative + // immediate. Distinguished from branch26 only on ELF. + fixup_aarch64_pcrel_call26, + + // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF + // R_AARCH64_TLSDESC_CALL relocation. 
+ fixup_aarch64_tlsdesc_call, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; + +} // end namespace AArch64 +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp similarity index 82% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index e211d3428bf..dc4a8bf6c9a 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64MCAsmInfo.cpp - ARM64 asm properties -----------------------===// +//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===// // // The LLVM Compiler Infrastructure // @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file contains the declarations of the ARM64MCAsmInfo properties. +// This file contains the declarations of the AArch64MCAsmInfo properties. // //===----------------------------------------------------------------------===// -#include "ARM64MCAsmInfo.h" +#include "AArch64MCAsmInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" @@ -26,13 +26,13 @@ enum AsmWriterVariantTy { }; static cl::opt AsmWriterVariant( - "arm64-neon-syntax", cl::init(Default), - cl::desc("Choose style of NEON code to emit from ARM64 backend:"), + "aarch64-neon-syntax", cl::init(Default), + cl::desc("Choose style of NEON code to emit from AArch64 backend:"), cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"), clEnumValEnd)); -ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() { +AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { // We prefer NEON instructions to be printed in the short form. AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant; @@ -49,7 +49,7 @@ ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() { ExceptionsType = ExceptionHandling::DwarfCFI; } -const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol( +const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol( const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { // On Darwin, we can reference dwarf symbols with foo@GOT-., which // is an indirect pc-relative reference. The default implementation @@ -64,9 +64,9 @@ const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol( return MCBinaryExpr::CreateSub(Res, PC, Context); } -ARM64MCAsmInfoELF::ARM64MCAsmInfoELF(StringRef TT) { +AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) { Triple T(TT); - if (T.getArch() == Triple::arm64_be) + if (T.getArch() == Triple::arm64_be || T.getArch() == Triple::aarch64_be) IsLittleEndian = false; // We prefer NEON instructions to be printed in the short form. 
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h similarity index 61% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h rename to lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 324bc39560f..42a031d7c2c 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- ARM64MCAsmInfo.h - ARM64 asm properties -----------*- C++ -*--====// +//=====-- AArch64MCAsmInfo.h - AArch64 asm properties ---------*- C++ -*--====// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file contains the declaration of the ARM64MCAsmInfo class. +// This file contains the declaration of the AArch64MCAsmInfo class. // //===----------------------------------------------------------------------===// -#ifndef ARM64TARGETASMINFO_H -#define ARM64TARGETASMINFO_H +#ifndef AArch64TARGETASMINFO_H +#define AArch64TARGETASMINFO_H #include "llvm/MC/MCAsmInfoDarwin.h" @@ -20,15 +20,15 @@ namespace llvm { class Target; class StringRef; class MCStreamer; -struct ARM64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit ARM64MCAsmInfoDarwin(); +struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { + explicit AArch64MCAsmInfoDarwin(); const MCExpr * getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; }; -struct ARM64MCAsmInfoELF : public MCAsmInfo { - explicit ARM64MCAsmInfoELF(StringRef TT); +struct AArch64MCAsmInfoELF : public MCAsmInfo { + explicit AArch64MCAsmInfoELF(StringRef TT); }; } // namespace llvm diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp similarity index 73% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 0db08f422e4..464a18cdbc0 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===// +//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code-=// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM64MCCodeEmitter class. +// This file implements the AArch64MCCodeEmitter class. 
// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "Utils/ARM64BaseInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" @@ -32,17 +32,17 @@ STATISTIC(MCNumFixups, "Number of MC fixups created."); namespace { -class ARM64MCCodeEmitter : public MCCodeEmitter { +class AArch64MCCodeEmitter : public MCCodeEmitter { MCContext &Ctx; - ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT public: - ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) : Ctx(ctx) {} - ~ARM64MCCodeEmitter() {} + ~AArch64MCCodeEmitter() {} // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. @@ -203,19 +203,19 @@ public: } // end anonymous namespace -MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new ARM64MCCodeEmitter(MCII, STI, Ctx); +MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new AArch64MCCodeEmitter(MCII, STI, Ctx); } /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned -ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); else { @@ -228,9 +228,9 @@ ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, } template uint32_t -ARM64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); uint32_t ImmVal = 0; @@ -249,9 +249,9 @@ ARM64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label /// target. uint32_t -ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); // If the destination is an immediate, we have nothing to do. 
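The operand encoders that follow all share one shape: if the operand is already a plain immediate it is encoded directly, otherwise an MCFixup of the appropriate kind is recorded against the expression and the field is left for applyFixup or a relocation to fill in later. A standalone sketch of that pattern with made-up types, not the MC interfaces themselves:

// Illustrative only: encode now, or record a fixup for later resolution.
#include <cstdint>
#include <optional>
#include <vector>

enum FixupKind { PCRelADR, PCRelADRP, Branch26 };

struct PendingFixup { FixupKind Kind; const char *Symbol; };

static uint32_t encodeOperand(std::optional<int64_t> Imm, const char *Symbol,
                              FixupKind Kind,
                              std::vector<PendingFixup> &Fixups) {
  if (Imm)
    return uint32_t(*Imm);          // already resolved: encode it directly
  Fixups.push_back({Kind, Symbol}); // symbolic: defer to fixup/relocation
  return 0;
}

int main() {
  std::vector<PendingFixup> Fixups;
  uint32_t A = encodeOperand(42, nullptr, PCRelADR, Fixups);          // immediate
  uint32_t B = encodeOperand(std::nullopt, "foo", Branch26, Fixups);  // symbol
  return (A == 42 && B == 0 && Fixups.size() == 1) ? 0 : 1;
}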
@@ -260,9 +260,9 @@ ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected target type!"); const MCExpr *Expr = MO.getExpr(); - MCFixupKind Kind = MI.getOpcode() == ARM64::ADR - ? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21) - : MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21); + MCFixupKind Kind = MI.getOpcode() == AArch64::ADR + ? MCFixupKind(AArch64::fixup_aarch64_pcrel_adr_imm21) + : MCFixupKind(AArch64::fixup_aarch64_pcrel_adrp_imm21); Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); MCNumFixups += 1; @@ -275,15 +275,15 @@ ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, /// the 2-bit shift field. The shift field is stored in bits 13-14 of the /// return value. uint32_t -ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { // Suboperands are [imm, shifter]. const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL && + assert(AArch64_AM::getShiftType(MO1.getImm()) == AArch64_AM::LSL && "unexpected shift type for add/sub immediate"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm()); + unsigned ShiftVal = AArch64_AM::getShiftValue(MO1.getImm()); assert((ShiftVal == 0 || ShiftVal == 12) && "unexpected shift value for add/sub immediate"); if (MO.isImm()) @@ -292,7 +292,7 @@ ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, const MCExpr *Expr = MO.getExpr(); // Encode the 12 bits of the fixup. - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_add_imm12); Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumFixups; @@ -302,7 +302,7 @@ ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, /// getCondBranchTargetOpValue - Return the encoded value for a conditional /// branch target. -uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue( +uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue( const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); @@ -312,7 +312,7 @@ uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue( return MO.getImm(); assert(MO.isExpr() && "Unexpected target type!"); - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch19); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch19); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -324,9 +324,9 @@ uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue( /// getLoadLiteralOpValue - Return the encoded value for a load-literal /// pc-relative address. uint32_t -ARM64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); // If the destination is an immediate, we have nothing to do. 
@@ -334,7 +334,7 @@ ARM64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, return MO.getImm(); assert(MO.isExpr() && "Unexpected target type!"); - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_ldr_pcrel_imm19); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_ldr_pcrel_imm19); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -344,18 +344,18 @@ ARM64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t -ARM64MCCodeEmitter::getMemExtendOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getMemExtendOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { unsigned SignExtend = MI.getOperand(OpIdx).getImm(); unsigned DoShift = MI.getOperand(OpIdx + 1).getImm(); return (SignExtend << 1) | DoShift; } uint32_t -ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); if (MO.isImm()) @@ -363,7 +363,7 @@ ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected movz/movk immediate"); Fixups.push_back(MCFixup::Create( - 0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw), MI.getLoc())); + 0, MO.getExpr(), MCFixupKind(AArch64::fixup_aarch64_movw), MI.getLoc())); ++MCNumFixups; @@ -372,7 +372,7 @@ ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- /// branch target. -uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( +uint32_t AArch64MCCodeEmitter::getTestBranchTargetOpValue( const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); @@ -382,7 +382,7 @@ uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( return MO.getImm(); assert(MO.isExpr() && "Unexpected ADR target type!"); - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch14); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -394,9 +394,9 @@ uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( /// getBranchTargetOpValue - Return the encoded value for an unconditional /// branch target. uint32_t -ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); // If the destination is an immediate, we have nothing to do. @@ -404,9 +404,9 @@ ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, return MO.getImm(); assert(MO.isExpr() && "Unexpected ADR target type!"); - MCFixupKind Kind = MI.getOpcode() == ARM64::BL - ? MCFixupKind(ARM64::fixup_arm64_pcrel_call26) - : MCFixupKind(ARM64::fixup_arm64_pcrel_branch26); + MCFixupKind Kind = MI.getOpcode() == AArch64::BL + ? 
MCFixupKind(AArch64::fixup_aarch64_pcrel_call26) + : MCFixupKind(AArch64::fixup_aarch64_pcrel_branch26); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); ++MCNumFixups; @@ -422,9 +422,9 @@ ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, /// 10 -> 16 /// 11 -> 24 uint32_t -ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); @@ -446,36 +446,35 @@ ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t -ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); return 64 - (MO.getImm()); } -uint32_t -ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +uint32_t AArch64MCCodeEmitter::getSIMDShift64_32OpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); return 64 - (MO.getImm() | 32); } uint32_t -ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); return 32 - (MO.getImm() | 16); } uint32_t -ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the shift amount!"); return 16 - (MO.getImm() | 8); @@ -483,7 +482,7 @@ ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, /// getFixedPointScaleOpValue - Return the encoded value for the // FP-to-fixed-point scale factor. 
-uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue( +uint32_t AArch64MCCodeEmitter::getFixedPointScaleOpValue( const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); @@ -492,72 +491,72 @@ uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue( } uint32_t -ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return 64 - MO.getImm(); } uint32_t -ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return 32 - MO.getImm(); } uint32_t -ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return 16 - MO.getImm(); } uint32_t -ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return 8 - MO.getImm(); } uint32_t -ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return MO.getImm() - 64; } uint32_t -ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return MO.getImm() - 32; } uint32_t -ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return MO.getImm() - 16; } uint32_t -ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo 
&STI) const { +AArch64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the scale amount!"); return MO.getImm() - 8; @@ -565,20 +564,19 @@ ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, /// getMoveVecShifterOpValue - Return the encoded value for the vector move /// shifter (MSL). -uint32_t -ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +uint32_t AArch64MCCodeEmitter::getMoveVecShifterOpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); assert(MO.isImm() && "Expected an immediate value for the move shift amount!"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm()); + unsigned ShiftVal = AArch64_AM::getShiftValue(MO.getImm()); assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!"); return ShiftVal == 8 ? 0 : 1; } -unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const { // If one of the signed fixup kinds is applied to a MOVZ instruction, the // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's // job to ensure that any bits possibly affected by this are 0. This means we @@ -589,15 +587,15 @@ unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, if (UImm16MO.isImm()) return EncodedValue; - const ARM64MCExpr *A64E = cast(UImm16MO.getExpr()); + const AArch64MCExpr *A64E = cast(UImm16MO.getExpr()); switch (A64E->getKind()) { - case ARM64MCExpr::VK_DTPREL_G2: - case ARM64MCExpr::VK_DTPREL_G1: - case ARM64MCExpr::VK_DTPREL_G0: - case ARM64MCExpr::VK_GOTTPREL_G1: - case ARM64MCExpr::VK_TPREL_G2: - case ARM64MCExpr::VK_TPREL_G1: - case ARM64MCExpr::VK_TPREL_G0: + case AArch64MCExpr::VK_DTPREL_G2: + case AArch64MCExpr::VK_DTPREL_G1: + case AArch64MCExpr::VK_DTPREL_G0: + case AArch64MCExpr::VK_GOTTPREL_G1: + case AArch64MCExpr::VK_TPREL_G2: + case AArch64MCExpr::VK_TPREL_G1: + case AArch64MCExpr::VK_TPREL_G0: return EncodedValue & ~(1u << 30); default: // Nothing to do for an unsigned fixup. @@ -608,14 +606,14 @@ unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, return EncodedValue & ~(1u << 30); } -void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (MI.getOpcode() == ARM64::TLSDESCCALL) { +void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + if (MI.getOpcode() == AArch64::TLSDESCCALL) { // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the // following (BLR) instruction. It doesn't emit any code itself so it // doesn't go through the normal TableGenerated channels. 
- MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call); + MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup)); return; } @@ -626,9 +624,9 @@ void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, } unsigned -ARM64MCCodeEmitter::fixMulHigh(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 // (i.e. all bits 1) but is ignored by the processor. EncodedValue |= 0x1f << 10; @@ -636,23 +634,21 @@ ARM64MCCodeEmitter::fixMulHigh(const MCInst &MI, } template unsigned -ARM64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { if (!hasRs) EncodedValue |= 0x001F0000; if (!hasRt2) EncodedValue |= 0x00007C00; return EncodedValue; } -unsigned -ARM64MCCodeEmitter::fixOneOperandFPComparison(const MCInst &MI, - unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison( + const MCInst &MI, unsigned EncodedValue, const MCSubtargetInfo &STI) const { // The Rm field of FCMP and friends is unused - it should be assembled // as 0, but is ignored by the processor. EncodedValue &= ~(0x1f << 16); return EncodedValue; } -#include "ARM64GenMCCodeEmitter.inc" +#include "AArch64GenMCCodeEmitter.inc" diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp similarity index 89% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index efa820b097f..85c3ec7a55f 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -1,4 +1,4 @@ -//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===// +//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===// // // The LLVM Compiler Infrastructure // @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "ARM64MCExpr.h" +#include "AArch64MCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELF.h" @@ -25,12 +25,12 @@ using namespace llvm; #define DEBUG_TYPE "aarch64symbolrefexpr" -const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, +const AArch64MCExpr *AArch64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { - return new (Ctx) ARM64MCExpr(Expr, Kind); + return new (Ctx) AArch64MCExpr(Expr, Kind); } -StringRef ARM64MCExpr::getVariantKindName() const { +StringRef AArch64MCExpr::getVariantKindName() const { switch (static_cast(getKind())) { case VK_CALL: return ""; case VK_LO12: return ":lo12:"; @@ -75,7 +75,7 @@ StringRef ARM64MCExpr::getVariantKindName() const { } } -void ARM64MCExpr::PrintImpl(raw_ostream &OS) const { +void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { if (getKind() != VK_NONE) OS << getVariantKindName(); OS << *Expr; @@ -110,15 +110,15 @@ static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { } } -void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const { +void 
AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { AddValueSymbolsImpl(getSubExpr(), Asm); } -const MCSection *ARM64MCExpr::FindAssociatedSection() const { +const MCSection *AArch64MCExpr::FindAssociatedSection() const { llvm_unreachable("FIXME: what goes here?"); } -bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const { if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout)) return false; @@ -159,7 +159,7 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { } } -void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { +void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { switch (getSymbolLoc(Kind)) { default: return; diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h similarity index 92% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h rename to lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index d8325465178..e869ed0a26a 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -1,4 +1,4 @@ -//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=// +//=--- AArch64MCExpr.h - AArch64 specific MC expression classes ---*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -7,20 +7,20 @@ // //===----------------------------------------------------------------------===// // -// This file describes ARM64-specific MCExprs, used for modifiers like +// This file describes AArch64-specific MCExprs, used for modifiers like // ":lo12:" or ":gottprel_g1:". // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARM64MCEXPR_H -#define LLVM_ARM64MCEXPR_H +#ifndef LLVM_AArch64MCEXPR_H +#define LLVM_AArch64MCEXPR_H #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" namespace llvm { -class ARM64MCExpr : public MCTargetExpr { +class AArch64MCExpr : public MCTargetExpr { public: enum VariantKind { VK_NONE = 0x000, @@ -105,14 +105,14 @@ private: const MCExpr *Expr; const VariantKind Kind; - explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind) + explicit AArch64MCExpr(const MCExpr *Expr, VariantKind Kind) : Expr(Expr), Kind(Kind) {} public: /// @name Construction /// @{ - static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, + static const AArch64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx); /// @} @@ -160,7 +160,7 @@ public: return E->getKind() == MCExpr::Target; } - static bool classof(const ARM64MCExpr *) { return true; } + static bool classof(const AArch64MCExpr *) { return true; } }; } // end namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp new file mode 100644 index 00000000000..ae698c59f6c --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -0,0 +1,225 @@ +//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64MCTargetDesc.h" +#include "AArch64ELFStreamer.h" +#include "AArch64MCAsmInfo.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AArch64GenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "AArch64GenRegisterInfo.inc" + +static MCInstrInfo *createAArch64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAArch64MCInstrInfo(X); + return X; +} + +static MCSubtargetInfo * +createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + + if (CPU.empty()) + CPU = "generic"; + + InitAArch64MCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAArch64MCRegisterInfo(X, AArch64::LR); + return X; +} + +static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, + StringRef TT) { + Triple TheTriple(TT); + + MCAsmInfo *MAI; + if (TheTriple.isOSDarwin()) + MAI = new AArch64MCAsmInfoDarwin(); + else { + assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); + MAI = new AArch64MCAsmInfoELF(TT); + } + + // Initial state of the frame pointer is SP. + unsigned Reg = MRI.getDwarfRegNum(AArch64::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + Triple TheTriple(TT); + assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) && + "Only expect Darwin and ELF targets"); + + if (CM == CodeModel::Default) + CM = CodeModel::Small; + // The default MCJIT memory managers make no guarantees about where they can + // find an executable page; JITed code needs to be able to refer to globals + // no matter how far away they are. + else if (CM == CodeModel::JITDefault) + CM = CodeModel::Large; + else if (CM != CodeModel::Small && CM != CodeModel::Large) + report_fatal_error( + "Only small and large code models are allowed on AArch64"); + + // AArch64 Darwin is always PIC. + if (TheTriple.isOSDarwin()) + RM = Reloc::PIC_; + // On ELF platforms the default static relocation model has a smart enough + // linker to cope with referencing external symbols defined in a shared + // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
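createAArch64MCCodeGenInfo, completed in the hunk lines that follow, defaults the code and relocation models: the small code model for normal compilation, the large model for MCJIT (JITed code must be able to reach arbitrarily distant globals), PIC on Darwin, and the static relocation model on ELF. The helper below is a condensed restatement for reference only (the report_fatal_error path for unsupported code models is omitted); it is not code introduced by the patch.

#include "llvm/ADT/Triple.h"
#include "llvm/Support/CodeGen.h"

// Summary of the model defaulting performed by createAArch64MCCodeGenInfo.
static void defaultAArch64Models(const llvm::Triple &TT, llvm::Reloc::Model &RM,
                                 llvm::CodeModel::Model &CM) {
  using namespace llvm;
  if (CM == CodeModel::Default)
    CM = CodeModel::Small;          // Usual case: small code model.
  else if (CM == CodeModel::JITDefault)
    CM = CodeModel::Large;          // MCJIT may place code and globals far apart.
  if (TT.isOSDarwin())
    RM = Reloc::PIC_;               // AArch64 Darwin is always PIC.
  else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
    RM = Reloc::Static;             // ELF linkers cope with the static model.
}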
+ else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) + RM = Reloc::Static; + + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new AArch64InstPrinter(MAI, MII, MRI, STI); + if (SyntaxVariant == 1) + return new AArch64AppleInstPrinter(MAI, MII, MRI, STI); + + return nullptr; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) + return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, + /*LabelSections*/ true); + + return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); +} + +// Force static initialization. +extern "C" void LLVMInitializeAArch64TargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn Z(TheARM64leTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn W(TheARM64beTarget, createAArch64MCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, + createAArch64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, + createAArch64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget, + createAArch64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget, + createAArch64MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, + createAArch64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, + createAArch64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget, + createAArch64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget, + createAArch64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, + createAArch64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, + createAArch64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheARM64leTarget, + createAArch64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheARM64beTarget, + createAArch64MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, + createAArch64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, + createAArch64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget, + createAArch64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget, + createAArch64MCSubtargetInfo); + + // Register the asm backend. 
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, + createAArch64leAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, + createAArch64beAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget, + createAArch64leAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget, + createAArch64beAsmBackend); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, + createAArch64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, + createAArch64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget, + createAArch64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget, + createAArch64MCCodeEmitter); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, + createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, + createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, + createAArch64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, + createAArch64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget, + createAArch64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget, + createAArch64MCInstPrinter); +} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h similarity index 51% rename from lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h rename to lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index f2e9c17a378..d886ea23c13 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===// +//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file provides ARM64 specific target descriptions. +// This file provides AArch64 specific target descriptions. 
// //===----------------------------------------------------------------------===// -#ifndef ARM64MCTARGETDESC_H -#define ARM64MCTARGETDESC_H +#ifndef AArch64MCTARGETDESC_H +#define AArch64MCTARGETDESC_H #include "llvm/Support/DataTypes.h" #include @@ -29,40 +29,42 @@ class StringRef; class Target; class raw_ostream; -extern Target TheARM64leTarget; -extern Target TheARM64beTarget; extern Target TheAArch64leTarget; extern Target TheAArch64beTarget; +extern Target TheARM64leTarget; +extern Target TheARM64beTarget; -MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII, +MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createARM64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); -MCAsmBackend *createARM64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); +MCAsmBackend *createAArch64leAsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU); +MCAsmBackend *createAArch64beAsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU); - MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - bool IsLittleEndian); +MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, + bool IsLittleEndian); -MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, +MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); } // End llvm namespace -// Defines symbolic names for ARM64 registers. This defines a mapping from +// Defines symbolic names for AArch64 registers. This defines a mapping from // register name to register number. // #define GET_REGINFO_ENUM -#include "ARM64GenRegisterInfo.inc" +#include "AArch64GenRegisterInfo.inc" -// Defines symbolic names for the ARM64 instructions. +// Defines symbolic names for the AArch64 instructions. 
// #define GET_INSTRINFO_ENUM -#include "ARM64GenInstrInfo.inc" +#include "AArch64GenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM -#include "ARM64GenSubtargetInfo.inc" +#include "AArch64GenSubtargetInfo.inc" #endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp similarity index 88% rename from lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp rename to lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 1c48159bbe9..5c86189a6ef 100644 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -1,4 +1,4 @@ -//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===// +//===-- AArch64MachObjectWriter.cpp - ARM Mach Object Writer --------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" @@ -23,13 +23,13 @@ using namespace llvm; namespace { -class ARM64MachObjectWriter : public MCMachObjectTargetWriter { - bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType, +class AArch64MachObjectWriter : public MCMachObjectTargetWriter { + bool getAArch64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, unsigned &Log2Size, const MCAssembler &Asm); public: - ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) + AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/true) {} @@ -40,7 +40,7 @@ public: }; } -bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( +bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, unsigned &Log2Size, const MCAssembler &Asm) { RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED); @@ -66,12 +66,12 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); return true; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale16: Log2Size = llvm::Log2_32(4); switch (Sym->getKind()) { default: @@ -86,7 +86,7 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); return true; } - case ARM64::fixup_arm64_pcrel_adrp_imm21: + case AArch64::fixup_aarch64_pcrel_adrp_imm21: Log2Size = llvm::Log2_32(4); // This encompasses the relocation for the whole 21-bit value. 
switch (Sym->getKind()) { @@ -104,15 +104,15 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( return true; } return true; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: Log2Size = llvm::Log2_32(4); RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26); return true; } } -void ARM64MachObjectWriter::RecordRelocation( +void AArch64MachObjectWriter::RecordRelocation( MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { @@ -129,20 +129,20 @@ void ARM64MachObjectWriter::RecordRelocation( FixupOffset += Fixup.getOffset(); - // ARM64 pcrel relocation addends do not include the section offset. + // AArch64 pcrel relocation addends do not include the section offset. if (IsPCRel) FixedValue += FixupOffset; // ADRP fixups use relocations for the whole symbol value and only // put the addend in the instruction itself. Clear out any value the // generic code figured out from the sybmol definition. - if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21) + if (Kind == AArch64::fixup_aarch64_pcrel_adrp_imm21) FixedValue = 0; // imm19 relocations are for conditional branches, which require // assembler local symbols. If we got here, that's not what we have, // so complain loudly. - if (Kind == ARM64::fixup_arm64_pcrel_branch19) { + if (Kind == AArch64::fixup_aarch64_pcrel_branch19) { Asm.getContext().FatalError(Fixup.getLoc(), "conditional branch requires assembler-local" " label. '" + @@ -153,15 +153,15 @@ void ARM64MachObjectWriter::RecordRelocation( // 14-bit branch relocations should only target internal labels, and so // should never get here. - if (Kind == ARM64::fixup_arm64_pcrel_branch14) { + if (Kind == AArch64::fixup_aarch64_pcrel_branch14) { Asm.getContext().FatalError(Fixup.getLoc(), "Invalid relocation on conditional branch!"); return; } - if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, + if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, Asm)) { - Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup kind!"); + Asm.getContext().FatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); return; } @@ -220,7 +220,7 @@ void ARM64MachObjectWriter::RecordRelocation( "unsupported pc-relative relocation of " "difference"); - // ARM64 always uses external relocations. If there is no symbol to use as + // AArch64 always uses external relocations. If there is no symbol to use as // a base address (a local symbol with no preceding non-local symbol), // error out. // @@ -305,9 +305,9 @@ void ARM64MachObjectWriter::RecordRelocation( Base = nullptr; } - // ARM64 uses external relocations as much as possible. For debug sections, - // and for pointer-sized relocations (.quad), we allow section relocations. - // It's code sections that run into trouble. + // AArch64 uses external relocations as much as possible. For debug + // sections, and for pointer-sized relocations (.quad), we allow section + // relocations. It's code sections that run into trouble. 
if (Base) { Index = Base->getIndex(); IsExtern = 1; @@ -387,9 +387,10 @@ void ARM64MachObjectWriter::RecordRelocation( Writer->addRelocation(Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype) { - return createMachObjectWriter(new ARM64MachObjectWriter(CPUType, CPUSubtype), - OS, /*IsLittleEndian=*/true); + return createMachObjectWriter( + new AArch64MachObjectWriter(CPUType, CPUSubtype), OS, + /*IsLittleEndian=*/true); } diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt new file mode 100644 index 00000000000..7d5bced17a6 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,14 @@ +add_llvm_library(LLVMAArch64Desc + AArch64AsmBackend.cpp + AArch64ELFObjectWriter.cpp + AArch64ELFStreamer.cpp + AArch64MCAsmInfo.cpp + AArch64MCCodeEmitter.cpp + AArch64MCExpr.cpp + AArch64MCTargetDesc.cpp + AArch64MachObjectWriter.cpp +) +add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt similarity index 72% rename from lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt rename to lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt index e4c74d285d4..70cff0b704f 100644 --- a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,8 +17,8 @@ [component_0] type = Library -name = ARM64Desc -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Info MC Support -add_to_library_groups = ARM64 +name = AArch64Desc +parent = AArch64 +required_libraries = AArch64AsmPrinter AArch64Info MC Support +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile similarity index 82% rename from lib/Target/ARM64/MCTargetDesc/Makefile rename to lib/Target/AArch64/MCTargetDesc/Makefile index 013cc633f66..5779ac5ac60 100644 --- a/lib/Target/ARM64/MCTargetDesc/Makefile +++ b/lib/Target/AArch64/MCTargetDesc/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/TargetDesc/Makefile ----------------*- Makefile -*-===## +##===- lib/Target/AArch64/TargetDesc/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Desc +LIBRARYNAME = LLVMAArch64Desc # Hack: we need to include 'main' target directory to grab private headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile new file mode 100644 index 00000000000..f356c585041 --- /dev/null +++ b/lib/Target/AArch64/Makefile @@ -0,0 +1,25 @@ +##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAArch64CodeGen +TARGET = AArch64 + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = AArch64GenRegisterInfo.inc AArch64GenInstrInfo.inc \ + AArch64GenAsmWriter.inc AArch64GenAsmWriter1.inc \ + AArch64GenDAGISel.inc \ + AArch64GenCallingConv.inc AArch64GenAsmMatcher.inc \ + AArch64GenSubtargetInfo.inc AArch64GenMCCodeEmitter.inc \ + AArch64GenFastISel.inc AArch64GenDisassemblerTables.inc \ + AArch64GenMCPseudoLowering.inc + +DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc Utils + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp similarity index 64% rename from lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp rename to lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index 247566825ab..3a382c165e7 100644 --- a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp +++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64TargetInfo.cpp - ARM64 Target Implementation -----------------===// +//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -----------------===// // // The LLVM Compiler Infrastructure // @@ -12,20 +12,20 @@ using namespace llvm; namespace llvm { -Target TheARM64leTarget; -Target TheARM64beTarget; Target TheAArch64leTarget; Target TheAArch64beTarget; +Target TheARM64leTarget; +Target TheARM64beTarget; } // end namespace llvm -extern "C" void LLVMInitializeARM64TargetInfo() { +extern "C" void LLVMInitializeAArch64TargetInfo() { RegisterTarget X(TheARM64leTarget, "arm64", - "ARM64 (little endian)"); + "AArch64 (little endian)"); RegisterTarget Y(TheARM64beTarget, "arm64_be", - "ARM64 (big endian)"); + "AArch64 (big endian)"); RegisterTarget Z( - TheAArch64leTarget, "aarch64", "ARM64 (little endian)"); + TheAArch64leTarget, "aarch64", "AArch64 (little endian)"); RegisterTarget W( - TheAArch64beTarget, "aarch64_be", "ARM64 (big endian)"); + TheAArch64beTarget, "aarch64_be", "AArch64 (big endian)"); } diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt new file mode 100644 index 00000000000..e236eed00be --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMAArch64Info + AArch64TargetInfo.cpp + ) + +add_dependencies(LLVMAArch64Info AArch64CommonTableGen) diff --git a/lib/Target/ARM64/Utils/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt similarity index 79% rename from lib/Target/ARM64/Utils/LLVMBuild.txt rename to lib/Target/AArch64/TargetInfo/LLVMBuild.txt index 232dca29f40..93c5407bb1f 100644 --- a/lib/Target/ARM64/Utils/LLVMBuild.txt +++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/Utils/LLVMBuild.txt ----------------*- Conf -*--===; +;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = ARM64Utils -parent = ARM64 +name = AArch64Info +parent = AArch64 required_libraries = Support -add_to_library_groups = ARM64 +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile similarity index 82% rename from lib/Target/ARM64/TargetInfo/Makefile rename to lib/Target/AArch64/TargetInfo/Makefile index 2d5a1a087a5..9dc9aa4bccf 100644 --- a/lib/Target/ARM64/TargetInfo/Makefile +++ b/lib/Target/AArch64/TargetInfo/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/TargetInfo/Makefile ----------------*- Makefile -*-===## +##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Info +LIBRARYNAME = LLVMAArch64Info # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/ARM64/Utils/ARM64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp similarity index 89% rename from lib/Target/ARM64/Utils/ARM64BaseInfo.cpp rename to lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 5142d18c23c..3c24bb30a26 100644 --- a/lib/Target/ARM64/Utils/ARM64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARM64BaseInfo.cpp - ARM64 Base encoding information------------===// +//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===// // // The LLVM Compiler Infrastructure // @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// // -// This file provides basic encoding and assembly information for ARM64. +// This file provides basic encoding and assembly information for AArch64. 
// //===----------------------------------------------------------------------===// -#include "ARM64BaseInfo.h" +#include "AArch64BaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -18,7 +18,7 @@ using namespace llvm; -StringRef ARM64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { +StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { for (unsigned i = 0; i < NumPairs; ++i) { if (Pairs[i].Value == Value) { Valid = true; @@ -30,7 +30,7 @@ StringRef ARM64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { return StringRef(); } -uint32_t ARM64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { +uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { std::string LowerCaseName = Name.lower(); for (unsigned i = 0; i < NumPairs; ++i) { if (Pairs[i].Name == LowerCaseName) { @@ -43,11 +43,11 @@ uint32_t ARM64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { return -1; } -bool ARM64NamedImmMapper::validImm(uint32_t Value) const { +bool AArch64NamedImmMapper::validImm(uint32_t Value) const { return Value < TooBigImm; } -const ARM64NamedImmMapper::Mapping ARM64AT::ATMapper::ATPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { {"s1e1r", S1E1R}, {"s1e2r", S1E2R}, {"s1e3r", S1E3R}, @@ -62,10 +62,10 @@ const ARM64NamedImmMapper::Mapping ARM64AT::ATMapper::ATPairs[] = { {"s12e0w", S12E0W}, }; -ARM64AT::ATMapper::ATMapper() - : ARM64NamedImmMapper(ATPairs, 0) {} +AArch64AT::ATMapper::ATMapper() + : AArch64NamedImmMapper(ATPairs, 0) {} -const ARM64NamedImmMapper::Mapping ARM64DB::DBarrierMapper::DBarrierPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = { {"oshld", OSHLD}, {"oshst", OSHST}, {"osh", OSH}, @@ -80,10 +80,10 @@ const ARM64NamedImmMapper::Mapping ARM64DB::DBarrierMapper::DBarrierPairs[] = { {"sy", SY} }; -ARM64DB::DBarrierMapper::DBarrierMapper() - : ARM64NamedImmMapper(DBarrierPairs, 16u) {} +AArch64DB::DBarrierMapper::DBarrierMapper() + : AArch64NamedImmMapper(DBarrierPairs, 16u) {} -const ARM64NamedImmMapper::Mapping ARM64DC::DCMapper::DCPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { {"zva", ZVA}, {"ivac", IVAC}, {"isw", ISW}, @@ -94,26 +94,26 @@ const ARM64NamedImmMapper::Mapping ARM64DC::DCMapper::DCPairs[] = { {"cisw", CISW} }; -ARM64DC::DCMapper::DCMapper() - : ARM64NamedImmMapper(DCPairs, 0) {} +AArch64DC::DCMapper::DCMapper() + : AArch64NamedImmMapper(DCPairs, 0) {} -const ARM64NamedImmMapper::Mapping ARM64IC::ICMapper::ICPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = { {"ialluis", IALLUIS}, {"iallu", IALLU}, {"ivau", IVAU} }; -ARM64IC::ICMapper::ICMapper() - : ARM64NamedImmMapper(ICPairs, 0) {} +AArch64IC::ICMapper::ICMapper() + : AArch64NamedImmMapper(ICPairs, 0) {} -const ARM64NamedImmMapper::Mapping ARM64ISB::ISBMapper::ISBPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = { {"sy", SY}, }; -ARM64ISB::ISBMapper::ISBMapper() - : ARM64NamedImmMapper(ISBPairs, 16) {} +AArch64ISB::ISBMapper::ISBMapper() + : AArch64NamedImmMapper(ISBPairs, 16) {} -const ARM64NamedImmMapper::Mapping ARM64PRFM::PRFMMapper::PRFMPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { {"pldl1keep", PLDL1KEEP}, {"pldl1strm", PLDL1STRM}, {"pldl2keep", PLDL2KEEP}, @@ -134,19 +134,19 @@ const ARM64NamedImmMapper::Mapping 
ARM64PRFM::PRFMMapper::PRFMPairs[] = { {"pstl3strm", PSTL3STRM} }; -ARM64PRFM::PRFMMapper::PRFMMapper() - : ARM64NamedImmMapper(PRFMPairs, 32) {} +AArch64PRFM::PRFMMapper::PRFMMapper() + : AArch64NamedImmMapper(PRFMPairs, 32) {} -const ARM64NamedImmMapper::Mapping ARM64PState::PStateMapper::PStatePairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = { {"spsel", SPSel}, {"daifset", DAIFSet}, {"daifclr", DAIFClr} }; -ARM64PState::PStateMapper::PStateMapper() - : ARM64NamedImmMapper(PStatePairs, 0) {} +AArch64PState::PStateMapper::PStateMapper() + : AArch64NamedImmMapper(PStatePairs, 0) {} -const ARM64NamedImmMapper::Mapping ARM64SysReg::MRSMapper::MRSPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { {"mdccsr_el0", MDCCSR_EL0}, {"dbgdtrrx_el0", DBGDTRRX_EL0}, {"mdrar_el1", MDRAR_EL1}, @@ -176,16 +176,16 @@ const ARM64NamedImmMapper::Mapping ARM64SysReg::MRSMapper::MRSPairs[] = { {"id_isar3_el1", ID_ISAR3_EL1}, {"id_isar4_el1", ID_ISAR4_EL1}, {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_AARM64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_AARM64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_AARM64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_AARM64DFR1_EL1}, - {"id_aa64afr0_el1", ID_AARM64AFR0_EL1}, - {"id_aa64afr1_el1", ID_AARM64AFR1_EL1}, - {"id_aa64isar0_el1", ID_AARM64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_AARM64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_AARM64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_AARM64MMFR1_EL1}, + {"id_aa64pfr0_el1", ID_A64PFR0_EL1}, + {"id_aa64pfr1_el1", ID_A64PFR1_EL1}, + {"id_aa64dfr0_el1", ID_A64DFR0_EL1}, + {"id_aa64dfr1_el1", ID_A64DFR1_EL1}, + {"id_aa64afr0_el1", ID_A64AFR0_EL1}, + {"id_aa64afr1_el1", ID_A64AFR1_EL1}, + {"id_aa64isar0_el1", ID_A64ISAR0_EL1}, + {"id_aa64isar1_el1", ID_A64ISAR1_EL1}, + {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1}, + {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1}, {"mvfr0_el1", MVFR0_EL1}, {"mvfr1_el1", MVFR1_EL1}, {"mvfr2_el1", MVFR2_EL1}, @@ -245,13 +245,13 @@ const ARM64NamedImmMapper::Mapping ARM64SysReg::MRSMapper::MRSPairs[] = { {"ich_elsr_el2", ICH_ELSR_EL2} }; -ARM64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) +AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) : SysRegMapper(FeatureBits) { InstPairs = &MRSPairs[0]; NumInstPairs = llvm::array_lengthof(MRSPairs); } -const ARM64NamedImmMapper::Mapping ARM64SysReg::MSRMapper::MSRPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = { {"dbgdtrtx_el0", DBGDTRTX_EL0}, {"oslar_el1", OSLAR_EL1}, {"pmswinc_el0", PMSWINC_EL0}, @@ -269,14 +269,14 @@ const ARM64NamedImmMapper::Mapping ARM64SysReg::MSRMapper::MSRPairs[] = { {"icc_sgi0r_el1", ICC_SGI0R_EL1} }; -ARM64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) +AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) : SysRegMapper(FeatureBits) { InstPairs = &MSRPairs[0]; NumInstPairs = llvm::array_lengthof(MSRPairs); } -const ARM64NamedImmMapper::Mapping ARM64SysReg::SysRegMapper::SysRegPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = { {"osdtrrx_el1", OSDTRRX_EL1}, {"osdtrtx_el1", OSDTRTX_EL1}, {"teecr32_el1", TEECR32_EL1}, @@ -755,13 +755,13 @@ const ARM64NamedImmMapper::Mapping ARM64SysReg::SysRegMapper::SysRegPairs[] = { {"ich_lr15_el2", ICH_LR15_EL2} }; -const ARM64NamedImmMapper::Mapping -ARM64SysReg::SysRegMapper::CycloneSysRegPairs[] = { +const AArch64NamedImmMapper::Mapping +AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = { {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} }; uint32_t 
-ARM64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { +AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { std::string NameLower = Name.lower(); // First search the registers shared by all @@ -773,7 +773,7 @@ ARM64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } // Next search for target specific registers - if (FeatureBits & ARM64::ProcCyclone) { + if (FeatureBits & AArch64::ProcCyclone) { for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { if (CycloneSysRegPairs[i].Name == NameLower) { Valid = true; @@ -814,7 +814,7 @@ ARM64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } std::string -ARM64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { +AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { if (SysRegPairs[i].Value == Bits) { @@ -824,7 +824,7 @@ ARM64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { } // Next search for target specific registers - if (FeatureBits & ARM64::ProcCyclone) { + if (FeatureBits & AArch64::ProcCyclone) { for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { if (CycloneSysRegPairs[i].Value == Bits) { Valid = true; @@ -862,7 +862,7 @@ ARM64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { + "_c" + utostr(CRm) + "_" + utostr(Op2); } -const ARM64NamedImmMapper::Mapping ARM64TLBI::TLBIMapper::TLBIPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = { {"ipas2e1is", IPAS2E1IS}, {"ipas2le1is", IPAS2LE1IS}, {"vmalle1is", VMALLE1IS}, @@ -897,5 +897,5 @@ const ARM64NamedImmMapper::Mapping ARM64TLBI::TLBIMapper::TLBIPairs[] = { {"vaale1", VAALE1} }; -ARM64TLBI::TLBIMapper::TLBIMapper() - : ARM64NamedImmMapper(TLBIPairs, 0) {} +AArch64TLBI::TLBIMapper::TLBIMapper() + : AArch64NamedImmMapper(TLBIPairs, 0) {} diff --git a/lib/Target/ARM64/Utils/ARM64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h similarity index 84% rename from lib/Target/ARM64/Utils/ARM64BaseInfo.h rename to lib/Target/AArch64/Utils/AArch64BaseInfo.h index 8075d6b37c9..9e4c389cc2e 100644 --- a/lib/Target/ARM64/Utils/ARM64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -1,4 +1,4 @@ -//===-- ARM64BaseInfo.h - Top level definitions for ARM64 -------*- C++ -*-===// +//===-- AArch64BaseInfo.h - Top level definitions for AArch64 ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,18 +8,18 @@ //===----------------------------------------------------------------------===// // // This file contains small standalone helper functions and enum definitions for -// the ARM64 target useful for the compiler back-end and the MC libraries. +// the AArch64 target useful for the compiler back-end and the MC libraries. // As such, it deliberately does not include references to LLVM core // code gen types, passes, etc.. // //===----------------------------------------------------------------------===// -#ifndef ARM64BASEINFO_H -#define ARM64BASEINFO_H +#ifndef AArch64BASEINFO_H +#define AArch64BASEINFO_H // FIXME: Is it easiest to fix this layering violation by moving the .inc -// #includes from ARM64MCTargetDesc.h to here? -#include "MCTargetDesc/ARM64MCTargetDesc.h" // For ARM64::X0 and friends. +// #includes from AArch64MCTargetDesc.h to here? +#include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends. 
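AArch64BaseInfo.cpp above resolves named immediates (barrier, prefetch, TLBI, and system-register operands, among others) by a case-insensitive linear scan over static {name, value} tables, with the Cyclone-only system registers consulted only when FeatureBits contains AArch64::ProcCyclone. The self-contained sketch below mirrors that two-way lookup; the table contents and function names are placeholders for illustration, not code from the patch.

#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <string>

// Stand-alone mirror of the AArch64NamedImmMapper lookup scheme.
struct NamedImmPair { const char *Name; uint32_t Value; };

static const NamedImmPair ExamplePairs[] = {{"ld", 13}, {"st", 14}, {"sy", 15}};

static uint32_t exampleFromString(llvm::StringRef Name, bool &Valid) {
  std::string Lower = Name.lower();   // names are matched case-insensitively
  for (const NamedImmPair &P : ExamplePairs)
    if (Lower == P.Name) {
      Valid = true;
      return P.Value;
    }
  Valid = false;
  return -1;
}

static llvm::StringRef exampleToString(uint32_t Value, bool &Valid) {
  for (const NamedImmPair &P : ExamplePairs)
    if (P.Value == Value) {
      Valid = true;
      return P.Name;
    }
  Valid = false;
  return llvm::StringRef();
}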
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" @@ -28,39 +28,39 @@ namespace llvm { inline static unsigned getWRegFromXReg(unsigned Reg) { switch (Reg) { - case ARM64::X0: return ARM64::W0; - case ARM64::X1: return ARM64::W1; - case ARM64::X2: return ARM64::W2; - case ARM64::X3: return ARM64::W3; - case ARM64::X4: return ARM64::W4; - case ARM64::X5: return ARM64::W5; - case ARM64::X6: return ARM64::W6; - case ARM64::X7: return ARM64::W7; - case ARM64::X8: return ARM64::W8; - case ARM64::X9: return ARM64::W9; - case ARM64::X10: return ARM64::W10; - case ARM64::X11: return ARM64::W11; - case ARM64::X12: return ARM64::W12; - case ARM64::X13: return ARM64::W13; - case ARM64::X14: return ARM64::W14; - case ARM64::X15: return ARM64::W15; - case ARM64::X16: return ARM64::W16; - case ARM64::X17: return ARM64::W17; - case ARM64::X18: return ARM64::W18; - case ARM64::X19: return ARM64::W19; - case ARM64::X20: return ARM64::W20; - case ARM64::X21: return ARM64::W21; - case ARM64::X22: return ARM64::W22; - case ARM64::X23: return ARM64::W23; - case ARM64::X24: return ARM64::W24; - case ARM64::X25: return ARM64::W25; - case ARM64::X26: return ARM64::W26; - case ARM64::X27: return ARM64::W27; - case ARM64::X28: return ARM64::W28; - case ARM64::FP: return ARM64::W29; - case ARM64::LR: return ARM64::W30; - case ARM64::SP: return ARM64::WSP; - case ARM64::XZR: return ARM64::WZR; + case AArch64::X0: return AArch64::W0; + case AArch64::X1: return AArch64::W1; + case AArch64::X2: return AArch64::W2; + case AArch64::X3: return AArch64::W3; + case AArch64::X4: return AArch64::W4; + case AArch64::X5: return AArch64::W5; + case AArch64::X6: return AArch64::W6; + case AArch64::X7: return AArch64::W7; + case AArch64::X8: return AArch64::W8; + case AArch64::X9: return AArch64::W9; + case AArch64::X10: return AArch64::W10; + case AArch64::X11: return AArch64::W11; + case AArch64::X12: return AArch64::W12; + case AArch64::X13: return AArch64::W13; + case AArch64::X14: return AArch64::W14; + case AArch64::X15: return AArch64::W15; + case AArch64::X16: return AArch64::W16; + case AArch64::X17: return AArch64::W17; + case AArch64::X18: return AArch64::W18; + case AArch64::X19: return AArch64::W19; + case AArch64::X20: return AArch64::W20; + case AArch64::X21: return AArch64::W21; + case AArch64::X22: return AArch64::W22; + case AArch64::X23: return AArch64::W23; + case AArch64::X24: return AArch64::W24; + case AArch64::X25: return AArch64::W25; + case AArch64::X26: return AArch64::W26; + case AArch64::X27: return AArch64::W27; + case AArch64::X28: return AArch64::W28; + case AArch64::FP: return AArch64::W29; + case AArch64::LR: return AArch64::W30; + case AArch64::SP: return AArch64::WSP; + case AArch64::XZR: return AArch64::WZR; } // For anything else, return it unchanged. 
return Reg; @@ -68,39 +68,39 @@ inline static unsigned getWRegFromXReg(unsigned Reg) { inline static unsigned getXRegFromWReg(unsigned Reg) { switch (Reg) { - case ARM64::W0: return ARM64::X0; - case ARM64::W1: return ARM64::X1; - case ARM64::W2: return ARM64::X2; - case ARM64::W3: return ARM64::X3; - case ARM64::W4: return ARM64::X4; - case ARM64::W5: return ARM64::X5; - case ARM64::W6: return ARM64::X6; - case ARM64::W7: return ARM64::X7; - case ARM64::W8: return ARM64::X8; - case ARM64::W9: return ARM64::X9; - case ARM64::W10: return ARM64::X10; - case ARM64::W11: return ARM64::X11; - case ARM64::W12: return ARM64::X12; - case ARM64::W13: return ARM64::X13; - case ARM64::W14: return ARM64::X14; - case ARM64::W15: return ARM64::X15; - case ARM64::W16: return ARM64::X16; - case ARM64::W17: return ARM64::X17; - case ARM64::W18: return ARM64::X18; - case ARM64::W19: return ARM64::X19; - case ARM64::W20: return ARM64::X20; - case ARM64::W21: return ARM64::X21; - case ARM64::W22: return ARM64::X22; - case ARM64::W23: return ARM64::X23; - case ARM64::W24: return ARM64::X24; - case ARM64::W25: return ARM64::X25; - case ARM64::W26: return ARM64::X26; - case ARM64::W27: return ARM64::X27; - case ARM64::W28: return ARM64::X28; - case ARM64::W29: return ARM64::FP; - case ARM64::W30: return ARM64::LR; - case ARM64::WSP: return ARM64::SP; - case ARM64::WZR: return ARM64::XZR; + case AArch64::W0: return AArch64::X0; + case AArch64::W1: return AArch64::X1; + case AArch64::W2: return AArch64::X2; + case AArch64::W3: return AArch64::X3; + case AArch64::W4: return AArch64::X4; + case AArch64::W5: return AArch64::X5; + case AArch64::W6: return AArch64::X6; + case AArch64::W7: return AArch64::X7; + case AArch64::W8: return AArch64::X8; + case AArch64::W9: return AArch64::X9; + case AArch64::W10: return AArch64::X10; + case AArch64::W11: return AArch64::X11; + case AArch64::W12: return AArch64::X12; + case AArch64::W13: return AArch64::X13; + case AArch64::W14: return AArch64::X14; + case AArch64::W15: return AArch64::X15; + case AArch64::W16: return AArch64::X16; + case AArch64::W17: return AArch64::X17; + case AArch64::W18: return AArch64::X18; + case AArch64::W19: return AArch64::X19; + case AArch64::W20: return AArch64::X20; + case AArch64::W21: return AArch64::X21; + case AArch64::W22: return AArch64::X22; + case AArch64::W23: return AArch64::X23; + case AArch64::W24: return AArch64::X24; + case AArch64::W25: return AArch64::X25; + case AArch64::W26: return AArch64::X26; + case AArch64::W27: return AArch64::X27; + case AArch64::W28: return AArch64::X28; + case AArch64::W29: return AArch64::FP; + case AArch64::W30: return AArch64::LR; + case AArch64::WSP: return AArch64::SP; + case AArch64::WZR: return AArch64::XZR; } // For anything else, return it unchanged. 
return Reg; @@ -108,38 +108,38 @@ inline static unsigned getXRegFromWReg(unsigned Reg) { static inline unsigned getBRegFromDReg(unsigned Reg) { switch (Reg) { - case ARM64::D0: return ARM64::B0; - case ARM64::D1: return ARM64::B1; - case ARM64::D2: return ARM64::B2; - case ARM64::D3: return ARM64::B3; - case ARM64::D4: return ARM64::B4; - case ARM64::D5: return ARM64::B5; - case ARM64::D6: return ARM64::B6; - case ARM64::D7: return ARM64::B7; - case ARM64::D8: return ARM64::B8; - case ARM64::D9: return ARM64::B9; - case ARM64::D10: return ARM64::B10; - case ARM64::D11: return ARM64::B11; - case ARM64::D12: return ARM64::B12; - case ARM64::D13: return ARM64::B13; - case ARM64::D14: return ARM64::B14; - case ARM64::D15: return ARM64::B15; - case ARM64::D16: return ARM64::B16; - case ARM64::D17: return ARM64::B17; - case ARM64::D18: return ARM64::B18; - case ARM64::D19: return ARM64::B19; - case ARM64::D20: return ARM64::B20; - case ARM64::D21: return ARM64::B21; - case ARM64::D22: return ARM64::B22; - case ARM64::D23: return ARM64::B23; - case ARM64::D24: return ARM64::B24; - case ARM64::D25: return ARM64::B25; - case ARM64::D26: return ARM64::B26; - case ARM64::D27: return ARM64::B27; - case ARM64::D28: return ARM64::B28; - case ARM64::D29: return ARM64::B29; - case ARM64::D30: return ARM64::B30; - case ARM64::D31: return ARM64::B31; + case AArch64::D0: return AArch64::B0; + case AArch64::D1: return AArch64::B1; + case AArch64::D2: return AArch64::B2; + case AArch64::D3: return AArch64::B3; + case AArch64::D4: return AArch64::B4; + case AArch64::D5: return AArch64::B5; + case AArch64::D6: return AArch64::B6; + case AArch64::D7: return AArch64::B7; + case AArch64::D8: return AArch64::B8; + case AArch64::D9: return AArch64::B9; + case AArch64::D10: return AArch64::B10; + case AArch64::D11: return AArch64::B11; + case AArch64::D12: return AArch64::B12; + case AArch64::D13: return AArch64::B13; + case AArch64::D14: return AArch64::B14; + case AArch64::D15: return AArch64::B15; + case AArch64::D16: return AArch64::B16; + case AArch64::D17: return AArch64::B17; + case AArch64::D18: return AArch64::B18; + case AArch64::D19: return AArch64::B19; + case AArch64::D20: return AArch64::B20; + case AArch64::D21: return AArch64::B21; + case AArch64::D22: return AArch64::B22; + case AArch64::D23: return AArch64::B23; + case AArch64::D24: return AArch64::B24; + case AArch64::D25: return AArch64::B25; + case AArch64::D26: return AArch64::B26; + case AArch64::D27: return AArch64::B27; + case AArch64::D28: return AArch64::B28; + case AArch64::D29: return AArch64::B29; + case AArch64::D30: return AArch64::B30; + case AArch64::D31: return AArch64::B31; } // For anything else, return it unchanged. 
return Reg; @@ -148,44 +148,44 @@ static inline unsigned getBRegFromDReg(unsigned Reg) { static inline unsigned getDRegFromBReg(unsigned Reg) { switch (Reg) { - case ARM64::B0: return ARM64::D0; - case ARM64::B1: return ARM64::D1; - case ARM64::B2: return ARM64::D2; - case ARM64::B3: return ARM64::D3; - case ARM64::B4: return ARM64::D4; - case ARM64::B5: return ARM64::D5; - case ARM64::B6: return ARM64::D6; - case ARM64::B7: return ARM64::D7; - case ARM64::B8: return ARM64::D8; - case ARM64::B9: return ARM64::D9; - case ARM64::B10: return ARM64::D10; - case ARM64::B11: return ARM64::D11; - case ARM64::B12: return ARM64::D12; - case ARM64::B13: return ARM64::D13; - case ARM64::B14: return ARM64::D14; - case ARM64::B15: return ARM64::D15; - case ARM64::B16: return ARM64::D16; - case ARM64::B17: return ARM64::D17; - case ARM64::B18: return ARM64::D18; - case ARM64::B19: return ARM64::D19; - case ARM64::B20: return ARM64::D20; - case ARM64::B21: return ARM64::D21; - case ARM64::B22: return ARM64::D22; - case ARM64::B23: return ARM64::D23; - case ARM64::B24: return ARM64::D24; - case ARM64::B25: return ARM64::D25; - case ARM64::B26: return ARM64::D26; - case ARM64::B27: return ARM64::D27; - case ARM64::B28: return ARM64::D28; - case ARM64::B29: return ARM64::D29; - case ARM64::B30: return ARM64::D30; - case ARM64::B31: return ARM64::D31; + case AArch64::B0: return AArch64::D0; + case AArch64::B1: return AArch64::D1; + case AArch64::B2: return AArch64::D2; + case AArch64::B3: return AArch64::D3; + case AArch64::B4: return AArch64::D4; + case AArch64::B5: return AArch64::D5; + case AArch64::B6: return AArch64::D6; + case AArch64::B7: return AArch64::D7; + case AArch64::B8: return AArch64::D8; + case AArch64::B9: return AArch64::D9; + case AArch64::B10: return AArch64::D10; + case AArch64::B11: return AArch64::D11; + case AArch64::B12: return AArch64::D12; + case AArch64::B13: return AArch64::D13; + case AArch64::B14: return AArch64::D14; + case AArch64::B15: return AArch64::D15; + case AArch64::B16: return AArch64::D16; + case AArch64::B17: return AArch64::D17; + case AArch64::B18: return AArch64::D18; + case AArch64::B19: return AArch64::D19; + case AArch64::B20: return AArch64::D20; + case AArch64::B21: return AArch64::D21; + case AArch64::B22: return AArch64::D22; + case AArch64::B23: return AArch64::D23; + case AArch64::B24: return AArch64::D24; + case AArch64::B25: return AArch64::D25; + case AArch64::B26: return AArch64::D26; + case AArch64::B27: return AArch64::D27; + case AArch64::B28: return AArch64::D28; + case AArch64::B29: return AArch64::D29; + case AArch64::B30: return AArch64::D30; + case AArch64::B31: return AArch64::D31; } // For anything else, return it unchanged. return Reg; } -namespace ARM64CC { +namespace AArch64CC { // The CondCodes constants map directly to the 4-bit encoding of the condition // field for predicated instructions. @@ -277,7 +277,7 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) { case LE: return Z; // Z == 1 || N != V } } -} // end namespace ARM64CC +} // end namespace AArch64CC /// Instances of this class can perform bidirectional mapping from random /// identifier strings to operand encodings. For example "MSR" takes a named @@ -290,14 +290,14 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) { /// out just how often these instructions are emitted before working on it. It /// might even be optimal to just reorder the tables for the common instructions /// rather than changing the algorithm. 
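The comment above deliberately leaves room to change the lookup algorithm later if these mappers ever become hot. One obvious variant is to keep each pair table sorted and binary-search the value-to-name direction instead of scanning it. The standalone C++ sketch below shows that variant; it is an illustration rather than code from the patch, and the two table entries are borrowed from the barrier enums further down this header.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iterator>

struct Mapping {
  const char *Name;
  uint32_t Value;
};

// Must stay sorted by Value for the binary search below.
static const Mapping Pairs[] = {{"oshld", 0x1}, {"sy", 0xf}};

static const char *toString(uint32_t Value, bool &Valid) {
  const Mapping *First = std::begin(Pairs);
  const Mapping *Last = std::end(Pairs);
  const Mapping *It = std::lower_bound(
      First, Last, Value,
      [](const Mapping &M, uint32_t V) { return M.Value < V; });
  Valid = It != Last && It->Value == Value;
  return Valid ? It->Name : "";
}

int main() {
  bool Valid = false;
  const char *Name = toString(0xf, Valid);
  std::printf("0xf -> \"%s\" (valid=%d)\n", Name, Valid ? 1 : 0);
  Name = toString(0x7, Valid);
  std::printf("0x7 -> \"%s\" (valid=%d)\n", Name, Valid ? 1 : 0);
  return 0;
}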
-struct ARM64NamedImmMapper { +struct AArch64NamedImmMapper { struct Mapping { const char *Name; uint32_t Value; }; template - ARM64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) + AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} StringRef toString(uint32_t Value, bool &Valid) const; @@ -313,7 +313,7 @@ protected: uint32_t TooBigImm; }; -namespace ARM64AT { +namespace AArch64AT { enum ATValues { Invalid = -1, // Op0 Op1 CRn CRm Op2 S1E1R = 0x43c0, // 01 000 0111 1000 000 @@ -330,14 +330,14 @@ namespace ARM64AT { S12E0W = 0x63c7 // 01 100 0111 1000 111 }; - struct ATMapper : ARM64NamedImmMapper { + struct ATMapper : AArch64NamedImmMapper { const static Mapping ATPairs[]; ATMapper(); }; } -namespace ARM64DB { +namespace AArch64DB { enum DBValues { Invalid = -1, OSHLD = 0x1, @@ -354,14 +354,14 @@ namespace ARM64DB { SY = 0xf }; - struct DBarrierMapper : ARM64NamedImmMapper { + struct DBarrierMapper : AArch64NamedImmMapper { const static Mapping DBarrierPairs[]; DBarrierMapper(); }; } -namespace ARM64DC { +namespace AArch64DC { enum DCValues { Invalid = -1, // Op1 CRn CRm Op2 ZVA = 0x5ba1, // 01 011 0111 0100 001 @@ -374,7 +374,7 @@ namespace ARM64DC { CISW = 0x43f2 // 01 000 0111 1110 010 }; - struct DCMapper : ARM64NamedImmMapper { + struct DCMapper : AArch64NamedImmMapper { const static Mapping DCPairs[]; DCMapper(); @@ -382,7 +382,7 @@ namespace ARM64DC { } -namespace ARM64IC { +namespace AArch64IC { enum ICValues { Invalid = -1, // Op1 CRn CRm Op2 IALLUIS = 0x0388, // 000 0111 0001 000 @@ -391,7 +391,7 @@ namespace ARM64IC { }; - struct ICMapper : ARM64NamedImmMapper { + struct ICMapper : AArch64NamedImmMapper { const static Mapping ICPairs[]; ICMapper(); @@ -402,19 +402,19 @@ namespace ARM64IC { } } -namespace ARM64ISB { +namespace AArch64ISB { enum ISBValues { Invalid = -1, SY = 0xf }; - struct ISBMapper : ARM64NamedImmMapper { + struct ISBMapper : AArch64NamedImmMapper { const static Mapping ISBPairs[]; ISBMapper(); }; } -namespace ARM64PRFM { +namespace AArch64PRFM { enum PRFMValues { Invalid = -1, PLDL1KEEP = 0x00, @@ -437,14 +437,14 @@ namespace ARM64PRFM { PSTL3STRM = 0x15 }; - struct PRFMMapper : ARM64NamedImmMapper { + struct PRFMMapper : AArch64NamedImmMapper { const static Mapping PRFMPairs[]; PRFMMapper(); }; } -namespace ARM64PState { +namespace AArch64PState { enum PStateValues { Invalid = -1, SPSel = 0x05, @@ -452,7 +452,7 @@ namespace ARM64PState { DAIFClr = 0x1f }; - struct PStateMapper : ARM64NamedImmMapper { + struct PStateMapper : AArch64NamedImmMapper { const static Mapping PStatePairs[]; PStateMapper(); @@ -460,7 +460,7 @@ namespace ARM64PState { } -namespace ARM64SE { +namespace AArch64SE { enum ShiftExtSpecifiers { Invalid = -1, LSL, @@ -481,7 +481,7 @@ namespace ARM64SE { }; } -namespace ARM64Layout { +namespace AArch64Layout { enum VectorLayout { Invalid = -1, VL_8B, @@ -504,43 +504,43 @@ namespace ARM64Layout { } inline static const char * -ARM64VectorLayoutToString(ARM64Layout::VectorLayout Layout) { +AArch64VectorLayoutToString(AArch64Layout::VectorLayout Layout) { switch (Layout) { - case ARM64Layout::VL_8B: return ".8b"; - case ARM64Layout::VL_4H: return ".4h"; - case ARM64Layout::VL_2S: return ".2s"; - case ARM64Layout::VL_1D: return ".1d"; - case ARM64Layout::VL_16B: return ".16b"; - case ARM64Layout::VL_8H: return ".8h"; - case ARM64Layout::VL_4S: return ".4s"; - case ARM64Layout::VL_2D: return ".2d"; - case ARM64Layout::VL_B: return ".b"; - case ARM64Layout::VL_H: 
return ".h"; - case ARM64Layout::VL_S: return ".s"; - case ARM64Layout::VL_D: return ".d"; + case AArch64Layout::VL_8B: return ".8b"; + case AArch64Layout::VL_4H: return ".4h"; + case AArch64Layout::VL_2S: return ".2s"; + case AArch64Layout::VL_1D: return ".1d"; + case AArch64Layout::VL_16B: return ".16b"; + case AArch64Layout::VL_8H: return ".8h"; + case AArch64Layout::VL_4S: return ".4s"; + case AArch64Layout::VL_2D: return ".2d"; + case AArch64Layout::VL_B: return ".b"; + case AArch64Layout::VL_H: return ".h"; + case AArch64Layout::VL_S: return ".s"; + case AArch64Layout::VL_D: return ".d"; default: llvm_unreachable("Unknown Vector Layout"); } } -inline static ARM64Layout::VectorLayout -ARM64StringToVectorLayout(StringRef LayoutStr) { - return StringSwitch(LayoutStr) - .Case(".8b", ARM64Layout::VL_8B) - .Case(".4h", ARM64Layout::VL_4H) - .Case(".2s", ARM64Layout::VL_2S) - .Case(".1d", ARM64Layout::VL_1D) - .Case(".16b", ARM64Layout::VL_16B) - .Case(".8h", ARM64Layout::VL_8H) - .Case(".4s", ARM64Layout::VL_4S) - .Case(".2d", ARM64Layout::VL_2D) - .Case(".b", ARM64Layout::VL_B) - .Case(".h", ARM64Layout::VL_H) - .Case(".s", ARM64Layout::VL_S) - .Case(".d", ARM64Layout::VL_D) - .Default(ARM64Layout::Invalid); +inline static AArch64Layout::VectorLayout +AArch64StringToVectorLayout(StringRef LayoutStr) { + return StringSwitch(LayoutStr) + .Case(".8b", AArch64Layout::VL_8B) + .Case(".4h", AArch64Layout::VL_4H) + .Case(".2s", AArch64Layout::VL_2S) + .Case(".1d", AArch64Layout::VL_1D) + .Case(".16b", AArch64Layout::VL_16B) + .Case(".8h", AArch64Layout::VL_8H) + .Case(".4s", AArch64Layout::VL_4S) + .Case(".2d", AArch64Layout::VL_2D) + .Case(".b", AArch64Layout::VL_B) + .Case(".h", AArch64Layout::VL_H) + .Case(".s", AArch64Layout::VL_S) + .Case(".d", AArch64Layout::VL_D) + .Default(AArch64Layout::Invalid); } -namespace ARM64SysReg { +namespace AArch64SysReg { enum SysRegROValues { MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 @@ -571,16 +571,16 @@ namespace ARM64SysReg { ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 - ID_AARM64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 - ID_AARM64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 - ID_AARM64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 - ID_AARM64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 - ID_AARM64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 - ID_AARM64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 - ID_AARM64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 - ID_AARM64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 - ID_AARM64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 - ID_AARM64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + ID_A64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 + ID_A64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 + ID_A64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 + ID_A64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 + ID_A64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 + ID_A64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 + ID_A64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 + ID_A64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 + ID_A64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 + ID_A64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 @@ -1143,15 +1143,15 @@ namespace ARM64SysReg { CPM_IOACC_CTL_EL3 = 0xff90 }; - // Note that these do not inherit from ARM64NamedImmMapper. 
This class is + // Note that these do not inherit from AArch64NamedImmMapper. This class is // sufficiently different in its behaviour that I don't believe it's worth - // burdening the common ARM64NamedImmMapper with abstractions only needed in + // burdening the common AArch64NamedImmMapper with abstractions only needed in // this one case. struct SysRegMapper { - static const ARM64NamedImmMapper::Mapping SysRegPairs[]; - static const ARM64NamedImmMapper::Mapping CycloneSysRegPairs[]; + static const AArch64NamedImmMapper::Mapping SysRegPairs[]; + static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[]; - const ARM64NamedImmMapper::Mapping *InstPairs; + const AArch64NamedImmMapper::Mapping *InstPairs; size_t NumInstPairs; uint64_t FeatureBits; @@ -1161,19 +1161,19 @@ namespace ARM64SysReg { }; struct MSRMapper : SysRegMapper { - static const ARM64NamedImmMapper::Mapping MSRPairs[]; + static const AArch64NamedImmMapper::Mapping MSRPairs[]; MSRMapper(uint64_t FeatureBits); }; struct MRSMapper : SysRegMapper { - static const ARM64NamedImmMapper::Mapping MRSPairs[]; + static const AArch64NamedImmMapper::Mapping MRSPairs[]; MRSMapper(uint64_t FeatureBits); }; uint32_t ParseGenericRegister(StringRef Name, bool &Valid); } -namespace ARM64TLBI { +namespace AArch64TLBI { enum TLBIValues { Invalid = -1, // Op0 Op1 CRn CRm Op2 IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 @@ -1210,7 +1210,7 @@ namespace ARM64TLBI { VAALE1 = 0x443f // 01 000 1000 0111 111 }; - struct TLBIMapper : ARM64NamedImmMapper { + struct TLBIMapper : AArch64NamedImmMapper { const static Mapping TLBIPairs[]; TLBIMapper(); @@ -1235,11 +1235,11 @@ namespace ARM64TLBI { } } -namespace ARM64II { +namespace AArch64II { /// Target Operand Flag enum. enum TOF { //===------------------------------------------------------------------===// - // ARM64 Specific MachineOperand flags. + // AArch64 Specific MachineOperand flags. MO_NO_FLAG, @@ -1287,7 +1287,7 @@ namespace ARM64II { /// referee will affect interpretation. 
MO_TLS = 0x20 }; -} // end namespace ARM64II +} // end namespace AArch64II } // end namespace llvm diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt new file mode 100644 index 00000000000..8ee03a7571b --- /dev/null +++ b/lib/Target/AArch64/Utils/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMAArch64Utils + AArch64BaseInfo.cpp + ) diff --git a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt similarity index 79% rename from lib/Target/ARM64/TargetInfo/LLVMBuild.txt rename to lib/Target/AArch64/Utils/LLVMBuild.txt index b9ecb706952..bcefeb672f7 100644 --- a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt +++ b/lib/Target/AArch64/Utils/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/ARM64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt ----------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = ARM64Info -parent = ARM64 +name = AArch64Utils +parent = AArch64 required_libraries = Support -add_to_library_groups = ARM64 +add_to_library_groups = AArch64 diff --git a/lib/Target/ARM64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile similarity index 64% rename from lib/Target/ARM64/Utils/Makefile rename to lib/Target/AArch64/Utils/Makefile index 6491ad9a07b..0b80f82f2b9 100644 --- a/lib/Target/ARM64/Utils/Makefile +++ b/lib/Target/AArch64/Utils/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM64/Utils/Makefile -------------------*- Makefile -*-===## +##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,9 +7,10 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Utils +LIBRARYNAME = LLVMAArch64Utils -# Hack: we need to include 'main' ARM64 target directory to grab private headers +# Hack: we need to include 'main' AArch64 target directory to grab private +# headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/ARM64.h b/lib/Target/ARM64/ARM64.h deleted file mode 100644 index debb9002eb4..00000000000 --- a/lib/Target/ARM64/ARM64.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// ARM64 back-end. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TARGET_ARM64_H -#define TARGET_ARM64_H - -#include "Utils/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/DataTypes.h" - -namespace llvm { - -class ARM64TargetMachine; -class FunctionPass; -class MachineFunctionPass; - -FunctionPass *createARM64DeadRegisterDefinitions(); -FunctionPass *createARM64ConditionalCompares(); -FunctionPass *createARM64AdvSIMDScalar(); -FunctionPass *createARM64BranchRelaxation(); -FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel); -FunctionPass *createARM64StorePairSuppressPass(); -FunctionPass *createARM64ExpandPseudoPass(); -FunctionPass *createARM64LoadStoreOptimizationPass(); -ModulePass *createARM64PromoteConstantPass(); -FunctionPass *createARM64AddressTypePromotionPass(); -/// \brief Creates an ARM-specific Target Transformation Info pass. -ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM); - -FunctionPass *createARM64CleanupLocalDynamicTLSPass(); - -FunctionPass *createARM64CollectLOHPass(); -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64RegisterInfo.td b/lib/Target/ARM64/ARM64RegisterInfo.td deleted file mode 100644 index 28d01809739..00000000000 --- a/lib/Target/ARM64/ARM64RegisterInfo.td +++ /dev/null @@ -1,593 +0,0 @@ -//===- ARM64RegisterInfo.td - Describe the ARM64 Regisers --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - - -class ARM64Reg enc, string n, list subregs = [], - list altNames = []> - : Register { - let HWEncoding = enc; - let Namespace = "ARM64"; - let SubRegs = subregs; -} - -let Namespace = "ARM64" in { - def sub_32 : SubRegIndex<32>; - - def bsub : SubRegIndex<8>; - def hsub : SubRegIndex<16>; - def ssub : SubRegIndex<32>; - def dsub : SubRegIndex<32>; - def qhisub : SubRegIndex<64>; - def qsub : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def dsub0 : SubRegIndex<64>; - def dsub1 : SubRegIndex<64>; - def dsub2 : SubRegIndex<64>; - def dsub3 : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def qsub0 : SubRegIndex<128>; - def qsub1 : SubRegIndex<128>; - def qsub2 : SubRegIndex<128>; - def qsub3 : SubRegIndex<128>; -} - -let Namespace = "ARM64" in { - def vreg : RegAltNameIndex; - def vlist1 : RegAltNameIndex; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -def W0 : ARM64Reg<0, "w0" >, DwarfRegNum<[0]>; -def W1 : ARM64Reg<1, "w1" >, DwarfRegNum<[1]>; -def W2 : ARM64Reg<2, "w2" >, DwarfRegNum<[2]>; -def W3 : ARM64Reg<3, "w3" >, DwarfRegNum<[3]>; -def W4 : ARM64Reg<4, "w4" >, DwarfRegNum<[4]>; -def W5 : ARM64Reg<5, "w5" >, DwarfRegNum<[5]>; -def W6 : ARM64Reg<6, "w6" >, DwarfRegNum<[6]>; -def W7 : ARM64Reg<7, "w7" >, DwarfRegNum<[7]>; -def W8 : ARM64Reg<8, "w8" >, DwarfRegNum<[8]>; -def W9 : ARM64Reg<9, "w9" >, DwarfRegNum<[9]>; -def W10 : ARM64Reg<10, "w10">, DwarfRegNum<[10]>; -def W11 : ARM64Reg<11, "w11">, DwarfRegNum<[11]>; -def W12 : ARM64Reg<12, "w12">, 
DwarfRegNum<[12]>; -def W13 : ARM64Reg<13, "w13">, DwarfRegNum<[13]>; -def W14 : ARM64Reg<14, "w14">, DwarfRegNum<[14]>; -def W15 : ARM64Reg<15, "w15">, DwarfRegNum<[15]>; -def W16 : ARM64Reg<16, "w16">, DwarfRegNum<[16]>; -def W17 : ARM64Reg<17, "w17">, DwarfRegNum<[17]>; -def W18 : ARM64Reg<18, "w18">, DwarfRegNum<[18]>; -def W19 : ARM64Reg<19, "w19">, DwarfRegNum<[19]>; -def W20 : ARM64Reg<20, "w20">, DwarfRegNum<[20]>; -def W21 : ARM64Reg<21, "w21">, DwarfRegNum<[21]>; -def W22 : ARM64Reg<22, "w22">, DwarfRegNum<[22]>; -def W23 : ARM64Reg<23, "w23">, DwarfRegNum<[23]>; -def W24 : ARM64Reg<24, "w24">, DwarfRegNum<[24]>; -def W25 : ARM64Reg<25, "w25">, DwarfRegNum<[25]>; -def W26 : ARM64Reg<26, "w26">, DwarfRegNum<[26]>; -def W27 : ARM64Reg<27, "w27">, DwarfRegNum<[27]>; -def W28 : ARM64Reg<28, "w28">, DwarfRegNum<[28]>; -def W29 : ARM64Reg<29, "w29">, DwarfRegNum<[29]>; -def W30 : ARM64Reg<30, "w30">, DwarfRegNum<[30]>; -def WSP : ARM64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : ARM64Reg<31, "wzr">, DwarfRegAlias; - -let SubRegIndices = [sub_32] in { -def X0 : ARM64Reg<0, "x0", [W0]>, DwarfRegAlias; -def X1 : ARM64Reg<1, "x1", [W1]>, DwarfRegAlias; -def X2 : ARM64Reg<2, "x2", [W2]>, DwarfRegAlias; -def X3 : ARM64Reg<3, "x3", [W3]>, DwarfRegAlias; -def X4 : ARM64Reg<4, "x4", [W4]>, DwarfRegAlias; -def X5 : ARM64Reg<5, "x5", [W5]>, DwarfRegAlias; -def X6 : ARM64Reg<6, "x6", [W6]>, DwarfRegAlias; -def X7 : ARM64Reg<7, "x7", [W7]>, DwarfRegAlias; -def X8 : ARM64Reg<8, "x8", [W8]>, DwarfRegAlias; -def X9 : ARM64Reg<9, "x9", [W9]>, DwarfRegAlias; -def X10 : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias; -def X11 : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias; -def X12 : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias; -def X13 : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias; -def X14 : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias; -def X15 : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias; -def X16 : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias; -def X17 : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias; -def X18 : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias; -def X19 : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias; -def X20 : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias; -def X21 : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias; -def X22 : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias; -def X23 : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias; -def X24 : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias; -def X25 : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias; -def X26 : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias; -def X27 : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias; -def X28 : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias; -def FP : ARM64Reg<29, "x29", [W29]>, DwarfRegAlias; -def LR : ARM64Reg<30, "x30", [W30]>, DwarfRegAlias; -def SP : ARM64Reg<31, "sp", [WSP]>, DwarfRegAlias; -def XZR : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias; -} - -// Condition code register. -def NZCV : ARM64Reg<0, "nzcv">; - -// GPR register classes with the intersections of GPR32/GPR32sp and -// GPR64/GPR64sp for use by the coalescer. -def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> { - let AltOrders = [(rotl GPR32common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64common : RegisterClass<"ARM64", [i64], 64, - (add (sequence "X%u", 0, 28), FP, LR)> { - let AltOrders = [(rotl GPR64common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -// GPR register classes which exclude SP/WSP. 
-def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> { - let AltOrders = [(rotl GPR32, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> { - let AltOrders = [(rotl GPR64, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include SP/WSP. -def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> { - let AltOrders = [(rotl GPR32sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> { - let AltOrders = [(rotl GPR64sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -def GPR32sponly : RegisterClass<"ARM64", [i32], 32, (add WSP)>; -def GPR64sponly : RegisterClass<"ARM64", [i64], 64, (add SP)>; - -def GPR64spPlus0Operand : AsmOperandClass { - let Name = "GPR64sp0"; - let RenderMethod = "addRegOperands"; - let ParserMethod = "tryParseGPR64sp0Operand"; -} - -def GPR64sp0 : RegisterOperand { - let ParserMatchClass = GPR64spPlus0Operand; -} - -// GPR register classes which include WZR/XZR AND SP/WSP. This is not a -// constraint used by any instructions, it is used as a common super-class. -def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>; -def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>; - -// For tail calls, we can't use callee-saved registers, as they are restored -// to the saved value before the tail call, which would clobber a call address. -// This is for indirect tail calls to store the address of the destination. -def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21, - X22, X23, X24, X25, X26, - X27, X28)>; - -// GPR register classes for post increment amount of vector load/store that -// has alternate printing when Rm=31 and prints a constant immediate value -// equal to the total number of bytes transferred. - -// FIXME: TableGen *should* be able to do these itself now. There appears to be -// a bug in counting how many operands a Post-indexed MCInst should have which -// means the aliases don't trigger. -def GPR64pi1 : RegisterOperand">; -def GPR64pi2 : RegisterOperand">; -def GPR64pi3 : RegisterOperand">; -def GPR64pi4 : RegisterOperand">; -def GPR64pi6 : RegisterOperand">; -def GPR64pi8 : RegisterOperand">; -def GPR64pi12 : RegisterOperand">; -def GPR64pi16 : RegisterOperand">; -def GPR64pi24 : RegisterOperand">; -def GPR64pi32 : RegisterOperand">; -def GPR64pi48 : RegisterOperand">; -def GPR64pi64 : RegisterOperand">; - -// Condition code regclass. -def CCR : RegisterClass<"ARM64", [i32], 32, (add NZCV)> { - let CopyCost = -1; // Don't allow copying of status registers. - - // CCR is not allocatable. 
- let isAllocatable = 0; -} - -//===----------------------------------------------------------------------===// -// Floating Point Scalar Registers -//===----------------------------------------------------------------------===// - -def B0 : ARM64Reg<0, "b0">, DwarfRegNum<[64]>; -def B1 : ARM64Reg<1, "b1">, DwarfRegNum<[65]>; -def B2 : ARM64Reg<2, "b2">, DwarfRegNum<[66]>; -def B3 : ARM64Reg<3, "b3">, DwarfRegNum<[67]>; -def B4 : ARM64Reg<4, "b4">, DwarfRegNum<[68]>; -def B5 : ARM64Reg<5, "b5">, DwarfRegNum<[69]>; -def B6 : ARM64Reg<6, "b6">, DwarfRegNum<[70]>; -def B7 : ARM64Reg<7, "b7">, DwarfRegNum<[71]>; -def B8 : ARM64Reg<8, "b8">, DwarfRegNum<[72]>; -def B9 : ARM64Reg<9, "b9">, DwarfRegNum<[73]>; -def B10 : ARM64Reg<10, "b10">, DwarfRegNum<[74]>; -def B11 : ARM64Reg<11, "b11">, DwarfRegNum<[75]>; -def B12 : ARM64Reg<12, "b12">, DwarfRegNum<[76]>; -def B13 : ARM64Reg<13, "b13">, DwarfRegNum<[77]>; -def B14 : ARM64Reg<14, "b14">, DwarfRegNum<[78]>; -def B15 : ARM64Reg<15, "b15">, DwarfRegNum<[79]>; -def B16 : ARM64Reg<16, "b16">, DwarfRegNum<[80]>; -def B17 : ARM64Reg<17, "b17">, DwarfRegNum<[81]>; -def B18 : ARM64Reg<18, "b18">, DwarfRegNum<[82]>; -def B19 : ARM64Reg<19, "b19">, DwarfRegNum<[83]>; -def B20 : ARM64Reg<20, "b20">, DwarfRegNum<[84]>; -def B21 : ARM64Reg<21, "b21">, DwarfRegNum<[85]>; -def B22 : ARM64Reg<22, "b22">, DwarfRegNum<[86]>; -def B23 : ARM64Reg<23, "b23">, DwarfRegNum<[87]>; -def B24 : ARM64Reg<24, "b24">, DwarfRegNum<[88]>; -def B25 : ARM64Reg<25, "b25">, DwarfRegNum<[89]>; -def B26 : ARM64Reg<26, "b26">, DwarfRegNum<[90]>; -def B27 : ARM64Reg<27, "b27">, DwarfRegNum<[91]>; -def B28 : ARM64Reg<28, "b28">, DwarfRegNum<[92]>; -def B29 : ARM64Reg<29, "b29">, DwarfRegNum<[93]>; -def B30 : ARM64Reg<30, "b30">, DwarfRegNum<[94]>; -def B31 : ARM64Reg<31, "b31">, DwarfRegNum<[95]>; - -let SubRegIndices = [bsub] in { -def H0 : ARM64Reg<0, "h0", [B0]>, DwarfRegAlias; -def H1 : ARM64Reg<1, "h1", [B1]>, DwarfRegAlias; -def H2 : ARM64Reg<2, "h2", [B2]>, DwarfRegAlias; -def H3 : ARM64Reg<3, "h3", [B3]>, DwarfRegAlias; -def H4 : ARM64Reg<4, "h4", [B4]>, DwarfRegAlias; -def H5 : ARM64Reg<5, "h5", [B5]>, DwarfRegAlias; -def H6 : ARM64Reg<6, "h6", [B6]>, DwarfRegAlias; -def H7 : ARM64Reg<7, "h7", [B7]>, DwarfRegAlias; -def H8 : ARM64Reg<8, "h8", [B8]>, DwarfRegAlias; -def H9 : ARM64Reg<9, "h9", [B9]>, DwarfRegAlias; -def H10 : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias; -def H11 : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias; -def H12 : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias; -def H13 : ARM64Reg<13, "h13", [B13]>, DwarfRegAlias; -def H14 : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias; -def H15 : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias; -def H16 : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias; -def H17 : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias; -def H18 : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias; -def H19 : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias; -def H20 : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias; -def H21 : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias; -def H22 : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias; -def H23 : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias; -def H24 : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias; -def H25 : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias; -def H26 : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias; -def H27 : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias; -def H28 : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias; -def H29 : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias; -def H30 : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias; -def H31 : ARM64Reg<31, "h31", [B31]>, 
DwarfRegAlias; -} - -let SubRegIndices = [hsub] in { -def S0 : ARM64Reg<0, "s0", [H0]>, DwarfRegAlias; -def S1 : ARM64Reg<1, "s1", [H1]>, DwarfRegAlias; -def S2 : ARM64Reg<2, "s2", [H2]>, DwarfRegAlias; -def S3 : ARM64Reg<3, "s3", [H3]>, DwarfRegAlias; -def S4 : ARM64Reg<4, "s4", [H4]>, DwarfRegAlias; -def S5 : ARM64Reg<5, "s5", [H5]>, DwarfRegAlias; -def S6 : ARM64Reg<6, "s6", [H6]>, DwarfRegAlias; -def S7 : ARM64Reg<7, "s7", [H7]>, DwarfRegAlias; -def S8 : ARM64Reg<8, "s8", [H8]>, DwarfRegAlias; -def S9 : ARM64Reg<9, "s9", [H9]>, DwarfRegAlias; -def S10 : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias; -def S11 : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias; -def S12 : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias; -def S13 : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias; -def S14 : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias; -def S15 : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias; -def S16 : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias; -def S17 : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias; -def S18 : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias; -def S19 : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias; -def S20 : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias; -def S21 : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias; -def S22 : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias; -def S23 : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias; -def S24 : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias; -def S25 : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias; -def S26 : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias; -def S27 : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias; -def S28 : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias; -def S29 : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias; -def S30 : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias; -def S31 : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias; -} - -let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { -def D0 : ARM64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; -def D1 : ARM64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; -def D2 : ARM64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; -def D3 : ARM64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; -def D4 : ARM64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; -def D5 : ARM64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias; -def D6 : ARM64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias; -def D7 : ARM64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; -def D8 : ARM64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; -def D9 : ARM64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; -def D10 : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; -def D11 : ARM64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias; -def D12 : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; -def D13 : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; -def D14 : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; -def D15 : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; -def D16 : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; -def D17 : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; -def D18 : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; -def D19 : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; -def D20 : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; -def D21 : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; -def D22 : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; -def D23 : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; -def D24 : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; -def D25 : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; -def D26 : ARM64Reg<26, "d26", [S26], ["v26", 
""]>, DwarfRegAlias; -def D27 : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; -def D28 : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; -def D29 : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; -def D30 : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; -def D31 : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; -} - -let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { -def Q0 : ARM64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; -def Q1 : ARM64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias; -def Q2 : ARM64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; -def Q3 : ARM64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; -def Q4 : ARM64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; -def Q5 : ARM64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; -def Q6 : ARM64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; -def Q7 : ARM64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; -def Q8 : ARM64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; -def Q9 : ARM64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; -def Q10 : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; -def Q11 : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; -def Q12 : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; -def Q13 : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; -def Q14 : ARM64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; -def Q15 : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; -def Q16 : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; -def Q17 : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; -def Q18 : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; -def Q19 : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; -def Q20 : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; -def Q21 : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; -def Q22 : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; -def Q23 : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; -def Q24 : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; -def Q25 : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; -def Q26 : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; -def Q27 : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; -def Q28 : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; -def Q29 : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; -def Q30 : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; -def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; -} - -def FPR8 : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> { - let Size = 8; -} -def FPR16 : RegisterClass<"ARM64", [f16], 16, (sequence "H%u", 0, 31)> { - let Size = 16; -} -def FPR32 : RegisterClass<"ARM64", [f32, i32], 32,(sequence "S%u", 0, 31)>; -def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, - v1i64], - 64, (sequence "D%u", 0, 31)>; -// We don't (yet) have an f128 legal type, so don't use that here. We -// normalize 128-bit vectors to v2f64 for arg passing and such, so use -// that here. -def FPR128 : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128], - 128, (sequence "Q%u", 0, 31)>; - -// The lower 16 vector registers. Some instructions can only take registers -// in this range. -def FPR128_lo : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc FPR128, 16)>; - -// Pairs, triples, and quads of 64-bit vector registers. 
-def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; -def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; -def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; -def DD : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> { - let Size = 128; -} -def DDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> { - let Size = 196; -} -def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> { - let Size = 256; -} - -// Pairs, triples, and quads of 128-bit vector registers. -def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; -def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; -def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; -def QQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> { - let Size = 256; -} -def QQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> { - let Size = 384; -} -def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> { - let Size = 512; -} - - -// Vector operand versions of the FP registers. Alternate name printing and -// assmebler matching. -def VectorReg64AsmOperand : AsmOperandClass { - let Name = "VectorReg64"; - let PredicateMethod = "isVectorReg"; -} -def VectorReg128AsmOperand : AsmOperandClass { - let Name = "VectorReg128"; - let PredicateMethod = "isVectorReg"; -} - -def V64 : RegisterOperand { - let ParserMatchClass = VectorReg64AsmOperand; -} - -def V128 : RegisterOperand { - let ParserMatchClass = VectorReg128AsmOperand; -} - -def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; } -def V128_lo : RegisterOperand { - let ParserMatchClass = VectorRegLoAsmOperand; -} - -class TypedVecListAsmOperand - : AsmOperandClass { - let Name = "TypedVectorList" # count # "_" # lanes # kind; - - let PredicateMethod - = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>"; - let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">"; -} - -class TypedVecListRegOperand - : RegisterOperand">; - -multiclass VectorList { - // With implicit types (probably on instruction instead). E.g. { v0, v1 } - def _64AsmOperand : AsmOperandClass { - let Name = NAME # "64"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList64Operands<" # count # ">"; - } - - def "64" : RegisterOperand { - let ParserMatchClass = !cast(NAME # "_64AsmOperand"); - } - - def _128AsmOperand : AsmOperandClass { - let Name = NAME # "128"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList128Operands<" # count # ">"; - } - - def "128" : RegisterOperand { - let ParserMatchClass = !cast(NAME # "_128AsmOperand"); - } - - // 64-bit register lists with explicit type. 
- - // { v0.8b, v1.8b } - def _8bAsmOperand : TypedVecListAsmOperand; - def "8b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_8bAsmOperand"); - } - - // { v0.4h, v1.4h } - def _4hAsmOperand : TypedVecListAsmOperand; - def "4h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_4hAsmOperand"); - } - - // { v0.2s, v1.2s } - def _2sAsmOperand : TypedVecListAsmOperand; - def "2s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_2sAsmOperand"); - } - - // { v0.1d, v1.1d } - def _1dAsmOperand : TypedVecListAsmOperand; - def "1d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_1dAsmOperand"); - } - - // 128-bit register lists with explicit type - - // { v0.16b, v1.16b } - def _16bAsmOperand : TypedVecListAsmOperand; - def "16b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_16bAsmOperand"); - } - - // { v0.8h, v1.8h } - def _8hAsmOperand : TypedVecListAsmOperand; - def "8h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_8hAsmOperand"); - } - - // { v0.4s, v1.4s } - def _4sAsmOperand : TypedVecListAsmOperand; - def "4s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_4sAsmOperand"); - } - - // { v0.2d, v1.2d } - def _2dAsmOperand : TypedVecListAsmOperand; - def "2d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_2dAsmOperand"); - } - - // { v0.b, v1.b } - def _bAsmOperand : TypedVecListAsmOperand; - def "b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_bAsmOperand"); - } - - // { v0.h, v1.h } - def _hAsmOperand : TypedVecListAsmOperand; - def "h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_hAsmOperand"); - } - - // { v0.s, v1.s } - def _sAsmOperand : TypedVecListAsmOperand; - def "s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_sAsmOperand"); - } - - // { v0.d, v1.d } - def _dAsmOperand : TypedVecListAsmOperand; - def "d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_dAsmOperand"); - } - - -} - -defm VecListOne : VectorList<1, FPR64, FPR128>; -defm VecListTwo : VectorList<2, DD, QQ>; -defm VecListThree : VectorList<3, DDD, QQQ>; -defm VecListFour : VectorList<4, DDDD, QQQQ>; - - -// Register operand versions of the scalar FP registers. -def FPR16Op : RegisterOperand; -def FPR32Op : RegisterOperand; -def FPR64Op : RegisterOperand; -def FPR128Op : RegisterOperand; diff --git a/lib/Target/ARM64/ARM64TargetMachine.h b/lib/Target/ARM64/ARM64TargetMachine.h deleted file mode 100644 index 730ffcaaf6d..00000000000 --- a/lib/Target/ARM64/ARM64TargetMachine.h +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the ARM64 specific subclass of TargetMachine. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64TARGETMACHINE_H -#define ARM64TARGETMACHINE_H - -#include "ARM64InstrInfo.h" -#include "ARM64ISelLowering.h" -#include "ARM64Subtarget.h" -#include "ARM64FrameLowering.h" -#include "ARM64SelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCStreamer.h" - -namespace llvm { - -class ARM64TargetMachine : public LLVMTargetMachine { -protected: - ARM64Subtarget Subtarget; - -private: - const DataLayout DL; - ARM64InstrInfo InstrInfo; - ARM64TargetLowering TLInfo; - ARM64FrameLowering FrameLowering; - ARM64SelectionDAGInfo TSInfo; - -public: - ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL, - bool IsLittleEndian); - - const ARM64Subtarget *getSubtargetImpl() const override { return &Subtarget; } - const ARM64TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - const DataLayout *getDataLayout() const override { return &DL; } - const ARM64FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - const ARM64InstrInfo *getInstrInfo() const override { return &InstrInfo; } - const ARM64RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - const ARM64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - // Pass Pipeline Configuration - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - - /// \brief Register ARM64 analysis passes with a pass manager. - void addAnalysisPasses(PassManagerBase &PM) override; -}; - -// ARM64leTargetMachine - ARM64 little endian target machine. -// -class ARM64leTargetMachine : public ARM64TargetMachine { - virtual void anchor(); -public: - ARM64leTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -// ARM64beTargetMachine - ARM64 big endian target machine. 
-// -class ARM64beTargetMachine : public ARM64TargetMachine { - virtual void anchor(); -public: - ARM64beTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/CMakeLists.txt b/lib/Target/ARM64/CMakeLists.txt deleted file mode 100644 index 56ba3b73294..00000000000 --- a/lib/Target/ARM64/CMakeLists.txt +++ /dev/null @@ -1,51 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS ARM64.td) - -tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info) -tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering) -tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) -tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel) -tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel) -tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv) -tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler) -add_public_tablegen_target(ARM64CommonTableGen) - -add_llvm_target(ARM64CodeGen - ARM64AddressTypePromotion.cpp - ARM64AdvSIMDScalarPass.cpp - ARM64AsmPrinter.cpp - ARM64BranchRelaxation.cpp - ARM64CleanupLocalDynamicTLSPass.cpp - ARM64CollectLOH.cpp - ARM64ConditionalCompares.cpp - ARM64DeadRegisterDefinitionsPass.cpp - ARM64ExpandPseudoInsts.cpp - ARM64FastISel.cpp - ARM64FrameLowering.cpp - ARM64ISelDAGToDAG.cpp - ARM64ISelLowering.cpp - ARM64InstrInfo.cpp - ARM64LoadStoreOptimizer.cpp - ARM64MCInstLower.cpp - ARM64PromoteConstant.cpp - ARM64RegisterInfo.cpp - ARM64SelectionDAGInfo.cpp - ARM64StorePairSuppress.cpp - ARM64Subtarget.cpp - ARM64TargetMachine.cpp - ARM64TargetObjectFile.cpp - ARM64TargetTransformInfo.cpp -) - -add_dependencies(LLVMARM64CodeGen intrinsics_gen) - -add_subdirectory(TargetInfo) -add_subdirectory(AsmParser) -add_subdirectory(Disassembler) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) -add_subdirectory(Utils) diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp deleted file mode 100644 index 529b450352e..00000000000 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp +++ /dev/null @@ -1,1312 +0,0 @@ -//===-- ARM64InstPrinter.cpp - Convert ARM64 MCInst to assembly syntax ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM64 MCInst to a .s file. 
-// -//===----------------------------------------------------------------------===// - -#include "ARM64InstPrinter.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "Utils/ARM64BaseInfo.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "asm-printer" - -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter.inc" -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter1.inc" - -ARM64InstPrinter::ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} - -ARM64AppleInstPrinter::ARM64AppleInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : ARM64InstPrinter(MAI, MII, MRI, STI) {} - -void ARM64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - // This is for .cfi directives. - OS << getRegisterName(RegNo); -} - -void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - // Check for special encodings and print the canonical alias instead. - - unsigned Opcode = MI->getOpcode(); - - if (Opcode == ARM64::SYSxt) - if (printSysAlias(MI, O)) { - printAnnotation(O, Annot); - return; - } - - // SBFM/UBFM should print to a nicer aliased form if possible. - if (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri || - Opcode == ARM64::UBFMXri || Opcode == ARM64::UBFMWri) { - const MCOperand &Op0 = MI->getOperand(0); - const MCOperand &Op1 = MI->getOperand(1); - const MCOperand &Op2 = MI->getOperand(2); - const MCOperand &Op3 = MI->getOperand(3); - - bool IsSigned = (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri); - bool Is64Bit = (Opcode == ARM64::SBFMXri || Opcode == ARM64::UBFMXri); - if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) { - const char *AsmMnemonic = nullptr; - - switch (Op3.getImm()) { - default: - break; - case 7: - if (IsSigned) - AsmMnemonic = "sxtb"; - else if (!Is64Bit) - AsmMnemonic = "uxtb"; - break; - case 15: - if (IsSigned) - AsmMnemonic = "sxth"; - else if (!Is64Bit) - AsmMnemonic = "uxth"; - break; - case 31: - // *xtw is only valid for signed 64-bit operations. - if (Is64Bit && IsSigned) - AsmMnemonic = "sxtw"; - break; - } - - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(getWRegFromXReg(Op1.getReg())); - printAnnotation(O, Annot); - return; - } - } - - // All immediate shifts are aliases, implemented using the Bitfield - // instruction. In all cases the immediate shift amount shift must be in - // the range 0 to (reg.size -1). 
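The bitfield-alias selection above is easier to follow with the 32-bit encodings spelled out: for a non-zero shift s, "lsl Wd, Wn, #s" is UBFM Wd, Wn, #(32 - s), #(31 - s); "lsr Wd, Wn, #s" is UBFM Wd, Wn, #s, #31; and "asr Wd, Wn, #s" is SBFM Wd, Wn, #s, #31. The standalone C++ sketch below mirrors only the W-register immr/imms tests from the printer logic above; it is a rewritten illustration, not the patch's code.

#include <cstdint>
#include <cstdio>

struct ShiftAlias {
  const char *Mnemonic; // nullptr when no shift alias applies
  int Shift;
};

// Mirrors the 32-bit (W-register) cases above: UBFM with imms + 1 == immr
// prints as lsl, UBFM with imms == 31 prints as lsr, and SBFM with
// imms == 31 prints as asr.
static ShiftAlias decodeBitfieldShift32(bool IsSigned, int64_t immr,
                                        int64_t imms) {
  if (!IsSigned && imms != 0x1f && imms + 1 == immr)
    return {"lsl", static_cast<int>(31 - imms)};
  if (!IsSigned && imms == 0x1f)
    return {"lsr", static_cast<int>(immr)};
  if (IsSigned && imms == 0x1f)
    return {"asr", static_cast<int>(immr)};
  return {nullptr, 0};
}

int main() {
  // UBFM Wd, Wn, #28, #27 is the encoding of "lsl Wd, Wn, #4".
  ShiftAlias A = decodeBitfieldShift32(/*IsSigned=*/false, 28, 27);
  if (A.Mnemonic)
    std::printf("%s #%d\n", A.Mnemonic, A.Shift); // prints: lsl #4
  return 0;
}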
- if (Op2.isImm() && Op3.isImm()) { - const char *AsmMnemonic = nullptr; - int shift = 0; - int64_t immr = Op2.getImm(); - int64_t imms = Op3.getImm(); - if (Opcode == ARM64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) { - AsmMnemonic = "lsl"; - shift = 31 - imms; - } else if (Opcode == ARM64::UBFMXri && imms != 0x3f && - ((imms + 1 == immr))) { - AsmMnemonic = "lsl"; - shift = 63 - imms; - } else if (Opcode == ARM64::UBFMWri && imms == 0x1f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::UBFMXri && imms == 0x3f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::SBFMWri && imms == 0x1f) { - AsmMnemonic = "asr"; - shift = immr; - } else if (Opcode == ARM64::SBFMXri && imms == 0x3f) { - AsmMnemonic = "asr"; - shift = immr; - } - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(Op1.getReg()) << ", #" << shift; - printAnnotation(O, Annot); - return; - } - } - - // SBFIZ/UBFIZ aliases - if (Op2.getImm() > Op3.getImm()) { - O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t' - << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) - << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1; - printAnnotation(O, Annot); - return; - } - - // Otherwise SBFX/UBFX is the preferred form - O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t' - << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) - << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1; - printAnnotation(O, Annot); - return; - } - - if (Opcode == ARM64::BFMXri || Opcode == ARM64::BFMWri) { - const MCOperand &Op0 = MI->getOperand(0); // Op1 == Op0 - const MCOperand &Op2 = MI->getOperand(2); - int ImmR = MI->getOperand(3).getImm(); - int ImmS = MI->getOperand(4).getImm(); - - // BFI alias - if (ImmS < ImmR) { - int BitWidth = Opcode == ARM64::BFMXri ? 64 : 32; - int LSB = (BitWidth - ImmR) % BitWidth; - int Width = ImmS + 1; - O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", " - << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width; - printAnnotation(O, Annot); - return; - } - - int LSB = ImmR; - int Width = ImmS - ImmR + 1; - // Otherwise BFXIL the preferred form - O << "\tbfxil\t" - << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg()) - << ", #" << LSB << ", #" << Width; - printAnnotation(O, Annot); - return; - } - - // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift - // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be - // printed. 
- if ((Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi || - Opcode == ARM64::MOVNXi || Opcode == ARM64::MOVNWi) && - MI->getOperand(1).isExpr()) { - if (Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi) - O << "\tmovz\t"; - else - O << "\tmovn\t"; - - O << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(1).getExpr(); - return; - } - - if ((Opcode == ARM64::MOVKXi || Opcode == ARM64::MOVKWi) && - MI->getOperand(2).isExpr()) { - O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(2).getExpr(); - return; - } - - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); - - printAnnotation(O, Annot); -} - -static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout, - bool &IsTbx) { - switch (Opcode) { - case ARM64::TBXv8i8One: - case ARM64::TBXv8i8Two: - case ARM64::TBXv8i8Three: - case ARM64::TBXv8i8Four: - IsTbx = true; - Layout = ".8b"; - return true; - case ARM64::TBLv8i8One: - case ARM64::TBLv8i8Two: - case ARM64::TBLv8i8Three: - case ARM64::TBLv8i8Four: - IsTbx = false; - Layout = ".8b"; - return true; - case ARM64::TBXv16i8One: - case ARM64::TBXv16i8Two: - case ARM64::TBXv16i8Three: - case ARM64::TBXv16i8Four: - IsTbx = true; - Layout = ".16b"; - return true; - case ARM64::TBLv16i8One: - case ARM64::TBLv16i8Two: - case ARM64::TBLv16i8Three: - case ARM64::TBLv16i8Four: - IsTbx = false; - Layout = ".16b"; - return true; - default: - return false; - } -} - -struct LdStNInstrDesc { - unsigned Opcode; - const char *Mnemonic; - const char *Layout; - int ListOperand; - bool HasLane; - int NaturalOffset; -}; - -static LdStNInstrDesc LdStNInstInfo[] = { - { ARM64::LD1i8, "ld1", ".b", 1, true, 0 }, - { ARM64::LD1i16, "ld1", ".h", 1, true, 0 }, - { ARM64::LD1i32, "ld1", ".s", 1, true, 0 }, - { ARM64::LD1i64, "ld1", ".d", 1, true, 0 }, - { ARM64::LD1i8_POST, "ld1", ".b", 2, true, 1 }, - { ARM64::LD1i16_POST, "ld1", ".h", 2, true, 2 }, - { ARM64::LD1i32_POST, "ld1", ".s", 2, true, 4 }, - { ARM64::LD1i64_POST, "ld1", ".d", 2, true, 8 }, - { ARM64::LD1Rv16b, "ld1r", ".16b", 0, false, 0 }, - { ARM64::LD1Rv8h, "ld1r", ".8h", 0, false, 0 }, - { ARM64::LD1Rv4s, "ld1r", ".4s", 0, false, 0 }, - { ARM64::LD1Rv2d, "ld1r", ".2d", 0, false, 0 }, - { ARM64::LD1Rv8b, "ld1r", ".8b", 0, false, 0 }, - { ARM64::LD1Rv4h, "ld1r", ".4h", 0, false, 0 }, - { ARM64::LD1Rv2s, "ld1r", ".2s", 0, false, 0 }, - { ARM64::LD1Rv1d, "ld1r", ".1d", 0, false, 0 }, - { ARM64::LD1Rv16b_POST, "ld1r", ".16b", 1, false, 1 }, - { ARM64::LD1Rv8h_POST, "ld1r", ".8h", 1, false, 2 }, - { ARM64::LD1Rv4s_POST, "ld1r", ".4s", 1, false, 4 }, - { ARM64::LD1Rv2d_POST, "ld1r", ".2d", 1, false, 8 }, - { ARM64::LD1Rv8b_POST, "ld1r", ".8b", 1, false, 1 }, - { ARM64::LD1Rv4h_POST, "ld1r", ".4h", 1, false, 2 }, - { ARM64::LD1Rv2s_POST, "ld1r", ".2s", 1, false, 4 }, - { ARM64::LD1Rv1d_POST, "ld1r", ".1d", 1, false, 8 }, - { ARM64::LD1Onev16b, "ld1", ".16b", 0, false, 0 }, - { ARM64::LD1Onev8h, "ld1", ".8h", 0, false, 0 }, - { ARM64::LD1Onev4s, "ld1", ".4s", 0, false, 0 }, - { ARM64::LD1Onev2d, "ld1", ".2d", 0, false, 0 }, - { ARM64::LD1Onev8b, "ld1", ".8b", 0, false, 0 }, - { ARM64::LD1Onev4h, "ld1", ".4h", 0, false, 0 }, - { ARM64::LD1Onev2s, "ld1", ".2s", 0, false, 0 }, - { ARM64::LD1Onev1d, "ld1", ".1d", 0, false, 0 }, - { ARM64::LD1Onev16b_POST, "ld1", ".16b", 1, false, 16 }, - { ARM64::LD1Onev8h_POST, "ld1", ".8h", 1, false, 16 }, - { ARM64::LD1Onev4s_POST, "ld1", ".4s", 1, false, 16 }, - { ARM64::LD1Onev2d_POST, "ld1", ".2d", 1, false, 16 }, - { ARM64::LD1Onev8b_POST, 
"ld1", ".8b", 1, false, 8 }, - { ARM64::LD1Onev4h_POST, "ld1", ".4h", 1, false, 8 }, - { ARM64::LD1Onev2s_POST, "ld1", ".2s", 1, false, 8 }, - { ARM64::LD1Onev1d_POST, "ld1", ".1d", 1, false, 8 }, - { ARM64::LD1Twov16b, "ld1", ".16b", 0, false, 0 }, - { ARM64::LD1Twov8h, "ld1", ".8h", 0, false, 0 }, - { ARM64::LD1Twov4s, "ld1", ".4s", 0, false, 0 }, - { ARM64::LD1Twov2d, "ld1", ".2d", 0, false, 0 }, - { ARM64::LD1Twov8b, "ld1", ".8b", 0, false, 0 }, - { ARM64::LD1Twov4h, "ld1", ".4h", 0, false, 0 }, - { ARM64::LD1Twov2s, "ld1", ".2s", 0, false, 0 }, - { ARM64::LD1Twov1d, "ld1", ".1d", 0, false, 0 }, - { ARM64::LD1Twov16b_POST, "ld1", ".16b", 1, false, 32 }, - { ARM64::LD1Twov8h_POST, "ld1", ".8h", 1, false, 32 }, - { ARM64::LD1Twov4s_POST, "ld1", ".4s", 1, false, 32 }, - { ARM64::LD1Twov2d_POST, "ld1", ".2d", 1, false, 32 }, - { ARM64::LD1Twov8b_POST, "ld1", ".8b", 1, false, 16 }, - { ARM64::LD1Twov4h_POST, "ld1", ".4h", 1, false, 16 }, - { ARM64::LD1Twov2s_POST, "ld1", ".2s", 1, false, 16 }, - { ARM64::LD1Twov1d_POST, "ld1", ".1d", 1, false, 16 }, - { ARM64::LD1Threev16b, "ld1", ".16b", 0, false, 0 }, - { ARM64::LD1Threev8h, "ld1", ".8h", 0, false, 0 }, - { ARM64::LD1Threev4s, "ld1", ".4s", 0, false, 0 }, - { ARM64::LD1Threev2d, "ld1", ".2d", 0, false, 0 }, - { ARM64::LD1Threev8b, "ld1", ".8b", 0, false, 0 }, - { ARM64::LD1Threev4h, "ld1", ".4h", 0, false, 0 }, - { ARM64::LD1Threev2s, "ld1", ".2s", 0, false, 0 }, - { ARM64::LD1Threev1d, "ld1", ".1d", 0, false, 0 }, - { ARM64::LD1Threev16b_POST, "ld1", ".16b", 1, false, 48 }, - { ARM64::LD1Threev8h_POST, "ld1", ".8h", 1, false, 48 }, - { ARM64::LD1Threev4s_POST, "ld1", ".4s", 1, false, 48 }, - { ARM64::LD1Threev2d_POST, "ld1", ".2d", 1, false, 48 }, - { ARM64::LD1Threev8b_POST, "ld1", ".8b", 1, false, 24 }, - { ARM64::LD1Threev4h_POST, "ld1", ".4h", 1, false, 24 }, - { ARM64::LD1Threev2s_POST, "ld1", ".2s", 1, false, 24 }, - { ARM64::LD1Threev1d_POST, "ld1", ".1d", 1, false, 24 }, - { ARM64::LD1Fourv16b, "ld1", ".16b", 0, false, 0 }, - { ARM64::LD1Fourv8h, "ld1", ".8h", 0, false, 0 }, - { ARM64::LD1Fourv4s, "ld1", ".4s", 0, false, 0 }, - { ARM64::LD1Fourv2d, "ld1", ".2d", 0, false, 0 }, - { ARM64::LD1Fourv8b, "ld1", ".8b", 0, false, 0 }, - { ARM64::LD1Fourv4h, "ld1", ".4h", 0, false, 0 }, - { ARM64::LD1Fourv2s, "ld1", ".2s", 0, false, 0 }, - { ARM64::LD1Fourv1d, "ld1", ".1d", 0, false, 0 }, - { ARM64::LD1Fourv16b_POST, "ld1", ".16b", 1, false, 64 }, - { ARM64::LD1Fourv8h_POST, "ld1", ".8h", 1, false, 64 }, - { ARM64::LD1Fourv4s_POST, "ld1", ".4s", 1, false, 64 }, - { ARM64::LD1Fourv2d_POST, "ld1", ".2d", 1, false, 64 }, - { ARM64::LD1Fourv8b_POST, "ld1", ".8b", 1, false, 32 }, - { ARM64::LD1Fourv4h_POST, "ld1", ".4h", 1, false, 32 }, - { ARM64::LD1Fourv2s_POST, "ld1", ".2s", 1, false, 32 }, - { ARM64::LD1Fourv1d_POST, "ld1", ".1d", 1, false, 32 }, - { ARM64::LD2i8, "ld2", ".b", 1, true, 0 }, - { ARM64::LD2i16, "ld2", ".h", 1, true, 0 }, - { ARM64::LD2i32, "ld2", ".s", 1, true, 0 }, - { ARM64::LD2i64, "ld2", ".d", 1, true, 0 }, - { ARM64::LD2i8_POST, "ld2", ".b", 2, true, 2 }, - { ARM64::LD2i16_POST, "ld2", ".h", 2, true, 4 }, - { ARM64::LD2i32_POST, "ld2", ".s", 2, true, 8 }, - { ARM64::LD2i64_POST, "ld2", ".d", 2, true, 16 }, - { ARM64::LD2Rv16b, "ld2r", ".16b", 0, false, 0 }, - { ARM64::LD2Rv8h, "ld2r", ".8h", 0, false, 0 }, - { ARM64::LD2Rv4s, "ld2r", ".4s", 0, false, 0 }, - { ARM64::LD2Rv2d, "ld2r", ".2d", 0, false, 0 }, - { ARM64::LD2Rv8b, "ld2r", ".8b", 0, false, 0 }, - { ARM64::LD2Rv4h, "ld2r", ".4h", 0, false, 0 }, - { 
ARM64::LD2Rv2s, "ld2r", ".2s", 0, false, 0 }, - { ARM64::LD2Rv1d, "ld2r", ".1d", 0, false, 0 }, - { ARM64::LD2Rv16b_POST, "ld2r", ".16b", 1, false, 2 }, - { ARM64::LD2Rv8h_POST, "ld2r", ".8h", 1, false, 4 }, - { ARM64::LD2Rv4s_POST, "ld2r", ".4s", 1, false, 8 }, - { ARM64::LD2Rv2d_POST, "ld2r", ".2d", 1, false, 16 }, - { ARM64::LD2Rv8b_POST, "ld2r", ".8b", 1, false, 2 }, - { ARM64::LD2Rv4h_POST, "ld2r", ".4h", 1, false, 4 }, - { ARM64::LD2Rv2s_POST, "ld2r", ".2s", 1, false, 8 }, - { ARM64::LD2Rv1d_POST, "ld2r", ".1d", 1, false, 16 }, - { ARM64::LD2Twov16b, "ld2", ".16b", 0, false, 0 }, - { ARM64::LD2Twov8h, "ld2", ".8h", 0, false, 0 }, - { ARM64::LD2Twov4s, "ld2", ".4s", 0, false, 0 }, - { ARM64::LD2Twov2d, "ld2", ".2d", 0, false, 0 }, - { ARM64::LD2Twov8b, "ld2", ".8b", 0, false, 0 }, - { ARM64::LD2Twov4h, "ld2", ".4h", 0, false, 0 }, - { ARM64::LD2Twov2s, "ld2", ".2s", 0, false, 0 }, - { ARM64::LD2Twov16b_POST, "ld2", ".16b", 1, false, 32 }, - { ARM64::LD2Twov8h_POST, "ld2", ".8h", 1, false, 32 }, - { ARM64::LD2Twov4s_POST, "ld2", ".4s", 1, false, 32 }, - { ARM64::LD2Twov2d_POST, "ld2", ".2d", 1, false, 32 }, - { ARM64::LD2Twov8b_POST, "ld2", ".8b", 1, false, 16 }, - { ARM64::LD2Twov4h_POST, "ld2", ".4h", 1, false, 16 }, - { ARM64::LD2Twov2s_POST, "ld2", ".2s", 1, false, 16 }, - { ARM64::LD3i8, "ld3", ".b", 1, true, 0 }, - { ARM64::LD3i16, "ld3", ".h", 1, true, 0 }, - { ARM64::LD3i32, "ld3", ".s", 1, true, 0 }, - { ARM64::LD3i64, "ld3", ".d", 1, true, 0 }, - { ARM64::LD3i8_POST, "ld3", ".b", 2, true, 3 }, - { ARM64::LD3i16_POST, "ld3", ".h", 2, true, 6 }, - { ARM64::LD3i32_POST, "ld3", ".s", 2, true, 12 }, - { ARM64::LD3i64_POST, "ld3", ".d", 2, true, 24 }, - { ARM64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 }, - { ARM64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 }, - { ARM64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 }, - { ARM64::LD3Rv2d, "ld3r", ".2d", 0, false, 0 }, - { ARM64::LD3Rv8b, "ld3r", ".8b", 0, false, 0 }, - { ARM64::LD3Rv4h, "ld3r", ".4h", 0, false, 0 }, - { ARM64::LD3Rv2s, "ld3r", ".2s", 0, false, 0 }, - { ARM64::LD3Rv1d, "ld3r", ".1d", 0, false, 0 }, - { ARM64::LD3Rv16b_POST, "ld3r", ".16b", 1, false, 3 }, - { ARM64::LD3Rv8h_POST, "ld3r", ".8h", 1, false, 6 }, - { ARM64::LD3Rv4s_POST, "ld3r", ".4s", 1, false, 12 }, - { ARM64::LD3Rv2d_POST, "ld3r", ".2d", 1, false, 24 }, - { ARM64::LD3Rv8b_POST, "ld3r", ".8b", 1, false, 3 }, - { ARM64::LD3Rv4h_POST, "ld3r", ".4h", 1, false, 6 }, - { ARM64::LD3Rv2s_POST, "ld3r", ".2s", 1, false, 12 }, - { ARM64::LD3Rv1d_POST, "ld3r", ".1d", 1, false, 24 }, - { ARM64::LD3Threev16b, "ld3", ".16b", 0, false, 0 }, - { ARM64::LD3Threev8h, "ld3", ".8h", 0, false, 0 }, - { ARM64::LD3Threev4s, "ld3", ".4s", 0, false, 0 }, - { ARM64::LD3Threev2d, "ld3", ".2d", 0, false, 0 }, - { ARM64::LD3Threev8b, "ld3", ".8b", 0, false, 0 }, - { ARM64::LD3Threev4h, "ld3", ".4h", 0, false, 0 }, - { ARM64::LD3Threev2s, "ld3", ".2s", 0, false, 0 }, - { ARM64::LD3Threev16b_POST, "ld3", ".16b", 1, false, 48 }, - { ARM64::LD3Threev8h_POST, "ld3", ".8h", 1, false, 48 }, - { ARM64::LD3Threev4s_POST, "ld3", ".4s", 1, false, 48 }, - { ARM64::LD3Threev2d_POST, "ld3", ".2d", 1, false, 48 }, - { ARM64::LD3Threev8b_POST, "ld3", ".8b", 1, false, 24 }, - { ARM64::LD3Threev4h_POST, "ld3", ".4h", 1, false, 24 }, - { ARM64::LD3Threev2s_POST, "ld3", ".2s", 1, false, 24 }, - { ARM64::LD4i8, "ld4", ".b", 1, true, 0 }, - { ARM64::LD4i16, "ld4", ".h", 1, true, 0 }, - { ARM64::LD4i32, "ld4", ".s", 1, true, 0 }, - { ARM64::LD4i64, "ld4", ".d", 1, true, 0 }, - { ARM64::LD4i8_POST, "ld4", ".b", 2, true, 4 
}, - { ARM64::LD4i16_POST, "ld4", ".h", 2, true, 8 }, - { ARM64::LD4i32_POST, "ld4", ".s", 2, true, 16 }, - { ARM64::LD4i64_POST, "ld4", ".d", 2, true, 32 }, - { ARM64::LD4Rv16b, "ld4r", ".16b", 0, false, 0 }, - { ARM64::LD4Rv8h, "ld4r", ".8h", 0, false, 0 }, - { ARM64::LD4Rv4s, "ld4r", ".4s", 0, false, 0 }, - { ARM64::LD4Rv2d, "ld4r", ".2d", 0, false, 0 }, - { ARM64::LD4Rv8b, "ld4r", ".8b", 0, false, 0 }, - { ARM64::LD4Rv4h, "ld4r", ".4h", 0, false, 0 }, - { ARM64::LD4Rv2s, "ld4r", ".2s", 0, false, 0 }, - { ARM64::LD4Rv1d, "ld4r", ".1d", 0, false, 0 }, - { ARM64::LD4Rv16b_POST, "ld4r", ".16b", 1, false, 4 }, - { ARM64::LD4Rv8h_POST, "ld4r", ".8h", 1, false, 8 }, - { ARM64::LD4Rv4s_POST, "ld4r", ".4s", 1, false, 16 }, - { ARM64::LD4Rv2d_POST, "ld4r", ".2d", 1, false, 32 }, - { ARM64::LD4Rv8b_POST, "ld4r", ".8b", 1, false, 4 }, - { ARM64::LD4Rv4h_POST, "ld4r", ".4h", 1, false, 8 }, - { ARM64::LD4Rv2s_POST, "ld4r", ".2s", 1, false, 16 }, - { ARM64::LD4Rv1d_POST, "ld4r", ".1d", 1, false, 32 }, - { ARM64::LD4Fourv16b, "ld4", ".16b", 0, false, 0 }, - { ARM64::LD4Fourv8h, "ld4", ".8h", 0, false, 0 }, - { ARM64::LD4Fourv4s, "ld4", ".4s", 0, false, 0 }, - { ARM64::LD4Fourv2d, "ld4", ".2d", 0, false, 0 }, - { ARM64::LD4Fourv8b, "ld4", ".8b", 0, false, 0 }, - { ARM64::LD4Fourv4h, "ld4", ".4h", 0, false, 0 }, - { ARM64::LD4Fourv2s, "ld4", ".2s", 0, false, 0 }, - { ARM64::LD4Fourv16b_POST, "ld4", ".16b", 1, false, 64 }, - { ARM64::LD4Fourv8h_POST, "ld4", ".8h", 1, false, 64 }, - { ARM64::LD4Fourv4s_POST, "ld4", ".4s", 1, false, 64 }, - { ARM64::LD4Fourv2d_POST, "ld4", ".2d", 1, false, 64 }, - { ARM64::LD4Fourv8b_POST, "ld4", ".8b", 1, false, 32 }, - { ARM64::LD4Fourv4h_POST, "ld4", ".4h", 1, false, 32 }, - { ARM64::LD4Fourv2s_POST, "ld4", ".2s", 1, false, 32 }, - { ARM64::ST1i8, "st1", ".b", 0, true, 0 }, - { ARM64::ST1i16, "st1", ".h", 0, true, 0 }, - { ARM64::ST1i32, "st1", ".s", 0, true, 0 }, - { ARM64::ST1i64, "st1", ".d", 0, true, 0 }, - { ARM64::ST1i8_POST, "st1", ".b", 1, true, 1 }, - { ARM64::ST1i16_POST, "st1", ".h", 1, true, 2 }, - { ARM64::ST1i32_POST, "st1", ".s", 1, true, 4 }, - { ARM64::ST1i64_POST, "st1", ".d", 1, true, 8 }, - { ARM64::ST1Onev16b, "st1", ".16b", 0, false, 0 }, - { ARM64::ST1Onev8h, "st1", ".8h", 0, false, 0 }, - { ARM64::ST1Onev4s, "st1", ".4s", 0, false, 0 }, - { ARM64::ST1Onev2d, "st1", ".2d", 0, false, 0 }, - { ARM64::ST1Onev8b, "st1", ".8b", 0, false, 0 }, - { ARM64::ST1Onev4h, "st1", ".4h", 0, false, 0 }, - { ARM64::ST1Onev2s, "st1", ".2s", 0, false, 0 }, - { ARM64::ST1Onev1d, "st1", ".1d", 0, false, 0 }, - { ARM64::ST1Onev16b_POST, "st1", ".16b", 1, false, 16 }, - { ARM64::ST1Onev8h_POST, "st1", ".8h", 1, false, 16 }, - { ARM64::ST1Onev4s_POST, "st1", ".4s", 1, false, 16 }, - { ARM64::ST1Onev2d_POST, "st1", ".2d", 1, false, 16 }, - { ARM64::ST1Onev8b_POST, "st1", ".8b", 1, false, 8 }, - { ARM64::ST1Onev4h_POST, "st1", ".4h", 1, false, 8 }, - { ARM64::ST1Onev2s_POST, "st1", ".2s", 1, false, 8 }, - { ARM64::ST1Onev1d_POST, "st1", ".1d", 1, false, 8 }, - { ARM64::ST1Twov16b, "st1", ".16b", 0, false, 0 }, - { ARM64::ST1Twov8h, "st1", ".8h", 0, false, 0 }, - { ARM64::ST1Twov4s, "st1", ".4s", 0, false, 0 }, - { ARM64::ST1Twov2d, "st1", ".2d", 0, false, 0 }, - { ARM64::ST1Twov8b, "st1", ".8b", 0, false, 0 }, - { ARM64::ST1Twov4h, "st1", ".4h", 0, false, 0 }, - { ARM64::ST1Twov2s, "st1", ".2s", 0, false, 0 }, - { ARM64::ST1Twov1d, "st1", ".1d", 0, false, 0 }, - { ARM64::ST1Twov16b_POST, "st1", ".16b", 1, false, 32 }, - { ARM64::ST1Twov8h_POST, "st1", ".8h", 1, false, 32 
}, - { ARM64::ST1Twov4s_POST, "st1", ".4s", 1, false, 32 }, - { ARM64::ST1Twov2d_POST, "st1", ".2d", 1, false, 32 }, - { ARM64::ST1Twov8b_POST, "st1", ".8b", 1, false, 16 }, - { ARM64::ST1Twov4h_POST, "st1", ".4h", 1, false, 16 }, - { ARM64::ST1Twov2s_POST, "st1", ".2s", 1, false, 16 }, - { ARM64::ST1Twov1d_POST, "st1", ".1d", 1, false, 16 }, - { ARM64::ST1Threev16b, "st1", ".16b", 0, false, 0 }, - { ARM64::ST1Threev8h, "st1", ".8h", 0, false, 0 }, - { ARM64::ST1Threev4s, "st1", ".4s", 0, false, 0 }, - { ARM64::ST1Threev2d, "st1", ".2d", 0, false, 0 }, - { ARM64::ST1Threev8b, "st1", ".8b", 0, false, 0 }, - { ARM64::ST1Threev4h, "st1", ".4h", 0, false, 0 }, - { ARM64::ST1Threev2s, "st1", ".2s", 0, false, 0 }, - { ARM64::ST1Threev1d, "st1", ".1d", 0, false, 0 }, - { ARM64::ST1Threev16b_POST, "st1", ".16b", 1, false, 48 }, - { ARM64::ST1Threev8h_POST, "st1", ".8h", 1, false, 48 }, - { ARM64::ST1Threev4s_POST, "st1", ".4s", 1, false, 48 }, - { ARM64::ST1Threev2d_POST, "st1", ".2d", 1, false, 48 }, - { ARM64::ST1Threev8b_POST, "st1", ".8b", 1, false, 24 }, - { ARM64::ST1Threev4h_POST, "st1", ".4h", 1, false, 24 }, - { ARM64::ST1Threev2s_POST, "st1", ".2s", 1, false, 24 }, - { ARM64::ST1Threev1d_POST, "st1", ".1d", 1, false, 24 }, - { ARM64::ST1Fourv16b, "st1", ".16b", 0, false, 0 }, - { ARM64::ST1Fourv8h, "st1", ".8h", 0, false, 0 }, - { ARM64::ST1Fourv4s, "st1", ".4s", 0, false, 0 }, - { ARM64::ST1Fourv2d, "st1", ".2d", 0, false, 0 }, - { ARM64::ST1Fourv8b, "st1", ".8b", 0, false, 0 }, - { ARM64::ST1Fourv4h, "st1", ".4h", 0, false, 0 }, - { ARM64::ST1Fourv2s, "st1", ".2s", 0, false, 0 }, - { ARM64::ST1Fourv1d, "st1", ".1d", 0, false, 0 }, - { ARM64::ST1Fourv16b_POST, "st1", ".16b", 1, false, 64 }, - { ARM64::ST1Fourv8h_POST, "st1", ".8h", 1, false, 64 }, - { ARM64::ST1Fourv4s_POST, "st1", ".4s", 1, false, 64 }, - { ARM64::ST1Fourv2d_POST, "st1", ".2d", 1, false, 64 }, - { ARM64::ST1Fourv8b_POST, "st1", ".8b", 1, false, 32 }, - { ARM64::ST1Fourv4h_POST, "st1", ".4h", 1, false, 32 }, - { ARM64::ST1Fourv2s_POST, "st1", ".2s", 1, false, 32 }, - { ARM64::ST1Fourv1d_POST, "st1", ".1d", 1, false, 32 }, - { ARM64::ST2i8, "st2", ".b", 0, true, 0 }, - { ARM64::ST2i16, "st2", ".h", 0, true, 0 }, - { ARM64::ST2i32, "st2", ".s", 0, true, 0 }, - { ARM64::ST2i64, "st2", ".d", 0, true, 0 }, - { ARM64::ST2i8_POST, "st2", ".b", 1, true, 2 }, - { ARM64::ST2i16_POST, "st2", ".h", 1, true, 4 }, - { ARM64::ST2i32_POST, "st2", ".s", 1, true, 8 }, - { ARM64::ST2i64_POST, "st2", ".d", 1, true, 16 }, - { ARM64::ST2Twov16b, "st2", ".16b", 0, false, 0 }, - { ARM64::ST2Twov8h, "st2", ".8h", 0, false, 0 }, - { ARM64::ST2Twov4s, "st2", ".4s", 0, false, 0 }, - { ARM64::ST2Twov2d, "st2", ".2d", 0, false, 0 }, - { ARM64::ST2Twov8b, "st2", ".8b", 0, false, 0 }, - { ARM64::ST2Twov4h, "st2", ".4h", 0, false, 0 }, - { ARM64::ST2Twov2s, "st2", ".2s", 0, false, 0 }, - { ARM64::ST2Twov16b_POST, "st2", ".16b", 1, false, 32 }, - { ARM64::ST2Twov8h_POST, "st2", ".8h", 1, false, 32 }, - { ARM64::ST2Twov4s_POST, "st2", ".4s", 1, false, 32 }, - { ARM64::ST2Twov2d_POST, "st2", ".2d", 1, false, 32 }, - { ARM64::ST2Twov8b_POST, "st2", ".8b", 1, false, 16 }, - { ARM64::ST2Twov4h_POST, "st2", ".4h", 1, false, 16 }, - { ARM64::ST2Twov2s_POST, "st2", ".2s", 1, false, 16 }, - { ARM64::ST3i8, "st3", ".b", 0, true, 0 }, - { ARM64::ST3i16, "st3", ".h", 0, true, 0 }, - { ARM64::ST3i32, "st3", ".s", 0, true, 0 }, - { ARM64::ST3i64, "st3", ".d", 0, true, 0 }, - { ARM64::ST3i8_POST, "st3", ".b", 1, true, 3 }, - { ARM64::ST3i16_POST, "st3", ".h", 1, true, 
6 }, - { ARM64::ST3i32_POST, "st3", ".s", 1, true, 12 }, - { ARM64::ST3i64_POST, "st3", ".d", 1, true, 24 }, - { ARM64::ST3Threev16b, "st3", ".16b", 0, false, 0 }, - { ARM64::ST3Threev8h, "st3", ".8h", 0, false, 0 }, - { ARM64::ST3Threev4s, "st3", ".4s", 0, false, 0 }, - { ARM64::ST3Threev2d, "st3", ".2d", 0, false, 0 }, - { ARM64::ST3Threev8b, "st3", ".8b", 0, false, 0 }, - { ARM64::ST3Threev4h, "st3", ".4h", 0, false, 0 }, - { ARM64::ST3Threev2s, "st3", ".2s", 0, false, 0 }, - { ARM64::ST3Threev16b_POST, "st3", ".16b", 1, false, 48 }, - { ARM64::ST3Threev8h_POST, "st3", ".8h", 1, false, 48 }, - { ARM64::ST3Threev4s_POST, "st3", ".4s", 1, false, 48 }, - { ARM64::ST3Threev2d_POST, "st3", ".2d", 1, false, 48 }, - { ARM64::ST3Threev8b_POST, "st3", ".8b", 1, false, 24 }, - { ARM64::ST3Threev4h_POST, "st3", ".4h", 1, false, 24 }, - { ARM64::ST3Threev2s_POST, "st3", ".2s", 1, false, 24 }, - { ARM64::ST4i8, "st4", ".b", 0, true, 0 }, - { ARM64::ST4i16, "st4", ".h", 0, true, 0 }, - { ARM64::ST4i32, "st4", ".s", 0, true, 0 }, - { ARM64::ST4i64, "st4", ".d", 0, true, 0 }, - { ARM64::ST4i8_POST, "st4", ".b", 1, true, 4 }, - { ARM64::ST4i16_POST, "st4", ".h", 1, true, 8 }, - { ARM64::ST4i32_POST, "st4", ".s", 1, true, 16 }, - { ARM64::ST4i64_POST, "st4", ".d", 1, true, 32 }, - { ARM64::ST4Fourv16b, "st4", ".16b", 0, false, 0 }, - { ARM64::ST4Fourv8h, "st4", ".8h", 0, false, 0 }, - { ARM64::ST4Fourv4s, "st4", ".4s", 0, false, 0 }, - { ARM64::ST4Fourv2d, "st4", ".2d", 0, false, 0 }, - { ARM64::ST4Fourv8b, "st4", ".8b", 0, false, 0 }, - { ARM64::ST4Fourv4h, "st4", ".4h", 0, false, 0 }, - { ARM64::ST4Fourv2s, "st4", ".2s", 0, false, 0 }, - { ARM64::ST4Fourv16b_POST, "st4", ".16b", 1, false, 64 }, - { ARM64::ST4Fourv8h_POST, "st4", ".8h", 1, false, 64 }, - { ARM64::ST4Fourv4s_POST, "st4", ".4s", 1, false, 64 }, - { ARM64::ST4Fourv2d_POST, "st4", ".2d", 1, false, 64 }, - { ARM64::ST4Fourv8b_POST, "st4", ".8b", 1, false, 32 }, - { ARM64::ST4Fourv4h_POST, "st4", ".4h", 1, false, 32 }, - { ARM64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 }, -}; - -static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { - unsigned Idx; - for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) - if (LdStNInstInfo[Idx].Opcode == Opcode) - return &LdStNInstInfo[Idx]; - - return nullptr; -} - -void ARM64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - unsigned Opcode = MI->getOpcode(); - StringRef Layout, Mnemonic; - - bool IsTbx; - if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { - O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t' - << getRegisterName(MI->getOperand(0).getReg(), ARM64::vreg) << ", "; - - unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); - - O << ", " - << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), ARM64::vreg); - printAnnotation(O, Annot); - return; - } - - if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { - O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; - - // Now onto the operands: first a vector list with possible lane - // specifier. E.g. { v0 }[2] - int OpNum = LdStDesc->ListOperand; - printVectorList(MI, OpNum++, O, ""); - - if (LdStDesc->HasLane) - O << '[' << MI->getOperand(OpNum++).getImm() << ']'; - - // Next the address: [xN] - unsigned AddrReg = MI->getOperand(OpNum++).getReg(); - O << ", [" << getRegisterName(AddrReg) << ']'; - - // Finally, there might be a post-indexed offset. 
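(The post-indexed offset handling continues below.) For context, getLdStNInstrDesc above is a plain linear scan over a static descriptor table keyed by opcode. A minimal standalone model of the same pattern, with invented opcode values, looks like this:

#include <cstddef>
#include <cstdio>

struct LdStDesc {
  unsigned Opcode;      // key
  const char *Mnemonic; // "ld1", "st4", ...
  const char *Layout;   // ".16b", ".4s", ...
  int ListOperand;      // operand index of the register list
  bool HasLane;         // lane-indexed form?
  int NaturalOffset;    // implied post-increment when the offset reg is XZR
};

static const LdStDesc Table[] = {
    {100, "ld1", ".16b", 0, false, 0},
    {101, "ld1", ".16b", 1, false, 16}, // post-indexed form
};

static const LdStDesc *lookup(unsigned Opcode) {
  for (std::size_t I = 0; I != sizeof(Table) / sizeof(Table[0]); ++I)
    if (Table[I].Opcode == Opcode)
      return &Table[I];
  return nullptr;
}

int main() {
  if (const LdStDesc *D = lookup(101))
    std::printf("%s%s, implied post-increment #%d\n", D->Mnemonic, D->Layout,
                D->NaturalOffset);
  return 0;
}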
- if (LdStDesc->NaturalOffset != 0) { - unsigned Reg = MI->getOperand(OpNum++).getReg(); - if (Reg != ARM64::XZR) - O << ", " << getRegisterName(Reg); - else { - assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?"); - O << ", #" << LdStDesc->NaturalOffset; - } - } - - printAnnotation(O, Annot); - return; - } - - ARM64InstPrinter::printInst(MI, O, Annot); -} - -bool ARM64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { -#ifndef NDEBUG - unsigned Opcode = MI->getOpcode(); - assert(Opcode == ARM64::SYSxt && "Invalid opcode for SYS alias!"); -#endif - - const char *Asm = nullptr; - const MCOperand &Op1 = MI->getOperand(0); - const MCOperand &Cn = MI->getOperand(1); - const MCOperand &Cm = MI->getOperand(2); - const MCOperand &Op2 = MI->getOperand(3); - - unsigned Op1Val = Op1.getImm(); - unsigned CnVal = Cn.getImm(); - unsigned CmVal = Cm.getImm(); - unsigned Op2Val = Op2.getImm(); - - if (CnVal == 7) { - switch (CmVal) { - default: - break; - - // IC aliases - case 1: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tialluis"; - break; - case 5: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tiallu"; - else if (Op1Val == 3 && Op2Val == 1) - Asm = "ic\tivau"; - break; - - // DC aliases - case 4: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tzva"; - break; - case 6: - if (Op1Val == 0 && Op2Val == 1) - Asm = "dc\tivac"; - if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tisw"; - break; - case 10: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcsw"; - break; - case 11: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvau"; - break; - case 14: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcivac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcisw"; - break; - - // AT aliases - case 8: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e1r"; break; - case 1: Asm = "at\ts1e1w"; break; - case 2: Asm = "at\ts1e0r"; break; - case 3: Asm = "at\ts1e0w"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e2r"; break; - case 1: Asm = "at\ts1e2w"; break; - case 4: Asm = "at\ts12e1r"; break; - case 5: Asm = "at\ts12e1w"; break; - case 6: Asm = "at\ts12e0r"; break; - case 7: Asm = "at\ts12e0w"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e3r"; break; - case 1: Asm = "at\ts1e3w"; break; - } - break; - } - break; - } - } else if (CnVal == 8) { - // TLBI aliases - switch (CmVal) { - default: - break; - case 3: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1is"; break; - case 1: Asm = "tlbi\tvae1is"; break; - case 2: Asm = "tlbi\taside1is"; break; - case 3: Asm = "tlbi\tvaae1is"; break; - case 5: Asm = "tlbi\tvale1is"; break; - case 7: Asm = "tlbi\tvaale1is"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2is"; break; - case 1: Asm = "tlbi\tvae2is"; break; - case 4: Asm = "tlbi\talle1is"; break; - case 5: Asm = "tlbi\tvale2is"; break; - case 6: Asm = "tlbi\tvmalls12e1is"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3is"; break; - case 1: Asm = "tlbi\tvae3is"; break; - case 5: Asm = "tlbi\tvale3is"; break; - } - break; - } - break; - case 0: - switch (Op1Val) { - default: - break; - case 4: - switch (Op2Val) { - default: - break; - case 1: Asm = "tlbi\tipas2e1is"; break; - case 5: Asm = 
"tlbi\tipas2le1is"; break; - } - break; - } - break; - case 4: - switch (Op1Val) { - default: - break; - case 4: - switch (Op2Val) { - default: - break; - case 1: Asm = "tlbi\tipas2e1"; break; - case 5: Asm = "tlbi\tipas2le1"; break; - } - break; - } - break; - case 7: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1"; break; - case 1: Asm = "tlbi\tvae1"; break; - case 2: Asm = "tlbi\taside1"; break; - case 3: Asm = "tlbi\tvaae1"; break; - case 5: Asm = "tlbi\tvale1"; break; - case 7: Asm = "tlbi\tvaale1"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2"; break; - case 1: Asm = "tlbi\tvae2"; break; - case 4: Asm = "tlbi\talle1"; break; - case 5: Asm = "tlbi\tvale2"; break; - case 6: Asm = "tlbi\tvmalls12e1"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3"; break; - case 1: Asm = "tlbi\tvae3"; break; - case 5: Asm = "tlbi\tvale3"; break; - } - break; - } - break; - } - } - - if (Asm) { - unsigned Reg = MI->getOperand(4).getReg(); - - O << '\t' << Asm; - if (StringRef(Asm).lower().find("all") == StringRef::npos) - O << ", " << getRegisterName(Reg); - } - - return Asm != nullptr; -} - -void ARM64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg); - } else if (Op.isImm()) { - O << '#' << Op.getImm(); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); - } -} - -void ARM64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - O << format("#%#llx", Op.getImm()); -} - -void ARM64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, - unsigned Imm, raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - if (Reg == ARM64::XZR) - O << "#" << Imm; - else - O << getRegisterName(Reg); - } else - assert(0 && "unknown operand kind in printPostIncOperand64"); -} - -void ARM64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isReg() && "Non-register vreg operand!"); - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg, ARM64::vreg); -} - -void ARM64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); - O << "c" << Op.getImm(); -} - -void ARM64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.isImm()) { - unsigned Val = (MO.getImm() & 0xfff); - assert(Val == MO.getImm() && "Add/sub immediate out of range!"); - unsigned Shift = - ARM64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); - O << '#' << Val; - if (Shift != 0) - printShifter(MI, OpNum + 1, O); - - if (CommentStream) - *CommentStream << '=' << (Val << Shift) << '\n'; - } else { - assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); - } -} - -void ARM64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 
32)); -} - -void ARM64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 64)); -} - -void ARM64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - // LSL #0 should not be printed. - if (ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - ARM64_AM::getShiftValue(Val) == 0) - return; - O << ", " << ARM64_AM::getShiftExtendName(ARM64_AM::getShiftType(Val)) << " #" - << ARM64_AM::getShiftValue(Val); -} - -void ARM64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printArithExtend(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - ARM64_AM::ShiftExtendType ExtType = ARM64_AM::getArithExtendType(Val); - unsigned ShiftVal = ARM64_AM::getArithShiftValue(Val); - - // If the destination or first source register operand is [W]SP, print - // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at - // all. - if (ExtType == ARM64_AM::UXTW || ExtType == ARM64_AM::UXTX) { - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src1 = MI->getOperand(1).getReg(); - if ( ((Dest == ARM64::SP || Src1 == ARM64::SP) && - ExtType == ARM64_AM::UXTX) || - ((Dest == ARM64::WSP || Src1 == ARM64::WSP) && - ExtType == ARM64_AM::UXTW) ) { - if (ShiftVal != 0) - O << ", lsl #" << ShiftVal; - return; - } - } - O << ", " << ARM64_AM::getShiftExtendName(ExtType); - if (ShiftVal != 0) - O << " #" << ShiftVal; -} - -void ARM64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, - raw_ostream &O, char SrcRegKind, - unsigned Width) { - unsigned SignExtend = MI->getOperand(OpNum).getImm(); - unsigned DoShift = MI->getOperand(OpNum + 1).getImm(); - - // sxtw, sxtx, uxtw or lsl (== uxtx) - bool IsLSL = !SignExtend && SrcRegKind == 'x'; - if (IsLSL) - O << "lsl"; - else - O << (SignExtend ? 
's' : 'u') << "xt" << SrcRegKind; - - if (DoShift || IsLSL) - O << " #" << Log2_32(Width / 8); -} - -void ARM64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - O << ARM64CC::getCondCodeName(CC); -} - -void ARM64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - O << ARM64CC::getCondCodeName(ARM64CC::getInvertedCondCode(CC)); -} - -void ARM64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; -} - -template -void ARM64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << Scale * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum, - unsigned Scale, raw_ostream &O) { - const MCOperand MO = MI->getOperand(OpNum); - if (MO.isImm()) { - O << "#" << (MO.getImm() * Scale); - } else { - assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); - } -} - -void ARM64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, - unsigned Scale, raw_ostream &O) { - const MCOperand MO1 = MI->getOperand(OpNum + 1); - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()); - if (MO1.isImm()) { - O << ", #" << (MO1.getImm() * Scale); - } else { - assert(MO1.isExpr() && "Unexpected operand type!"); - O << ", " << *MO1.getExpr(); - } - O << ']'; -} - -void ARM64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned prfop = MI->getOperand(OpNum).getImm(); - bool Valid; - StringRef Name = ARM64PRFM::PRFMMapper().toString(prfop, Valid); - if (Valid) - O << Name; - else - O << '#' << prfop; -} - -void ARM64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - float FPImm = MO.isFPImm() ? MO.getFPImm() : ARM64_AM::getFPImmFloat(MO.getImm()); - - // 8 decimal places are enough to perfectly represent permitted floats. 
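(The print statement itself follows below.) The value being formatted is the architectural 8-bit FP literal, sign : 3-bit exponent : 4-bit fraction, which only produces multiples of 2^-7 between 0.125 and 31.0, so a fixed "%.8f" is always exact. A standalone decoder written from that architectural definition, not taken from this patch, is:

#include <cassert>
#include <cmath>
#include <cstdint>

// imm8 = a:bcd:efgh  ->  (-1)^a * (16 + efgh)/16 * 2^exp,
// where exp = cd + 1 when b == 0, and cd - 3 when b == 1.
static double expandFPImm8(uint8_t Imm8) {
  int Sign = (Imm8 >> 7) & 1;
  int B = (Imm8 >> 6) & 1;
  int CD = (Imm8 >> 4) & 3;
  int Frac = Imm8 & 0xf;
  int Exp = B ? CD - 3 : CD + 1;
  double Val = (16.0 + Frac) / 16.0 * std::ldexp(1.0, Exp);
  return Sign ? -Val : Val;
}

int main() {
  assert(expandFPImm8(0x70) == 1.0); // fmov ..., #1.0
  assert(expandFPImm8(0x00) == 2.0); // imm8 of 0 means 2.0; 0.0 is not encodable
  return 0;
}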
- O << format("#%.8f", FPImm); -} - -static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { - while (Stride--) { - switch (Reg) { - default: - assert(0 && "Vector register expected!"); - case ARM64::Q0: Reg = ARM64::Q1; break; - case ARM64::Q1: Reg = ARM64::Q2; break; - case ARM64::Q2: Reg = ARM64::Q3; break; - case ARM64::Q3: Reg = ARM64::Q4; break; - case ARM64::Q4: Reg = ARM64::Q5; break; - case ARM64::Q5: Reg = ARM64::Q6; break; - case ARM64::Q6: Reg = ARM64::Q7; break; - case ARM64::Q7: Reg = ARM64::Q8; break; - case ARM64::Q8: Reg = ARM64::Q9; break; - case ARM64::Q9: Reg = ARM64::Q10; break; - case ARM64::Q10: Reg = ARM64::Q11; break; - case ARM64::Q11: Reg = ARM64::Q12; break; - case ARM64::Q12: Reg = ARM64::Q13; break; - case ARM64::Q13: Reg = ARM64::Q14; break; - case ARM64::Q14: Reg = ARM64::Q15; break; - case ARM64::Q15: Reg = ARM64::Q16; break; - case ARM64::Q16: Reg = ARM64::Q17; break; - case ARM64::Q17: Reg = ARM64::Q18; break; - case ARM64::Q18: Reg = ARM64::Q19; break; - case ARM64::Q19: Reg = ARM64::Q20; break; - case ARM64::Q20: Reg = ARM64::Q21; break; - case ARM64::Q21: Reg = ARM64::Q22; break; - case ARM64::Q22: Reg = ARM64::Q23; break; - case ARM64::Q23: Reg = ARM64::Q24; break; - case ARM64::Q24: Reg = ARM64::Q25; break; - case ARM64::Q25: Reg = ARM64::Q26; break; - case ARM64::Q26: Reg = ARM64::Q27; break; - case ARM64::Q27: Reg = ARM64::Q28; break; - case ARM64::Q28: Reg = ARM64::Q29; break; - case ARM64::Q29: Reg = ARM64::Q30; break; - case ARM64::Q30: Reg = ARM64::Q31; break; - // Vector lists can wrap around. - case ARM64::Q31: - Reg = ARM64::Q0; - break; - } - } - return Reg; -} - -void ARM64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O, StringRef LayoutSuffix) { - unsigned Reg = MI->getOperand(OpNum).getReg(); - - O << "{ "; - - // Work out how many registers there are in the list (if there is an actual - // list). - unsigned NumRegs = 1; - if (MRI.getRegClass(ARM64::DDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQRegClassID).contains(Reg)) - NumRegs = 2; - else if (MRI.getRegClass(ARM64::DDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQRegClassID).contains(Reg)) - NumRegs = 3; - else if (MRI.getRegClass(ARM64::DDDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQQRegClassID).contains(Reg)) - NumRegs = 4; - - // Now forget about the list and find out what the first register is. - if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::dsub0)) - Reg = FirstReg; - else if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::qsub0)) - Reg = FirstReg; - - // If it's a D-reg, we need to promote it to the equivalent Q-reg before - // printing (otherwise getRegisterName fails). 
- if (MRI.getRegClass(ARM64::FPR64RegClassID).contains(Reg)) { - const MCRegisterClass &FPR128RC = MRI.getRegClass(ARM64::FPR128RegClassID); - Reg = MRI.getMatchingSuperReg(Reg, ARM64::dsub, &FPR128RC); - } - - for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { - O << getRegisterName(Reg, ARM64::vreg) << LayoutSuffix; - if (i + 1 != NumRegs) - O << ", "; - } - - O << " }"; -} - -void ARM64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - printVectorList(MI, OpNum, O, ""); -} - -template -void ARM64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - std::string Suffix("."); - if (NumLanes) - Suffix += itostr(NumLanes) + LaneKind; - else - Suffix += LaneKind; - - printVectorList(MI, OpNum, O, Suffix); -} - -void ARM64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "[" << MI->getOperand(OpNum).getImm() << "]"; -} - -void ARM64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - - // If the label has already been resolved to an immediate offset (say, when - // we're running the disassembler), just print the immediate. - if (Op.isImm()) { - O << "#" << (Op.getImm() << 2); - return; - } - - // If the branch target is simply an address then print it in hex. - const MCConstantExpr *BranchTarget = - dyn_cast(MI->getOperand(OpNum).getExpr()); - int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { - O << "0x"; - O.write_hex(Address); - } else { - // Otherwise, just print the expression. - O << *MI->getOperand(OpNum).getExpr(); - } -} - -void ARM64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - - // If the label has already been resolved to an immediate offset (say, when - // we're running the disassembler), just print the immediate. - if (Op.isImm()) { - O << "#" << (Op.getImm() << 12); - return; - } - - // Otherwise, just print the expression. 
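(The expression fall-through follows below.) The two shifts above reflect how AArch64 stores PC-relative immediates: branch and load-literal offsets count 4-byte words, while ADRP offsets count 4 KiB pages from the current page. A standalone sketch of the resulting address arithmetic (not the patch's code):

#include <cstdint>

static uint64_t branchTarget(uint64_t PC, int64_t Imm) {
  return PC + Imm * 4; // word-scaled offset (B, BL, CBZ, B.cc, LDR literal)
}

static uint64_t adrpTarget(uint64_t PC, int64_t Imm) {
  return (PC & ~uint64_t(0xfff)) + Imm * 4096; // page-scaled, page-aligned base
}

int main() {
  return branchTarget(0x1000, 2) == 0x1008 && adrpTarget(0x1234, 1) == 0x2000
             ? 0
             : 1;
}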
- O << *MI->getOperand(OpNum).getExpr(); -} - -void ARM64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - unsigned Opcode = MI->getOpcode(); - - bool Valid; - StringRef Name; - if (Opcode == ARM64::ISB) - Name = ARM64ISB::ISBMapper().toString(Val, Valid); - else - Name = ARM64DB::DBarrierMapper().toString(Val, Valid); - if (Valid) - O << Name; - else - O << "#" << Val; -} - -void ARM64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - - bool Valid; - auto Mapper = ARM64SysReg::MRSMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val, Valid); - - if (Valid) - O << StringRef(Name).upper(); -} - -void ARM64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - - bool Valid; - auto Mapper = ARM64SysReg::MSRMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val, Valid); - - if (Valid) - O << StringRef(Name).upper(); -} - -void ARM64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - - bool Valid; - StringRef Name = ARM64PState::PStateMapper().toString(Val, Valid); - if (Valid) - O << StringRef(Name.str()).upper(); - else - O << "#" << Val; -} - -void ARM64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned RawVal = MI->getOperand(OpNo).getImm(); - uint64_t Val = ARM64_AM::decodeAdvSIMDModImmType10(RawVal); - O << format("#%#016llx", Val); -} diff --git a/lib/Target/ARM64/InstPrinter/CMakeLists.txt b/lib/Target/ARM64/InstPrinter/CMakeLists.txt deleted file mode 100644 index b8ee12c5541..00000000000 --- a/lib/Target/ARM64/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64AsmPrinter - ARM64InstPrinter.cpp - ) - -add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen) diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h b/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h deleted file mode 100644 index 7106b314ea2..00000000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h +++ /dev/null @@ -1,76 +0,0 @@ -//===-- ARM64FixupKinds.h - ARM64 Specific Fixup Entries --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARM64FIXUPKINDS_H -#define LLVM_ARM64FIXUPKINDS_H - -#include "llvm/MC/MCFixup.h" - -namespace llvm { -namespace ARM64 { - -enum Fixups { - // fixup_arm64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into - // an ADR instruction. - fixup_arm64_pcrel_adr_imm21 = FirstTargetFixupKind, - - // fixup_arm64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into - // an ADRP instruction. - fixup_arm64_pcrel_adrp_imm21, - - // fixup_arm64_imm12 - 12-bit fixup for add/sub instructions. - // No alignment adjustment. All value bits are encoded. - fixup_arm64_add_imm12, - - // fixup_arm64_ldst_imm12_* - unsigned 12-bit fixups for load and - // store instructions. 
- fixup_arm64_ldst_imm12_scale1, - fixup_arm64_ldst_imm12_scale2, - fixup_arm64_ldst_imm12_scale4, - fixup_arm64_ldst_imm12_scale8, - fixup_arm64_ldst_imm12_scale16, - - // fixup_arm64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_arm64_pcrel_adrhi, except this is used by - // pc-relative loads and generates relocations directly when necessary. - fixup_arm64_ldr_pcrel_imm19, - - // FIXME: comment - fixup_arm64_movw, - - // fixup_arm64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative - // immediate. - fixup_arm64_pcrel_branch14, - - // fixup_arm64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_arm64_pcrel_adrhi, except this is use by - // b.cc and generates relocations directly when necessary. - fixup_arm64_pcrel_branch19, - - // fixup_arm64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative - // immediate. - fixup_arm64_pcrel_branch26, - - // fixup_arm64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative - // immediate. Distinguished from branch26 only on ELF. - fixup_arm64_pcrel_call26, - - // fixup_arm64_tlsdesc_call - zero-space placeholder for the ELF - // R_AARCH64_TLSDESC_CALL relocation. - fixup_arm64_tlsdesc_call, - - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind -}; - -} // end namespace ARM64 -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp deleted file mode 100644 index 079d3588f6e..00000000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp +++ /dev/null @@ -1,210 +0,0 @@ -//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides ARM64 specific target descriptions. 
-// -//===----------------------------------------------------------------------===// - -#include "ARM64MCTargetDesc.h" -#include "ARM64ELFStreamer.h" -#include "ARM64MCAsmInfo.h" -#include "InstPrinter/ARM64InstPrinter.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -#define GET_INSTRINFO_MC_DESC -#include "ARM64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "ARM64GenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "ARM64GenRegisterInfo.inc" - -static MCInstrInfo *createARM64MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitARM64MCInstrInfo(X); - return X; -} - -static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - - if (CPU.empty()) - CPU = "generic"; - - InitARM64MCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitARM64MCRegisterInfo(X, ARM64::LR); - return X; -} - -static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - - MCAsmInfo *MAI; - if (TheTriple.isOSDarwin()) - MAI = new ARM64MCAsmInfoDarwin(); - else { - assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); - MAI = new ARM64MCAsmInfoELF(TT); - } - - // Initial state of the frame pointer is SP. - unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); - MAI->addInitialFrameState(Inst); - - return MAI; -} - -static MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - Triple TheTriple(TT); - assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) && - "Only expect Darwin and ELF targets"); - - if (CM == CodeModel::Default) - CM = CodeModel::Small; - // The default MCJIT memory managers make no guarantees about where they can - // find an executable page; JITed code needs to be able to refer to globals - // no matter how far away they are. - else if (CM == CodeModel::JITDefault) - CM = CodeModel::Large; - else if (CM != CodeModel::Small && CM != CodeModel::Large) - report_fatal_error("Only small and large code models are allowed on ARM64"); - - // ARM64 Darwin is always PIC. - if (TheTriple.isOSDarwin()) - RM = Reloc::PIC_; - // On ELF platforms the default static relocation model has a smart enough - // linker to cope with referencing external symbols defined in a shared - // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
- else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) - RM = Reloc::Static; - - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCInstPrinter *createARM64MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return new ARM64InstPrinter(MAI, MII, MRI, STI); - if (SyntaxVariant == 1) - return new ARM64AppleInstPrinter(MAI, MII, MRI, STI); - - return nullptr; -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - /*LabelSections*/ true); - - return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); -} - -// Force static initialization. -extern "C" void LLVMInitializeARM64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheARM64leTarget, createARM64MCAsmInfo); - RegisterMCAsmInfoFn Y(TheARM64beTarget, createARM64MCAsmInfo); - RegisterMCAsmInfoFn Z(TheAArch64leTarget, createARM64MCAsmInfo); - RegisterMCAsmInfoFn W(TheAArch64beTarget, createARM64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget, - createARM64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget, - createARM64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, - createARM64MCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, - createARM64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget, createARM64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget, createARM64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, createARM64MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, createARM64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheARM64leTarget, createARM64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheARM64beTarget, createARM64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, createARM64MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, createARM64MCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget, - createARM64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget, - createARM64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, - createARM64MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, - createARM64MCSubtargetInfo); - - // Register the asm backend. 
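(The remaining registrations continue below.) The initializer above repeats one pattern: the same factory function is installed for each of the four target variants. Stripped of the TargetRegistry machinery, the shape of that pattern is roughly this (all names here are invented stand-ins):

#include <cstdio>
#include <functional>
#include <initializer_list>
#include <map>
#include <string>

// A toy registry: target name -> factory.
using Factory = std::function<std::string()>;
static std::map<std::string, Factory> Registry;

static void registerAll(std::initializer_list<const char *> Targets,
                        Factory F) {
  for (const char *T : Targets)
    Registry[T] = F; // same factory under every variant name
}

int main() {
  registerAll({"arm64le", "arm64be", "aarch64le", "aarch64be"},
              [] { return std::string("MCAsmInfo"); });
  std::printf("%zu variants registered\n", Registry.size());
  return 0;
}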
- TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget, createARM64leAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget, createARM64beAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, createARM64leAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, createARM64beAsmBackend); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget, - createARM64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget, - createARM64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, - createARM64MCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, - createARM64MCCodeEmitter); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, createMCStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget, - createARM64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget, - createARM64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, - createARM64MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, - createARM64MCInstPrinter); -} diff --git a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index f8665bcfe94..00000000000 --- a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -add_llvm_library(LLVMARM64Desc - ARM64AsmBackend.cpp - ARM64ELFObjectWriter.cpp - ARM64ELFStreamer.cpp - ARM64MCAsmInfo.cpp - ARM64MCCodeEmitter.cpp - ARM64MCExpr.cpp - ARM64MCTargetDesc.cpp - ARM64MachObjectWriter.cpp -) -add_dependencies(LLVMARM64Desc ARM64CommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/ARM64/Makefile b/lib/Target/ARM64/Makefile deleted file mode 100644 index cfb05d2a87b..00000000000 --- a/lib/Target/ARM64/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMARM64CodeGen -TARGET = ARM64 - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \ - ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \ - ARM64GenDAGISel.inc \ - ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \ - ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \ - ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \ - ARM64GenMCPseudoLowering.inc - -DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc Utils - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/TargetInfo/CMakeLists.txt b/lib/Target/ARM64/TargetInfo/CMakeLists.txt deleted file mode 100644 index a0142c40713..00000000000 --- a/lib/Target/ARM64/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMARM64Info - ARM64TargetInfo.cpp - ) - -add_dependencies(LLVMARM64Info ARM64CommonTableGen) diff --git a/lib/Target/ARM64/Utils/CMakeLists.txt b/lib/Target/ARM64/Utils/CMakeLists.txt deleted file mode 100644 index f69076f4ef6..00000000000 --- a/lib/Target/ARM64/Utils/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMARM64Utils - ARM64BaseInfo.cpp - ) diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index da2309ba0cb..1b0837cb3b5 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM ARM64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore +subdirectories = ARM AArch64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 18a0f9c167a..be1b5aa50b1 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -836,8 +836,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: { + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: { Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); @@ -848,7 +848,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Check for constant LHS & RHS - in this case we just simplify. 
bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu || - II->getIntrinsicID() == Intrinsic::arm64_neon_umull); + II->getIntrinsicID() == Intrinsic::aarch64_neon_umull); VectorType *NewVT = cast(II->getType()); if (Constant *CV0 = dyn_cast(Arg0)) { if (Constant *CV1 = dyn_cast(Arg1)) { diff --git a/test/Analysis/CostModel/ARM64/lit.local.cfg b/test/Analysis/CostModel/AArch64/lit.local.cfg similarity index 73% rename from test/Analysis/CostModel/ARM64/lit.local.cfg rename to test/Analysis/CostModel/AArch64/lit.local.cfg index 84ac9811f01..c42034979fc 100644 --- a/test/Analysis/CostModel/ARM64/lit.local.cfg +++ b/test/Analysis/CostModel/AArch64/lit.local.cfg @@ -1,3 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Analysis/CostModel/ARM64/select.ll b/test/Analysis/CostModel/AArch64/select.ll similarity index 100% rename from test/Analysis/CostModel/ARM64/select.ll rename to test/Analysis/CostModel/AArch64/select.ll diff --git a/test/Analysis/CostModel/ARM64/store.ll b/test/Analysis/CostModel/AArch64/store.ll similarity index 100% rename from test/Analysis/CostModel/ARM64/store.ll rename to test/Analysis/CostModel/AArch64/store.ll diff --git a/test/CodeGen/AArch64/128bit_load_store.ll b/test/CodeGen/AArch64/128bit_load_store.ll index 56f67873f84..a6f077698e4 100644 --- a/test/CodeGen/AArch64/128bit_load_store.ll +++ b/test/CodeGen/AArch64/128bit_load_store.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s --check-prefix=CHECK define void @test_store_f128(fp128* %ptr, fp128 %val) #0 { ; CHECK-LABEL: test_store_f128 @@ -17,8 +17,8 @@ entry: } define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 { -; CHECK-ARM64-LABEL: test_vstrq_p128 -; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_vstrq_p128 +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] entry: %0 = bitcast i128* %ptr to fp128* @@ -28,8 +28,8 @@ entry: } define i128 @test_vldrq_p128(i128* readonly %ptr) #2 { -; CHECK-ARM64-LABEL: test_vldrq_p128 -; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_vldrq_p128 +; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] entry: %0 = bitcast i128* %ptr to fp128* diff --git a/test/CodeGen/ARM64/aarch64-neon-v1i1-setcc.ll b/test/CodeGen/AArch64/aarch64-neon-v1i1-setcc.ll similarity index 100% rename from test/CodeGen/ARM64/aarch64-neon-v1i1-setcc.ll rename to test/CodeGen/AArch64/aarch64-neon-v1i1-setcc.ll diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll index 3aa427c352c..b85fdbb14ce 100644 --- a/test/CodeGen/AArch64/addsub.ll +++ b/test/CodeGen/AArch64/addsub.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-linux-gnu | FileCheck %s ; Note that this should be refactored (for efficiency if nothing else) ; when the PCS is implemented so we don't have to worry about the diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll index cd01f594dcd..a2266b1d36d 100644 --- a/test/CodeGen/AArch64/addsub_ext.ll +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs %s -o - -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc 
-verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck %s @var8 = global i8 0 @var16 = global i16 0 diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll index 7cab200b1ea..f93efbc42e6 100644 --- a/test/CodeGen/AArch64/alloca.ll +++ b/test/CodeGen/AArch64/alloca.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s declare void @use_addr(i8*) @@ -53,7 +53,7 @@ define i64 @test_alloca_with_local(i64 %n) { %val = load i64* %loc -; CHECK-ARM64: ldur x0, [x29, #-[[LOC_FROM_FP]]] +; CHECK: ldur x0, [x29, #-[[LOC_FROM_FP]]] ret i64 %val ; Make sure epilogue restores sp from fp @@ -74,16 +74,16 @@ define void @test_variadic_alloca(i64 %n, ...) { ; CHECK-NOFP-AARCH64: add x8, [[TMP]], #0 -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64: mov x29, sp -; CHECK-ARM64: sub sp, sp, #192 -; CHECK-ARM64: stp q6, q7, [x29, #-96] +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK: mov x29, sp +; CHECK: sub sp, sp, #192 +; CHECK: stp q6, q7, [x29, #-96] ; [...] -; CHECK-ARM64: stp q0, q1, [x29, #-192] +; CHECK: stp q0, q1, [x29, #-192] -; CHECK-ARM64: stp x6, x7, [x29, #-16] +; CHECK: stp x6, x7, [x29, #-16] ; [...] -; CHECK-ARM64: stp x2, x3, [x29, #-48] +; CHECK: stp x2, x3, [x29, #-48] ; CHECK-NOFP-ARM64: stp x29, x30, [sp, #-16]! ; CHECK-NOFP-ARM64: mov x29, sp @@ -115,11 +115,11 @@ define void @test_alloca_large_frame(i64 %n) { ; CHECK-LABEL: test_alloca_large_frame: -; CHECK-ARM64: stp x20, x19, [sp, #-32]! -; CHECK-ARM64: stp x29, x30, [sp, #16] -; CHECK-ARM64: add x29, sp, #16 -; CHECK-ARM64: sub sp, sp, #1953, lsl #12 -; CHECK-ARM64: sub sp, sp, #512 +; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: stp x29, x30, [sp, #16] +; CHECK: add x29, sp, #16 +; CHECK: sub sp, sp, #1953, lsl #12 +; CHECK: sub sp, sp, #512 %addr1 = alloca i8, i64 %n %addr2 = alloca i64, i64 1000000 @@ -128,9 +128,9 @@ define void @test_alloca_large_frame(i64 %n) { ret void -; CHECK-ARM64: sub sp, x29, #16 -; CHECK-ARM64: ldp x29, x30, [sp, #16] -; CHECK-ARM64: ldp x20, x19, [sp], #32 +; CHECK: sub sp, x29, #16 +; CHECK: ldp x29, x30, [sp, #16] +; CHECK: ldp x20, x19, [sp], #32 } declare i8* @llvm.stacksave() diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll index 1d4daec5f43..6616b27c45b 100644 --- a/test/CodeGen/AArch64/analyze-branch.ll +++ b/test/CodeGen/AArch64/analyze-branch.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; This test checks that LLVM can do basic stripping and reapplying of branches ; to basic blocks. 
diff --git a/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll b/test/CodeGen/AArch64/arm64-2011-03-09-CPSRSpill.ll similarity index 100% rename from test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll rename to test/CodeGen/AArch64/arm64-2011-03-09-CPSRSpill.ll diff --git a/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll similarity index 100% rename from test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll rename to test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll diff --git a/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll b/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll similarity index 100% rename from test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll rename to test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll diff --git a/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll b/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll similarity index 100% rename from test/CodeGen/ARM64/2011-04-21-CPSRBug.ll rename to test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll diff --git a/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll similarity index 100% rename from test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll rename to test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll diff --git a/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll b/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll similarity index 100% rename from test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll rename to test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll diff --git a/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll b/test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll similarity index 100% rename from test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll rename to test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll diff --git a/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll similarity index 100% rename from test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll rename to test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll diff --git a/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll similarity index 100% rename from test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll rename to test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll diff --git a/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll similarity index 100% rename from test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll rename to test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll diff --git a/test/CodeGen/ARM64/2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll similarity index 100% rename from test/CodeGen/ARM64/2012-06-06-FPToUI.ll rename to test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll diff --git a/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll b/test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll similarity index 100% rename from test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll rename to test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll diff --git a/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll b/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll similarity index 72% rename from test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll rename to test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll index b40a581d611..e2c43d953bb 100644 --- a/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll +++ 
b/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios7.0.0" diff --git a/test/CodeGen/ARM64/2013-01-23-frem-crash.ll b/test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll similarity index 100% rename from test/CodeGen/ARM64/2013-01-23-frem-crash.ll rename to test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll diff --git a/test/CodeGen/ARM64/2013-01-23-sext-crash.ll b/test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll similarity index 100% rename from test/CodeGen/ARM64/2013-01-23-sext-crash.ll rename to test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll diff --git a/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll b/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll similarity index 83% rename from test/CodeGen/ARM64/2013-02-12-shufv8i8.ll rename to test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll index 70e745fc577..a350ba1472c 100644 --- a/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll +++ b/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple ;CHECK-LABEL: Shuff: ;CHECK: tbl.8b diff --git a/test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll b/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll similarity index 100% rename from test/CodeGen/ARM64/2014-04-16-AnInfiniteLoopInDAGCombine.ll rename to test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll diff --git a/test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll b/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll similarity index 63% rename from test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll rename to test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll index a4c9cd8f106..3949b85fbd3 100644 --- a/test/CodeGen/ARM64/2014-04-28-sqshl-uqshl-i64Contant.ll +++ b/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll @@ -4,16 +4,16 @@ define i64 @test_vqshld_s64_i(i64 %a) { ; CHECK-LABEL: test_vqshld_s64_i: ; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #36 - %1 = tail call i64 @llvm.arm64.neon.sqshl.i64(i64 %a, i64 36) + %1 = tail call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 36) ret i64 %1 } define i64 @test_vqshld_u64_i(i64 %a) { ; CHECK-LABEL: test_vqshld_u64_i: ; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #36 - %1 = tail call i64 @llvm.arm64.neon.uqshl.i64(i64 %a, i64 36) + %1 = tail call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 36) ret i64 %1 } -declare i64 @llvm.arm64.neon.uqshl.i64(i64, i64) -declare i64 @llvm.arm64.neon.sqshl.i64(i64, i64) +declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) +declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) diff --git a/test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll similarity index 94% rename from test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll rename to test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll index b0ab9fda22d..1b2d54317c2 100644 --- a/test/CodeGen/ARM64/2014-04-29-EXT-undef-mask.ll +++ 
b/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; The following 2 test cases test shufflevector with beginning UNDEF mask. define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) { diff --git a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll similarity index 89% rename from test/CodeGen/ARM64/AdvSIMD-Scalar.ll rename to test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll index 3e75eed4cd5..c4597d5a481 100644 --- a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll +++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=generic -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: bar: diff --git a/test/CodeGen/ARM64/aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll similarity index 100% rename from test/CodeGen/ARM64/aapcs.ll rename to test/CodeGen/AArch64/arm64-aapcs.ll diff --git a/test/CodeGen/ARM64/abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll similarity index 100% rename from test/CodeGen/ARM64/abi-varargs.ll rename to test/CodeGen/AArch64/arm64-abi-varargs.ll diff --git a/test/CodeGen/ARM64/abi.ll b/test/CodeGen/AArch64/arm64-abi.ll similarity index 100% rename from test/CodeGen/ARM64/abi.ll rename to test/CodeGen/AArch64/arm64-abi.ll diff --git a/test/CodeGen/ARM64/abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll similarity index 100% rename from test/CodeGen/ARM64/abi_align.ll rename to test/CodeGen/AArch64/arm64-abi_align.ll diff --git a/test/CodeGen/ARM64/addp.ll b/test/CodeGen/AArch64/arm64-addp.ll similarity index 90% rename from test/CodeGen/ARM64/addp.ll rename to test/CodeGen/AArch64/arm64-addp.ll index 428f6d4f28a..3f1e5c5d44e 100644 --- a/test/CodeGen/ARM64/addp.ll +++ b/test/CodeGen/AArch64/arm64-addp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -mcpu=cyclone | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s define double @foo(<2 x double> %a) nounwind { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/ARM64/addr-mode-folding.ll b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll similarity index 100% rename from test/CodeGen/ARM64/addr-mode-folding.ll rename to test/CodeGen/AArch64/arm64-addr-mode-folding.ll diff --git a/test/CodeGen/ARM64/addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll similarity index 100% rename from test/CodeGen/ARM64/addr-type-promotion.ll rename to test/CodeGen/AArch64/arm64-addr-type-promotion.ll diff --git a/test/CodeGen/ARM64/addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll similarity index 100% rename from test/CodeGen/ARM64/addrmode.ll rename to test/CodeGen/AArch64/arm64-addrmode.ll diff --git a/test/CodeGen/ARM64/alloc-no-stack-realign.ll b/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll similarity index 100% rename from test/CodeGen/ARM64/alloc-no-stack-realign.ll rename to 
test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll diff --git a/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll similarity index 100% rename from test/CodeGen/ARM64/alloca-frame-pointer-offset.ll rename to test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll diff --git a/test/CodeGen/ARM64/andCmpBrToTBZ.ll b/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll similarity index 100% rename from test/CodeGen/ARM64/andCmpBrToTBZ.ll rename to test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll diff --git a/test/CodeGen/ARM64/ands-bad-peephole.ll b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll similarity index 100% rename from test/CodeGen/ARM64/ands-bad-peephole.ll rename to test/CodeGen/AArch64/arm64-ands-bad-peephole.ll diff --git a/test/CodeGen/ARM64/anyregcc-crash.ll b/test/CodeGen/AArch64/arm64-anyregcc-crash.ll similarity index 100% rename from test/CodeGen/ARM64/anyregcc-crash.ll rename to test/CodeGen/AArch64/arm64-anyregcc-crash.ll diff --git a/test/CodeGen/ARM64/anyregcc.ll b/test/CodeGen/AArch64/arm64-anyregcc.ll similarity index 100% rename from test/CodeGen/ARM64/anyregcc.ll rename to test/CodeGen/AArch64/arm64-anyregcc.ll diff --git a/test/CodeGen/ARM64/arith-saturating.ll b/test/CodeGen/AArch64/arm64-arith-saturating.ll similarity index 58% rename from test/CodeGen/ARM64/arith-saturating.ll rename to test/CodeGen/AArch64/arm64-arith-saturating.ll index 2d188ff9cc7..78cd1fcb1a2 100644 --- a/test/CodeGen/ARM64/arith-saturating.ll +++ b/test/CodeGen/AArch64/arm64-arith-saturating.ll @@ -5,7 +5,7 @@ define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK: sqadd s0, s0, s1 %vecext = extractelement <4 x i32> %b, i32 0 %vecext1 = extractelement <4 x i32> %c, i32 0 - %vqadd.i = tail call i32 @llvm.arm64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind + %vqadd.i = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind ret i32 %vqadd.i } @@ -14,7 +14,7 @@ define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK: sqadd d0, d0, d1 %vecext = extractelement <2 x i64> %b, i32 0 %vecext1 = extractelement <2 x i64> %c, i32 0 - %vqadd.i = tail call i64 @llvm.arm64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind + %vqadd.i = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind ret i64 %vqadd.i } @@ -23,7 +23,7 @@ define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK: uqadd s0, s0, s1 %vecext = extractelement <4 x i32> %b, i32 0 %vecext1 = extractelement <4 x i32> %c, i32 0 - %vqadd.i = tail call i32 @llvm.arm64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind + %vqadd.i = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind ret i32 %vqadd.i } @@ -32,21 +32,21 @@ define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK: uqadd d0, d0, d1 %vecext = extractelement <2 x i64> %b, i32 0 %vecext1 = extractelement <2 x i64> %c, i32 0 - %vqadd.i = tail call i64 @llvm.arm64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind + %vqadd.i = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind ret i64 %vqadd.i } -declare i64 @llvm.arm64.neon.uqadd.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.uqadd.i32(i32, i32) nounwind readnone -declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.neon.uqadd.i64(i64, i64) 
nounwind readnone +declare i32 @llvm.aarch64.neon.uqadd.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) nounwind readnone define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK-LABEL: qsubs: ; CHECK: sqsub s0, s0, s1 %vecext = extractelement <4 x i32> %b, i32 0 %vecext1 = extractelement <4 x i32> %c, i32 0 - %vqsub.i = tail call i32 @llvm.arm64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind + %vqsub.i = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind ret i32 %vqsub.i } @@ -55,7 +55,7 @@ define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK: sqsub d0, d0, d1 %vecext = extractelement <2 x i64> %b, i32 0 %vecext1 = extractelement <2 x i64> %c, i32 0 - %vqsub.i = tail call i64 @llvm.arm64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind + %vqsub.i = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind ret i64 %vqsub.i } @@ -64,7 +64,7 @@ define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { ; CHECK: uqsub s0, s0, s1 %vecext = extractelement <4 x i32> %b, i32 0 %vecext1 = extractelement <4 x i32> %c, i32 0 - %vqsub.i = tail call i32 @llvm.arm64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind + %vqsub.i = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind ret i32 %vqsub.i } @@ -73,21 +73,21 @@ define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { ; CHECK: uqsub d0, d0, d1 %vecext = extractelement <2 x i64> %b, i32 0 %vecext1 = extractelement <2 x i64> %c, i32 0 - %vqsub.i = tail call i64 @llvm.arm64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind + %vqsub.i = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind ret i64 %vqsub.i } -declare i64 @llvm.arm64.neon.uqsub.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.uqsub.i32(i32, i32) nounwind readnone -declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.neon.uqsub.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.uqsub.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) nounwind readnone define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone { ; CHECK-LABEL: qabss: ; CHECK: sqabs s0, s0 ; CHECK: ret %vecext = extractelement <4 x i32> %b, i32 0 - %vqabs.i = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %vecext) nounwind + %vqabs.i = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %vecext) nounwind ret i32 %vqabs.i } @@ -96,7 +96,7 @@ define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { ; CHECK: sqabs d0, d0 ; CHECK: ret %vecext = extractelement <2 x i64> %b, i32 0 - %vqabs.i = tail call i64 @llvm.arm64.neon.sqabs.i64(i64 %vecext) nounwind + %vqabs.i = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %vecext) nounwind ret i64 %vqabs.i } @@ -105,7 +105,7 @@ define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone { ; CHECK: sqneg s0, s0 ; CHECK: ret %vecext = extractelement <4 x i32> %b, i32 0 - %vqneg.i = tail call i32 @llvm.arm64.neon.sqneg.i32(i32 %vecext) nounwind + %vqneg.i = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %vecext) nounwind ret i32 %vqneg.i } @@ -114,21 +114,21 @@ define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { ; 
CHECK: sqneg d0, d0 ; CHECK: ret %vecext = extractelement <2 x i64> %b, i32 0 - %vqneg.i = tail call i64 @llvm.arm64.neon.sqneg.i64(i64 %vecext) nounwind + %vqneg.i = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %vecext) nounwind ret i64 %vqneg.i } -declare i64 @llvm.arm64.neon.sqneg.i64(i64) nounwind readnone -declare i32 @llvm.arm64.neon.sqneg.i32(i32) nounwind readnone -declare i64 @llvm.arm64.neon.sqabs.i64(i64) nounwind readnone -declare i32 @llvm.arm64.neon.sqabs.i32(i32) nounwind readnone +declare i64 @llvm.aarch64.neon.sqneg.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqneg.i32(i32) nounwind readnone +declare i64 @llvm.aarch64.neon.sqabs.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqabs.i32(i32) nounwind readnone define i32 @vqmovund(<2 x i64> %b) nounwind readnone { ; CHECK-LABEL: vqmovund: ; CHECK: sqxtun s0, d0 %vecext = extractelement <2 x i64> %b, i32 0 - %vqmovun.i = tail call i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind + %vqmovun.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind ret i32 %vqmovun.i } @@ -136,7 +136,7 @@ define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone { ; CHECK-LABEL: vqmovnd_s: ; CHECK: sqxtn s0, d0 %vecext = extractelement <2 x i64> %b, i32 0 - %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind + %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind ret i32 %vqmovn.i } @@ -144,10 +144,10 @@ define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone { ; CHECK-LABEL: vqmovnd_u: ; CHECK: uqxtn s0, d0 %vecext = extractelement <2 x i64> %b, i32 0 - %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind + %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind ret i32 %vqmovn.i } -declare i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone -declare i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone -declare i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone +declare i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone diff --git a/test/CodeGen/ARM64/arith.ll b/test/CodeGen/AArch64/arm64-arith.ll similarity index 90% rename from test/CodeGen/ARM64/arith.ll rename to test/CodeGen/AArch64/arm64-arith.ll index e4be8e98a78..ed9b569e218 100644 --- a/test/CodeGen/ARM64/arith.ll +++ b/test/CodeGen/AArch64/arm64-arith.ll @@ -168,7 +168,7 @@ entry: ; CHECK-LABEL: t18: ; CHECK: sdiv w0, w0, w1 ; CHECK: ret - %sdiv = call i32 @llvm.arm64.sdiv.i32(i32 %a, i32 %b) + %sdiv = call i32 @llvm.aarch64.sdiv.i32(i32 %a, i32 %b) ret i32 %sdiv } @@ -177,7 +177,7 @@ entry: ; CHECK-LABEL: t19: ; CHECK: sdiv x0, x0, x1 ; CHECK: ret - %sdiv = call i64 @llvm.arm64.sdiv.i64(i64 %a, i64 %b) + %sdiv = call i64 @llvm.aarch64.sdiv.i64(i64 %a, i64 %b) ret i64 %sdiv } @@ -186,7 +186,7 @@ entry: ; CHECK-LABEL: t20: ; CHECK: udiv w0, w0, w1 ; CHECK: ret - %udiv = call i32 @llvm.arm64.udiv.i32(i32 %a, i32 %b) + %udiv = call i32 @llvm.aarch64.udiv.i32(i32 %a, i32 %b) ret i32 %udiv } @@ -195,14 +195,14 @@ entry: ; CHECK-LABEL: t21: ; CHECK: udiv x0, x0, x1 ; CHECK: ret - %udiv = call i64 @llvm.arm64.udiv.i64(i64 %a, i64 %b) + %udiv = call i64 @llvm.aarch64.udiv.i64(i64 %a, i64 %b) ret i64 %udiv } -declare i32 @llvm.arm64.sdiv.i32(i32, i32) nounwind readnone -declare i64 
@llvm.arm64.sdiv.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.udiv.i32(i32, i32) nounwind readnone -declare i64 @llvm.arm64.udiv.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.sdiv.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.sdiv.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.udiv.i32(i32, i32) nounwind readnone +declare i64 @llvm.aarch64.udiv.i64(i64, i64) nounwind readnone ; 32-bit not. define i32 @inv_32(i32 %x) nounwind ssp { diff --git a/test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll b/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll similarity index 80% rename from test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll rename to test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll index babf4827693..0904b62c403 100644 --- a/test/CodeGen/ARM64/arm64-dead-def-elimination-flag.ll +++ b/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-dead-def-elimination=false < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-dead-def-elimination=false < %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios7.0.0" diff --git a/test/CodeGen/ARM64/atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll similarity index 100% rename from test/CodeGen/ARM64/atomic-128.ll rename to test/CodeGen/AArch64/arm64-atomic-128.ll diff --git a/test/CodeGen/ARM64/atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll similarity index 100% rename from test/CodeGen/ARM64/atomic.ll rename to test/CodeGen/AArch64/arm64-atomic.ll diff --git a/test/CodeGen/ARM64/basic-pic.ll b/test/CodeGen/AArch64/arm64-basic-pic.ll similarity index 100% rename from test/CodeGen/ARM64/basic-pic.ll rename to test/CodeGen/AArch64/arm64-basic-pic.ll diff --git a/test/CodeGen/ARM64/big-endian-bitconverts.ll b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll similarity index 99% rename from test/CodeGen/ARM64/big-endian-bitconverts.ll rename to test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll index cb8708b9267..f0e968b2c17 100644 --- a/test/CodeGen/ARM64/big-endian-bitconverts.ll +++ b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -O1 -o - | FileCheck %s -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -O1 -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s ; CHECK-LABEL: test_i64_f64: define void @test_i64_f64(double* %p, i64* %q) { diff --git a/test/CodeGen/ARM64/big-endian-eh.ll b/test/CodeGen/AArch64/arm64-big-endian-eh.ll similarity index 100% rename from test/CodeGen/ARM64/big-endian-eh.ll rename to test/CodeGen/AArch64/arm64-big-endian-eh.ll diff --git a/test/CodeGen/ARM64/big-endian-varargs.ll b/test/CodeGen/AArch64/arm64-big-endian-varargs.ll similarity index 100% rename from test/CodeGen/ARM64/big-endian-varargs.ll rename to test/CodeGen/AArch64/arm64-big-endian-varargs.ll diff --git a/test/CodeGen/ARM64/big-endian-vector-callee.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll similarity index 99% rename from test/CodeGen/ARM64/big-endian-vector-callee.ll rename to test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll index 5b9ccace882..1dcccf106a2 100644 --- a/test/CodeGen/ARM64/big-endian-vector-callee.ll +++ 
b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -o - | FileCheck %s -; RUN: llc -mtriple arm64_be < %s -fast-isel=true -arm64-load-store-opt=false -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -fast-isel=true -aarch64-load-store-opt=false -o - | FileCheck %s ; CHECK-LABEL: test_i64_f64: define i64 @test_i64_f64(double %p) { diff --git a/test/CodeGen/ARM64/big-endian-vector-caller.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll similarity index 99% rename from test/CodeGen/ARM64/big-endian-vector-caller.ll rename to test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll index 194a3213925..9a12b7a0115 100644 --- a/test/CodeGen/ARM64/big-endian-vector-caller.ll +++ b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -o - | FileCheck %s -; RUN: llc -mtriple arm64_be < %s -arm64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s +; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s ; CHECK-LABEL: test_i64_f64: declare i64 @test_i64_f64_helper(double %p) diff --git a/test/CodeGen/ARM64/big-imm-offsets.ll b/test/CodeGen/AArch64/arm64-big-imm-offsets.ll similarity index 100% rename from test/CodeGen/ARM64/big-imm-offsets.ll rename to test/CodeGen/AArch64/arm64-big-imm-offsets.ll diff --git a/test/CodeGen/ARM64/big-stack.ll b/test/CodeGen/AArch64/arm64-big-stack.ll similarity index 100% rename from test/CodeGen/ARM64/big-stack.ll rename to test/CodeGen/AArch64/arm64-big-stack.ll diff --git a/test/CodeGen/ARM64/bitfield-extract.ll b/test/CodeGen/AArch64/arm64-bitfield-extract.ll similarity index 100% rename from test/CodeGen/ARM64/bitfield-extract.ll rename to test/CodeGen/AArch64/arm64-bitfield-extract.ll diff --git a/test/CodeGen/ARM64/blockaddress.ll b/test/CodeGen/AArch64/arm64-blockaddress.ll similarity index 100% rename from test/CodeGen/ARM64/blockaddress.ll rename to test/CodeGen/AArch64/arm64-blockaddress.ll diff --git a/test/CodeGen/ARM64/build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll similarity index 94% rename from test/CodeGen/ARM64/build-vector.ll rename to test/CodeGen/AArch64/arm64-build-vector.ll index 143d6894383..c109263cedb 100644 --- a/test/CodeGen/ARM64/build-vector.ll +++ b/test/CodeGen/AArch64/arm64-build-vector.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; Check that building up a vector w/ only one non-zero lane initializes ; intelligently. 
diff --git a/test/CodeGen/ARM64/call-tailcalls.ll b/test/CodeGen/AArch64/arm64-call-tailcalls.ll similarity index 100% rename from test/CodeGen/ARM64/call-tailcalls.ll rename to test/CodeGen/AArch64/arm64-call-tailcalls.ll diff --git a/test/CodeGen/ARM64/cast-opt.ll b/test/CodeGen/AArch64/arm64-cast-opt.ll similarity index 100% rename from test/CodeGen/ARM64/cast-opt.ll rename to test/CodeGen/AArch64/arm64-cast-opt.ll diff --git a/test/CodeGen/ARM64/ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll similarity index 99% rename from test/CodeGen/ARM64/ccmp-heuristics.ll rename to test/CodeGen/AArch64/arm64-ccmp-heuristics.ll index 5575997e53e..664a26cafe4 100644 --- a/test/CodeGen/ARM64/ccmp-heuristics.ll +++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp | FileCheck %s +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-ccmp | FileCheck %s target triple = "arm64-apple-ios7.0.0" @channelColumns = external global i64 diff --git a/test/CodeGen/ARM64/ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll similarity index 98% rename from test/CodeGen/ARM64/ccmp.ll rename to test/CodeGen/AArch64/arm64-ccmp.ll index 79e6f94e3f6..63965f9538b 100644 --- a/test/CodeGen/ARM64/ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp -arm64-stress-ccmp | FileCheck %s +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-ccmp -aarch64-stress-ccmp | FileCheck %s target triple = "arm64-apple-ios" ; CHECK: single_same diff --git a/test/CodeGen/ARM64/clrsb.ll b/test/CodeGen/AArch64/arm64-clrsb.ll similarity index 100% rename from test/CodeGen/ARM64/clrsb.ll rename to test/CodeGen/AArch64/arm64-clrsb.ll diff --git a/test/CodeGen/ARM64/coalesce-ext.ll b/test/CodeGen/AArch64/arm64-coalesce-ext.ll similarity index 100% rename from test/CodeGen/ARM64/coalesce-ext.ll rename to test/CodeGen/AArch64/arm64-coalesce-ext.ll diff --git a/test/CodeGen/ARM64/code-model-large-abs.ll b/test/CodeGen/AArch64/arm64-code-model-large-abs.ll similarity index 100% rename from test/CodeGen/ARM64/code-model-large-abs.ll rename to test/CodeGen/AArch64/arm64-code-model-large-abs.ll diff --git a/test/CodeGen/ARM64/collect-loh-garbage-crash.ll b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll similarity index 91% rename from test/CodeGen/ARM64/collect-loh-garbage-crash.ll rename to test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll index 98cb625d2d5..81cee38420a 100644 --- a/test/CodeGen/ARM64/collect-loh-garbage-crash.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -O3 -arm64-collect-loh -arm64-collect-loh-bb-only=true -arm64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s ; Check that the LOH analysis does not crash when the analysed chained ; contains instructions that are filtered out. 
; diff --git a/test/CodeGen/ARM64/collect-loh-str.ll b/test/CodeGen/AArch64/arm64-collect-loh-str.ll similarity index 86% rename from test/CodeGen/ARM64/collect-loh-str.ll rename to test/CodeGen/AArch64/arm64-collect-loh-str.ll index fc63f8bcc2e..d7bc00e318f 100644 --- a/test/CodeGen/ARM64/collect-loh-str.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh-str.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s ; Test case for . ; AdrpAddStr cannot be used when the store uses same ; register as address and value. Indeed, the related diff --git a/test/CodeGen/ARM64/collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll similarity index 85% rename from test/CodeGen/ARM64/collect-loh.ll rename to test/CodeGen/AArch64/arm64-collect-loh.ll index e92690b04f8..6d73daac620 100644 --- a/test/CodeGen/ARM64/collect-loh.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF +; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF ; CHECK-ELF-NOT: .loh ; CHECK-ELF-NOT: AdrpAdrp diff --git a/test/CodeGen/ARM64/complex-copy-noneon.ll b/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll similarity index 100% rename from test/CodeGen/ARM64/complex-copy-noneon.ll rename to test/CodeGen/AArch64/arm64-complex-copy-noneon.ll diff --git a/test/CodeGen/ARM64/complex-ret.ll b/test/CodeGen/AArch64/arm64-complex-ret.ll similarity index 100% rename from test/CodeGen/ARM64/complex-ret.ll rename to test/CodeGen/AArch64/arm64-complex-ret.ll diff --git a/test/CodeGen/ARM64/const-addr.ll b/test/CodeGen/AArch64/arm64-const-addr.ll similarity index 100% rename from test/CodeGen/ARM64/const-addr.ll rename to test/CodeGen/AArch64/arm64-const-addr.ll diff --git a/test/CodeGen/ARM64/convert-v2f64-v2i32.ll b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll similarity index 86% rename from test/CodeGen/ARM64/convert-v2f64-v2i32.ll rename to test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll index 1a07c986557..d862b1e1943 100644 --- a/test/CodeGen/ARM64/convert-v2f64-v2i32.ll +++ b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; CHECK: fptosi_1 ; CHECK: fcvtzs.2d diff --git a/test/CodeGen/ARM64/convert-v2i32-v2f64.ll b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll similarity index 90% rename from test/CodeGen/ARM64/convert-v2i32-v2f64.ll rename to test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll index 63129a4b830..daaf1e0f87d 100644 --- a/test/CodeGen/ARM64/convert-v2i32-v2f64.ll +++ b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x double> @f1(<2 x i32> %v) nounwind readnone { ; CHECK-LABEL: f1: diff --git 
a/test/CodeGen/ARM64/copy-tuple.ll b/test/CodeGen/AArch64/arm64-copy-tuple.ll similarity index 69% rename from test/CodeGen/ARM64/copy-tuple.ll rename to test/CodeGen/AArch64/arm64-copy-tuple.ll index f9981931246..1803787d729 100644 --- a/test/CodeGen/ARM64/copy-tuple.ll +++ b/test/CodeGen/AArch64/arm64-copy-tuple.ll @@ -13,14 +13,14 @@ define void @test_D1D2_from_D0D1(i8* %addr) #0 { ; CHECK: mov.8b v1, v0 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) ret void } @@ -30,14 +30,14 @@ define void @test_D0D1_from_D1D2(i8* %addr) #0 { ; CHECK: mov.8b v1, v2 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) ret void } @@ -47,14 +47,14 @@ define void @test_D0D1_from_D31D0(i8* %addr) #0 { ; CHECK: mov.8b v0, v31 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 tail call void asm sideeffect "", 
"~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) ret void } @@ -64,14 +64,14 @@ define void @test_D31D0_from_D0D1(i8* %addr) #0 { ; CHECK: mov.8b v0, v1 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() - tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) ret void } @@ -82,16 +82,16 @@ define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 { ; CHECK: mov.8b v2, v0 entry: %addr_v8i8 = bitcast i8* %addr to <8 x i8>* - %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0 %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1 %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2 tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) + tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) + tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x 
i8> %vec2, i8* %addr) ret void } @@ -102,15 +102,15 @@ define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 { ; CHECK: mov.16b v2, v3 entry: %addr_v16i8 = bitcast i8* %addr to <16 x i8>* - %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) + %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) + tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) + tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) ret void } @@ -121,26 +121,26 @@ define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 { ; CHECK: mov.16b v2, v31 ; CHECK: mov.16b v1, v30 %addr_v16i8 = bitcast i8* %addr to <16 x i8>* - %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) + %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"() - tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) + tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) + tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) ret void } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>*) -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>*) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x 
i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>*) +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>*) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>*) -declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) -declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) -declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) -declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) diff --git a/test/CodeGen/ARM64/crc32.ll b/test/CodeGen/AArch64/arm64-crc32.ll similarity index 58% rename from test/CodeGen/ARM64/crc32.ll rename to test/CodeGen/AArch64/arm64-crc32.ll index 5d759dcce71..d3099e6bb13 100644 --- a/test/CodeGen/ARM64/crc32.ll +++ b/test/CodeGen/AArch64/arm64-crc32.ll @@ -4,7 +4,7 @@ define i32 @test_crc32b(i32 %cur, i8 %next) { ; CHECK-LABEL: test_crc32b: ; CHECK: crc32b w0, w0, w1 %bits = zext i8 %next to i32 - %val = call i32 @llvm.arm64.crc32b(i32 %cur, i32 %bits) + %val = call i32 @llvm.aarch64.crc32b(i32 %cur, i32 %bits) ret i32 %val } @@ -12,21 +12,21 @@ define i32 @test_crc32h(i32 %cur, i16 %next) { ; CHECK-LABEL: test_crc32h: ; CHECK: crc32h w0, w0, w1 %bits = zext i16 %next to i32 - %val = call i32 @llvm.arm64.crc32h(i32 %cur, i32 %bits) + %val = call i32 @llvm.aarch64.crc32h(i32 %cur, i32 %bits) ret i32 %val } define i32 @test_crc32w(i32 %cur, i32 %next) { ; CHECK-LABEL: test_crc32w: ; CHECK: crc32w w0, w0, w1 - %val = call i32 @llvm.arm64.crc32w(i32 %cur, i32 %next) + %val = call i32 @llvm.aarch64.crc32w(i32 %cur, i32 %next) ret i32 %val } define i32 @test_crc32x(i32 %cur, i64 %next) { ; CHECK-LABEL: test_crc32x: ; CHECK: crc32x w0, w0, x1 - %val = call i32 @llvm.arm64.crc32x(i32 %cur, i64 %next) + %val = call i32 @llvm.aarch64.crc32x(i32 %cur, i64 %next) ret i32 %val } @@ -34,7 +34,7 @@ define i32 @test_crc32cb(i32 %cur, i8 %next) { ; CHECK-LABEL: test_crc32cb: ; CHECK: crc32cb w0, w0, w1 %bits = zext i8 %next to i32 - %val = call i32 @llvm.arm64.crc32cb(i32 %cur, i32 %bits) + %val = call i32 @llvm.aarch64.crc32cb(i32 %cur, i32 %bits) ret i32 %val } @@ -42,30 +42,30 @@ define i32 @test_crc32ch(i32 %cur, i16 %next) { ; CHECK-LABEL: test_crc32ch: ; CHECK: crc32ch w0, w0, w1 %bits = zext i16 %next to i32 - %val = call i32 @llvm.arm64.crc32ch(i32 %cur, i32 %bits) + %val = call i32 @llvm.aarch64.crc32ch(i32 %cur, i32 %bits) ret i32 %val } define i32 @test_crc32cw(i32 %cur, i32 %next) { ; CHECK-LABEL: test_crc32cw: ; CHECK: crc32cw w0, w0, w1 - %val = call i32 @llvm.arm64.crc32cw(i32 %cur, i32 %next) + %val = call i32 @llvm.aarch64.crc32cw(i32 %cur, i32 %next) ret i32 %val } define i32 @test_crc32cx(i32 %cur, i64 %next) { ; CHECK-LABEL: test_crc32cx: ; CHECK: crc32cx w0, w0, x1 - %val = call i32 @llvm.arm64.crc32cx(i32 %cur, i64 %next) + %val = call i32 @llvm.aarch64.crc32cx(i32 %cur, i64 %next) ret i32 %val } -declare i32 @llvm.arm64.crc32b(i32, i32) -declare i32 @llvm.arm64.crc32h(i32, 
i32) -declare i32 @llvm.arm64.crc32w(i32, i32) -declare i32 @llvm.arm64.crc32x(i32, i64) +declare i32 @llvm.aarch64.crc32b(i32, i32) +declare i32 @llvm.aarch64.crc32h(i32, i32) +declare i32 @llvm.aarch64.crc32w(i32, i32) +declare i32 @llvm.aarch64.crc32x(i32, i64) -declare i32 @llvm.arm64.crc32cb(i32, i32) -declare i32 @llvm.arm64.crc32ch(i32, i32) -declare i32 @llvm.arm64.crc32cw(i32, i32) -declare i32 @llvm.arm64.crc32cx(i32, i64) +declare i32 @llvm.aarch64.crc32cb(i32, i32) +declare i32 @llvm.aarch64.crc32ch(i32, i32) +declare i32 @llvm.aarch64.crc32cw(i32, i32) +declare i32 @llvm.aarch64.crc32cx(i32, i64) diff --git a/test/CodeGen/ARM64/crypto.ll b/test/CodeGen/AArch64/arm64-crypto.ll similarity index 50% rename from test/CodeGen/ARM64/crypto.ll rename to test/CodeGen/AArch64/arm64-crypto.ll index 0020865bcd5..2908b336b1b 100644 --- a/test/CodeGen/ARM64/crypto.ll +++ b/test/CodeGen/AArch64/arm64-crypto.ll @@ -1,50 +1,50 @@ -; RUN: llc -march=arm64 -mattr=crypto -arm64-neon-syntax=apple -o - %s | FileCheck %s +; RUN: llc -march=arm64 -mattr=crypto -aarch64-neon-syntax=apple -o - %s | FileCheck %s -declare <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key) -declare <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) -declare <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data) -declare <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data) +declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key) +declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) +declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %data) +declare <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %data) define <16 x i8> @test_aese(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: test_aese: ; CHECK: aese.16b v0, v1 - %res = call <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key) + %res = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key) ret <16 x i8> %res } define <16 x i8> @test_aesd(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: test_aesd: ; CHECK: aesd.16b v0, v1 - %res = call <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) + %res = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) ret <16 x i8> %res } define <16 x i8> @test_aesmc(<16 x i8> %data) { ; CHECK-LABEL: test_aesmc: ; CHECK: aesmc.16b v0, v0 - %res = call <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data) + %res = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %data) ret <16 x i8> %res } define <16 x i8> @test_aesimc(<16 x i8> %data) { ; CHECK-LABEL: test_aesimc: ; CHECK: aesimc.16b v0, v0 - %res = call <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data) + %res = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %data) ret <16 x i8> %res } -declare <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) -declare <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) -declare <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) -declare i32 @llvm.arm64.crypto.sha1h(i32 %hash_e) -declare <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) -declare <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) +declare <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare <4 x i32> @llvm.aarch64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare <4 x i32> @llvm.aarch64.crypto.sha1m(<4 x 
i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare i32 @llvm.aarch64.crypto.sha1h(i32 %hash_e) +declare <4 x i32> @llvm.aarch64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) +declare <4 x i32> @llvm.aarch64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) define <4 x i32> @test_sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { ; CHECK-LABEL: test_sha1c: ; CHECK: fmov [[HASH_E:s[0-9]+]], w0 ; CHECK: sha1c.4s q0, [[HASH_E]], v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %res } @@ -55,9 +55,9 @@ define <4 x i32> @test_sha1c_in_a_row(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i3 ; CHECK: sha1c.4s q[[SHA1RES:[0-9]+]], [[HASH_E]], v1 ; CHECK-NOT: fmov ; CHECK: sha1c.4s q0, s[[SHA1RES]], v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) %extract = extractelement <4 x i32> %res, i32 0 - %res2 = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %extract, <4 x i32> %wk) + %res2 = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %extract, <4 x i32> %wk) ret <4 x i32> %res2 } @@ -65,7 +65,7 @@ define <4 x i32> @test_sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { ; CHECK-LABEL: test_sha1p: ; CHECK: fmov [[HASH_E:s[0-9]+]], w0 ; CHECK: sha1p.4s q0, [[HASH_E]], v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %res } @@ -73,7 +73,7 @@ define <4 x i32> @test_sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { ; CHECK-LABEL: test_sha1m: ; CHECK: fmov [[HASH_E:s[0-9]+]], w0 ; CHECK: sha1m.4s q0, [[HASH_E]], v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) ret <4 x i32> %res } @@ -82,33 +82,33 @@ define i32 @test_sha1h(i32 %hash_e) { ; CHECK: fmov [[HASH_E:s[0-9]+]], w0 ; CHECK: sha1h [[RES:s[0-9]+]], [[HASH_E]] ; CHECK: fmov w0, [[RES]] - %res = call i32 @llvm.arm64.crypto.sha1h(i32 %hash_e) + %res = call i32 @llvm.aarch64.crypto.sha1h(i32 %hash_e) ret i32 %res } define <4 x i32> @test_sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) { ; CHECK-LABEL: test_sha1su0: ; CHECK: sha1su0.4s v0, v1, v2 - %res = call <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) ret <4 x i32> %res } define <4 x i32> @test_sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) { ; CHECK-LABEL: test_sha1su1: ; CHECK: sha1su1.4s v0, v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) + %res = call <4 x i32> @llvm.aarch64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) ret <4 x i32> %res } -declare <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) -declare <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) -declare <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) -declare <4 x i32> 
@llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) +declare <4 x i32> @llvm.aarch64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) +declare <4 x i32> @llvm.aarch64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) +declare <4 x i32> @llvm.aarch64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) +declare <4 x i32> @llvm.aarch64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) define <4 x i32> @test_sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) { ; CHECK-LABEL: test_sha256h: ; CHECK: sha256h.4s q0, q1, v2 - %res = call <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) ret <4 x i32> %res } @@ -116,20 +116,20 @@ define <4 x i32> @test_sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x ; CHECK-LABEL: test_sha256h2: ; CHECK: sha256h2.4s q0, q1, v2 - %res = call <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) + %res = call <4 x i32> @llvm.aarch64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) ret <4 x i32> %res } define <4 x i32> @test_sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) { ; CHECK-LABEL: test_sha256su0: ; CHECK: sha256su0.4s v0, v1 - %res = call <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) + %res = call <4 x i32> @llvm.aarch64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) ret <4 x i32> %res } define <4 x i32> @test_sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) { ; CHECK-LABEL: test_sha256su1: ; CHECK: sha256su1.4s v0, v1, v2 - %res = call <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) + %res = call <4 x i32> @llvm.aarch64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) ret <4 x i32> %res } diff --git a/test/CodeGen/ARM64/cse.ll b/test/CodeGen/AArch64/arm64-cse.ll similarity index 100% rename from test/CodeGen/ARM64/cse.ll rename to test/CodeGen/AArch64/arm64-cse.ll diff --git a/test/CodeGen/ARM64/csel.ll b/test/CodeGen/AArch64/arm64-csel.ll similarity index 100% rename from test/CodeGen/ARM64/csel.ll rename to test/CodeGen/AArch64/arm64-csel.ll diff --git a/test/CodeGen/ARM64/cvt.ll b/test/CodeGen/AArch64/arm64-cvt.ll similarity index 52% rename from test/CodeGen/ARM64/cvt.ll rename to test/CodeGen/AArch64/arm64-cvt.ll index b55a42fdf85..420a8bc0483 100644 --- a/test/CodeGen/ARM64/cvt.ll +++ b/test/CodeGen/AArch64/arm64-cvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; ; Floating-point scalar convert to signed integer (to nearest with ties to away) @@ -7,7 +7,7 @@ define i32 @fcvtas_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtas_1w1s: ;CHECK: fcvtas w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %A) ret i32 %tmp3 } @@ -15,7 +15,7 @@ define i64 @fcvtas_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtas_1x1s: ;CHECK: fcvtas x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A) ret i64 %tmp3 } @@ -23,7 +23,7 @@ define i32 @fcvtas_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtas_1w1d: ;CHECK: fcvtas w0, d0 
;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %A) ret i32 %tmp3 } @@ -31,14 +31,14 @@ define i64 @fcvtas_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtas_1x1d: ;CHECK: fcvtas x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtas.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtas.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtas.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtas.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtas.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtas.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtas.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtas.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer @@ -47,7 +47,7 @@ define i32 @fcvtau_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtau_1w1s: ;CHECK: fcvtau w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %A) ret i32 %tmp3 } @@ -55,7 +55,7 @@ define i64 @fcvtau_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtau_1x1s: ;CHECK: fcvtau x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A) ret i64 %tmp3 } @@ -63,7 +63,7 @@ define i32 @fcvtau_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtau_1w1d: ;CHECK: fcvtau w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %A) ret i32 %tmp3 } @@ -71,14 +71,14 @@ define i64 @fcvtau_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtau_1x1d: ;CHECK: fcvtau x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtau.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtau.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtau.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtau.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtau.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtau.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtau.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtau.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to signed integer (toward -Inf) @@ -87,7 +87,7 @@ define i32 @fcvtms_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtms_1w1s: ;CHECK: fcvtms w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %A) ret i32 %tmp3 } @@ -95,7 +95,7 @@ define i64 @fcvtms_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtms_1x1s: ;CHECK: fcvtms x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A) ret i64 %tmp3 } @@ -103,7 +103,7 @@ define i32 @fcvtms_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtms_1w1d: ;CHECK: fcvtms w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f64(double %A) + %tmp3 = call i32 
@llvm.aarch64.neon.fcvtms.i32.f64(double %A) ret i32 %tmp3 } @@ -111,14 +111,14 @@ define i64 @fcvtms_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtms_1x1d: ;CHECK: fcvtms x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtms.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtms.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtms.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtms.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtms.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtms.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtms.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtms.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer (toward -Inf) @@ -127,7 +127,7 @@ define i32 @fcvtmu_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtmu_1w1s: ;CHECK: fcvtmu w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %A) ret i32 %tmp3 } @@ -135,7 +135,7 @@ define i64 @fcvtmu_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtmu_1x1s: ;CHECK: fcvtmu x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A) ret i64 %tmp3 } @@ -143,7 +143,7 @@ define i32 @fcvtmu_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtmu_1w1d: ;CHECK: fcvtmu w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %A) ret i32 %tmp3 } @@ -151,14 +151,14 @@ define i64 @fcvtmu_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtmu_1x1d: ;CHECK: fcvtmu x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtmu.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtmu.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtmu.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtmu.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to signed integer (to nearest with ties to even) @@ -167,7 +167,7 @@ define i32 @fcvtns_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtns_1w1s: ;CHECK: fcvtns w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %A) ret i32 %tmp3 } @@ -175,7 +175,7 @@ define i64 @fcvtns_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtns_1x1s: ;CHECK: fcvtns x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A) ret i64 %tmp3 } @@ -183,7 +183,7 @@ define i32 @fcvtns_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtns_1w1d: ;CHECK: fcvtns w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %A) ret i32 %tmp3 } @@ -191,14 
+191,14 @@ define i64 @fcvtns_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtns_1x1d: ;CHECK: fcvtns x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtns.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtns.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtns.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtns.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtns.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtns.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtns.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtns.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer (to nearest with ties to even) @@ -207,7 +207,7 @@ define i32 @fcvtnu_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtnu_1w1s: ;CHECK: fcvtnu w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %A) ret i32 %tmp3 } @@ -215,7 +215,7 @@ define i64 @fcvtnu_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtnu_1x1s: ;CHECK: fcvtnu x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A) ret i64 %tmp3 } @@ -223,7 +223,7 @@ define i32 @fcvtnu_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtnu_1w1d: ;CHECK: fcvtnu w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %A) ret i32 %tmp3 } @@ -231,14 +231,14 @@ define i64 @fcvtnu_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtnu_1x1d: ;CHECK: fcvtnu x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtnu.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtnu.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtnu.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtnu.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to signed integer (toward +Inf) @@ -247,7 +247,7 @@ define i32 @fcvtps_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtps_1w1s: ;CHECK: fcvtps w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %A) ret i32 %tmp3 } @@ -255,7 +255,7 @@ define i64 @fcvtps_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtps_1x1s: ;CHECK: fcvtps x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A) ret i64 %tmp3 } @@ -263,7 +263,7 @@ define i32 @fcvtps_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtps_1w1d: ;CHECK: fcvtps w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %A) ret i32 %tmp3 } @@ -271,14 +271,14 @@ define i64 @fcvtps_1x1d(double %A) nounwind { ;CHECK-LABEL: 
fcvtps_1x1d: ;CHECK: fcvtps x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtps.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtps.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtps.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtps.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtps.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtps.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtps.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtps.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer (toward +Inf) @@ -287,7 +287,7 @@ define i32 @fcvtpu_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtpu_1w1s: ;CHECK: fcvtpu w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %A) ret i32 %tmp3 } @@ -295,7 +295,7 @@ define i64 @fcvtpu_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtpu_1x1s: ;CHECK: fcvtpu x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A) ret i64 %tmp3 } @@ -303,7 +303,7 @@ define i32 @fcvtpu_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtpu_1w1d: ;CHECK: fcvtpu w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %A) ret i32 %tmp3 } @@ -311,14 +311,14 @@ define i64 @fcvtpu_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtpu_1x1d: ;CHECK: fcvtpu x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtpu.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtpu.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtpu.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtpu.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to signed integer (toward zero) @@ -327,7 +327,7 @@ define i32 @fcvtzs_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtzs_1w1s: ;CHECK: fcvtzs w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A) ret i32 %tmp3 } @@ -335,7 +335,7 @@ define i64 @fcvtzs_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtzs_1x1s: ;CHECK: fcvtzs x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A) ret i64 %tmp3 } @@ -343,7 +343,7 @@ define i32 @fcvtzs_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtzs_1w1d: ;CHECK: fcvtzs w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %A) ret i32 %tmp3 } @@ -351,14 +351,14 @@ define i64 @fcvtzs_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtzs_1x1d: ;CHECK: fcvtzs x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 
@llvm.arm64.neon.fcvtzs.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtzs.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtzs.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtzs.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtzs.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double) nounwind readnone ; ; Floating-point scalar convert to unsigned integer (toward zero) @@ -367,7 +367,7 @@ define i32 @fcvtzu_1w1s(float %A) nounwind { ;CHECK-LABEL: fcvtzu_1w1s: ;CHECK: fcvtzu w0, s0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f32(float %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %A) ret i32 %tmp3 } @@ -375,7 +375,7 @@ define i64 @fcvtzu_1x1s(float %A) nounwind { ;CHECK-LABEL: fcvtzu_1x1s: ;CHECK: fcvtzu x0, s0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f32(float %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A) ret i64 %tmp3 } @@ -383,7 +383,7 @@ define i32 @fcvtzu_1w1d(double %A) nounwind { ;CHECK-LABEL: fcvtzu_1w1d: ;CHECK: fcvtzu w0, d0 ;CHECK-NEXT: ret - %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f64(double %A) + %tmp3 = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %A) ret i32 %tmp3 } @@ -391,11 +391,11 @@ define i64 @fcvtzu_1x1d(double %A) nounwind { ;CHECK-LABEL: fcvtzu_1x1d: ;CHECK: fcvtzu x0, d0 ;CHECK-NEXT: ret - %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f64(double %A) + %tmp3 = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %A) ret i64 %tmp3 } -declare i32 @llvm.arm64.neon.fcvtzu.i32.f32(float) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtzu.i64.f32(float) nounwind readnone -declare i32 @llvm.arm64.neon.fcvtzu.i32.f64(double) nounwind readnone -declare i64 @llvm.arm64.neon.fcvtzu.i64.f64(double) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float) nounwind readnone +declare i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double) nounwind readnone +declare i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double) nounwind readnone diff --git a/test/CodeGen/ARM64/dagcombiner-convergence.ll b/test/CodeGen/AArch64/arm64-dagcombiner-convergence.ll similarity index 100% rename from test/CodeGen/ARM64/dagcombiner-convergence.ll rename to test/CodeGen/AArch64/arm64-dagcombiner-convergence.ll diff --git a/test/CodeGen/ARM64/dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll similarity index 100% rename from test/CodeGen/ARM64/dagcombiner-dead-indexed-load.ll rename to test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll diff --git a/test/CodeGen/ARM64/dagcombiner-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll similarity index 100% rename from test/CodeGen/ARM64/dagcombiner-indexed-load.ll rename to test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll diff --git a/test/CodeGen/ARM64/dagcombiner-load-slicing.ll b/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll similarity index 100% rename from test/CodeGen/ARM64/dagcombiner-load-slicing.ll rename to test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll diff --git a/test/CodeGen/ARM64/dead-def-frame-index.ll 
b/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll similarity index 100% rename from test/CodeGen/ARM64/dead-def-frame-index.ll rename to test/CodeGen/AArch64/arm64-dead-def-frame-index.ll diff --git a/test/CodeGen/ARM64/dead-register-def-bug.ll b/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll similarity index 100% rename from test/CodeGen/ARM64/dead-register-def-bug.ll rename to test/CodeGen/AArch64/arm64-dead-register-def-bug.ll diff --git a/test/CodeGen/ARM64/dup.ll b/test/CodeGen/AArch64/arm64-dup.ll similarity index 99% rename from test/CodeGen/ARM64/dup.ll rename to test/CodeGen/AArch64/arm64-dup.ll index 97774a7d18f..0c56b46c417 100644 --- a/test/CodeGen/ARM64/dup.ll +++ b/test/CodeGen/AArch64/arm64-dup.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s define <8 x i8> @v_dup8(i8 %A) nounwind { ;CHECK-LABEL: v_dup8: diff --git a/test/CodeGen/ARM64/early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll similarity index 100% rename from test/CodeGen/ARM64/early-ifcvt.ll rename to test/CodeGen/AArch64/arm64-early-ifcvt.ll diff --git a/test/CodeGen/ARM64/elf-calls.ll b/test/CodeGen/AArch64/arm64-elf-calls.ll similarity index 100% rename from test/CodeGen/ARM64/elf-calls.ll rename to test/CodeGen/AArch64/arm64-elf-calls.ll diff --git a/test/CodeGen/ARM64/elf-constpool.ll b/test/CodeGen/AArch64/arm64-elf-constpool.ll similarity index 100% rename from test/CodeGen/ARM64/elf-constpool.ll rename to test/CodeGen/AArch64/arm64-elf-constpool.ll diff --git a/test/CodeGen/ARM64/elf-globals.ll b/test/CodeGen/AArch64/arm64-elf-globals.ll similarity index 100% rename from test/CodeGen/ARM64/elf-globals.ll rename to test/CodeGen/AArch64/arm64-elf-globals.ll diff --git a/test/CodeGen/ARM64/ext.ll b/test/CodeGen/AArch64/arm64-ext.ll similarity index 98% rename from test/CodeGen/ARM64/ext.ll rename to test/CodeGen/AArch64/arm64-ext.ll index d368eef172e..67860de51b0 100644 --- a/test/CodeGen/ARM64/ext.ll +++ b/test/CodeGen/AArch64/arm64-ext.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: test_vextd: diff --git a/test/CodeGen/ARM64/extend-int-to-fp.ll b/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll similarity index 86% rename from test/CodeGen/ARM64/extend-int-to-fp.ll rename to test/CodeGen/AArch64/arm64-extend-int-to-fp.ll index 599a697a310..048fdb083a4 100644 --- a/test/CodeGen/ARM64/extend-int-to-fp.ll +++ b/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <4 x float> @foo(<4 x i16> %a) nounwind { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/ARM64/extend.ll b/test/CodeGen/AArch64/arm64-extend.ll similarity index 100% rename from test/CodeGen/ARM64/extend.ll rename to test/CodeGen/AArch64/arm64-extend.ll diff --git a/test/CodeGen/ARM64/extern-weak.ll b/test/CodeGen/AArch64/arm64-extern-weak.ll similarity index 100% rename from test/CodeGen/ARM64/extern-weak.ll rename to test/CodeGen/AArch64/arm64-extern-weak.ll diff --git a/test/CodeGen/ARM64/extload-knownzero.ll b/test/CodeGen/AArch64/arm64-extload-knownzero.ll similarity index 100% rename from test/CodeGen/ARM64/extload-knownzero.ll rename to 
test/CodeGen/AArch64/arm64-extload-knownzero.ll diff --git a/test/CodeGen/ARM64/extract.ll b/test/CodeGen/AArch64/arm64-extract.ll similarity index 95% rename from test/CodeGen/ARM64/extract.ll rename to test/CodeGen/AArch64/arm64-extract.ll index 02e42189bb4..01984662d23 100644 --- a/test/CodeGen/ARM64/extract.ll +++ b/test/CodeGen/AArch64/arm64-extract.ll @@ -1,4 +1,4 @@ -; RUN: llc -arm64-extr-generation=true -verify-machineinstrs < %s \ +; RUN: llc -aarch64-extr-generation=true -verify-machineinstrs < %s \ ; RUN: -march=arm64 | FileCheck %s define i64 @ror_i64(i64 %in) { diff --git a/test/CodeGen/ARM64/extract_subvector.ll b/test/CodeGen/AArch64/arm64-extract_subvector.ll similarity index 95% rename from test/CodeGen/ARM64/extract_subvector.ll rename to test/CodeGen/AArch64/arm64-extract_subvector.ll index 20c05fb2326..8b15a6453b2 100644 --- a/test/CodeGen/ARM64/extract_subvector.ll +++ b/test/CodeGen/AArch64/arm64-extract_subvector.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s ; Extract of an upper half of a vector is an "ext.16b v0, v0, v0, #8" insn. diff --git a/test/CodeGen/ARM64/fast-isel-addr-offset.ll b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-addr-offset.ll rename to test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll diff --git a/test/CodeGen/ARM64/fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-alloca.ll rename to test/CodeGen/AArch64/arm64-fast-isel-alloca.ll diff --git a/test/CodeGen/ARM64/fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-br.ll rename to test/CodeGen/AArch64/arm64-fast-isel-br.ll diff --git a/test/CodeGen/ARM64/fast-isel-call.ll b/test/CodeGen/AArch64/arm64-fast-isel-call.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-call.ll rename to test/CodeGen/AArch64/arm64-fast-isel-call.ll diff --git a/test/CodeGen/ARM64/fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-conversion.ll rename to test/CodeGen/AArch64/arm64-fast-isel-conversion.ll diff --git a/test/CodeGen/ARM64/fast-isel-fcmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-fcmp.ll rename to test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll diff --git a/test/CodeGen/ARM64/fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-gv.ll rename to test/CodeGen/AArch64/arm64-fast-isel-gv.ll diff --git a/test/CodeGen/ARM64/fast-isel-icmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-icmp.ll rename to test/CodeGen/AArch64/arm64-fast-isel-icmp.ll diff --git a/test/CodeGen/ARM64/fast-isel-indirectbr.ll b/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-indirectbr.ll rename to test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll diff --git a/test/CodeGen/ARM64/fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-intrinsic.ll rename to test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll diff --git 
a/test/CodeGen/ARM64/fast-isel-materialize.ll b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-materialize.ll rename to test/CodeGen/AArch64/arm64-fast-isel-materialize.ll diff --git a/test/CodeGen/ARM64/fast-isel-noconvert.ll b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-noconvert.ll rename to test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll diff --git a/test/CodeGen/ARM64/fast-isel-rem.ll b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-rem.ll rename to test/CodeGen/AArch64/arm64-fast-isel-rem.ll diff --git a/test/CodeGen/ARM64/fast-isel-ret.ll b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-ret.ll rename to test/CodeGen/AArch64/arm64-fast-isel-ret.ll diff --git a/test/CodeGen/ARM64/fast-isel-select.ll b/test/CodeGen/AArch64/arm64-fast-isel-select.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel-select.ll rename to test/CodeGen/AArch64/arm64-fast-isel-select.ll diff --git a/test/CodeGen/ARM64/fast-isel.ll b/test/CodeGen/AArch64/arm64-fast-isel.ll similarity index 100% rename from test/CodeGen/ARM64/fast-isel.ll rename to test/CodeGen/AArch64/arm64-fast-isel.ll diff --git a/test/CodeGen/ARM64/fastcc-tailcall.ll b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll similarity index 100% rename from test/CodeGen/ARM64/fastcc-tailcall.ll rename to test/CodeGen/AArch64/arm64-fastcc-tailcall.ll diff --git a/test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll b/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll similarity index 100% rename from test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll rename to test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll diff --git a/test/CodeGen/ARM64/fcmp-opt.ll b/test/CodeGen/AArch64/arm64-fcmp-opt.ll similarity index 98% rename from test/CodeGen/ARM64/fcmp-opt.ll rename to test/CodeGen/AArch64/arm64-fcmp-opt.ll index e79eb9c6fb1..41027d4b5c7 100644 --- a/test/CodeGen/ARM64/fcmp-opt.ll +++ b/test/CodeGen/AArch64/arm64-fcmp-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -mcpu=cyclone -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s ; rdar://10263824 define i1 @fcmp_float1(float %a) nounwind ssp { diff --git a/test/CodeGen/ARM64/fcopysign.ll b/test/CodeGen/AArch64/arm64-fcopysign.ll similarity index 100% rename from test/CodeGen/ARM64/fcopysign.ll rename to test/CodeGen/AArch64/arm64-fcopysign.ll diff --git a/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll similarity index 62% rename from test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll rename to test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll index 77981f292bd..e51c38b2b95 100644 --- a/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll +++ b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; DAGCombine to transform a conversion of an extract_vector_elt to an ; extract_vector_elt of a conversion, which saves a round trip of copies @@ -8,8 +8,8 @@ define double @foo0(<2 x i64> %a) nounwind { ; CHECK: scvtf.2d [[REG:v[0-9]+]], v0, #9 ; CHECK-NEXT: ins.d v0[0], 
[[REG]][1] %vecext = extractelement <2 x i64> %a, i32 1 - %fcvt_n = tail call double @llvm.arm64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9) + %fcvt_n = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9) ret double %fcvt_n } -declare double @llvm.arm64.neon.vcvtfxs2fp.f64.i64(i64, i32) nounwind readnone +declare double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64, i32) nounwind readnone diff --git a/test/CodeGen/ARM64/fmadd.ll b/test/CodeGen/AArch64/arm64-fmadd.ll similarity index 100% rename from test/CodeGen/ARM64/fmadd.ll rename to test/CodeGen/AArch64/arm64-fmadd.ll diff --git a/test/CodeGen/ARM64/fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll similarity index 100% rename from test/CodeGen/ARM64/fmax.ll rename to test/CodeGen/AArch64/arm64-fmax.ll diff --git a/test/CodeGen/AArch64/arm64-fminv.ll b/test/CodeGen/AArch64/arm64-fminv.ll new file mode 100644 index 00000000000..f4c97355dd1 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-fminv.ll @@ -0,0 +1,101 @@ +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s + +define float @test_fminv_v2f32(<2 x float> %in) { +; CHECK: test_fminv_v2f32: +; CHECK: fminp s0, v0.2s + %min = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %in) + ret float %min +} + +define float @test_fminv_v4f32(<4 x float> %in) { +; CHECK: test_fminv_v4f32: +; CHECK: fminv s0, v0.4s + %min = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %in) + ret float %min +} + +define double @test_fminv_v2f64(<2 x double> %in) { +; CHECK: test_fminv_v2f64: +; CHECK: fminp d0, v0.2d + %min = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %in) + ret double %min +} + +declare float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float>) +declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>) +declare double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double>) + +define float @test_fmaxv_v2f32(<2 x float> %in) { +; CHECK: test_fmaxv_v2f32: +; CHECK: fmaxp s0, v0.2s + %max = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %in) + ret float %max +} + +define float @test_fmaxv_v4f32(<4 x float> %in) { +; CHECK: test_fmaxv_v4f32: +; CHECK: fmaxv s0, v0.4s + %max = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %in) + ret float %max +} + +define double @test_fmaxv_v2f64(<2 x double> %in) { +; CHECK: test_fmaxv_v2f64: +; CHECK: fmaxp d0, v0.2d + %max = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %in) + ret double %max +} + +declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) +declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>) +declare double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double>) + +define float @test_fminnmv_v2f32(<2 x float> %in) { +; CHECK: test_fminnmv_v2f32: +; CHECK: fminnmp s0, v0.2s + %minnm = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %in) + ret float %minnm +} + +define float @test_fminnmv_v4f32(<4 x float> %in) { +; CHECK: test_fminnmv_v4f32: +; CHECK: fminnmv s0, v0.4s + %minnm = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %in) + ret float %minnm +} + +define double @test_fminnmv_v2f64(<2 x double> %in) { +; CHECK: test_fminnmv_v2f64: +; CHECK: fminnmp d0, v0.2d + %minnm = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %in) + ret double %minnm +} + +declare float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float>) +declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>) +declare double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double>) + +define float @test_fmaxnmv_v2f32(<2 x float> %in) { +; CHECK: 
test_fmaxnmv_v2f32: +; CHECK: fmaxnmp s0, v0.2s + %maxnm = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %in) + ret float %maxnm +} + +define float @test_fmaxnmv_v4f32(<4 x float> %in) { +; CHECK: test_fmaxnmv_v4f32: +; CHECK: fmaxnmv s0, v0.4s + %maxnm = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %in) + ret float %maxnm +} + +define double @test_fmaxnmv_v2f64(<2 x double> %in) { +; CHECK: test_fmaxnmv_v2f64: +; CHECK: fmaxnmp d0, v0.2d + %maxnm = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %in) + ret double %maxnm +} + +declare float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float>) +declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>) +declare double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double>) diff --git a/test/CodeGen/ARM64/fmuladd.ll b/test/CodeGen/AArch64/arm64-fmuladd.ll similarity index 97% rename from test/CodeGen/ARM64/fmuladd.ll rename to test/CodeGen/AArch64/arm64-fmuladd.ll index 174d8307671..6c5eecabd75 100644 --- a/test/CodeGen/ARM64/fmuladd.ll +++ b/test/CodeGen/AArch64/arm64-fmuladd.ll @@ -1,4 +1,4 @@ -; RUN: llc -asm-verbose=false < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define float @test_f32(float* %A, float* %B, float* %C) nounwind { ;CHECK-LABEL: test_f32: diff --git a/test/CodeGen/ARM64/fold-address.ll b/test/CodeGen/AArch64/arm64-fold-address.ll similarity index 100% rename from test/CodeGen/ARM64/fold-address.ll rename to test/CodeGen/AArch64/arm64-fold-address.ll diff --git a/test/CodeGen/ARM64/fold-lsl.ll b/test/CodeGen/AArch64/arm64-fold-lsl.ll similarity index 97% rename from test/CodeGen/ARM64/fold-lsl.ll rename to test/CodeGen/AArch64/arm64-fold-lsl.ll index 2e5762dd262..ec65e467e37 100644 --- a/test/CodeGen/ARM64/fold-lsl.ll +++ b/test/CodeGen/AArch64/arm64-fold-lsl.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; ; diff --git a/test/CodeGen/ARM64/fp-contract-zero.ll b/test/CodeGen/AArch64/arm64-fp-contract-zero.ll similarity index 100% rename from test/CodeGen/ARM64/fp-contract-zero.ll rename to test/CodeGen/AArch64/arm64-fp-contract-zero.ll diff --git a/test/CodeGen/ARM64/fp-imm.ll b/test/CodeGen/AArch64/arm64-fp-imm.ll similarity index 100% rename from test/CodeGen/ARM64/fp-imm.ll rename to test/CodeGen/AArch64/arm64-fp-imm.ll diff --git a/test/CodeGen/ARM64/fp.ll b/test/CodeGen/AArch64/arm64-fp.ll similarity index 100% rename from test/CodeGen/ARM64/fp.ll rename to test/CodeGen/AArch64/arm64-fp.ll diff --git a/test/CodeGen/ARM64/fp128-folding.ll b/test/CodeGen/AArch64/arm64-fp128-folding.ll similarity index 100% rename from test/CodeGen/ARM64/fp128-folding.ll rename to test/CodeGen/AArch64/arm64-fp128-folding.ll diff --git a/test/CodeGen/ARM64/fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll similarity index 100% rename from test/CodeGen/ARM64/fp128.ll rename to test/CodeGen/AArch64/arm64-fp128.ll diff --git a/test/CodeGen/ARM64/frame-index.ll b/test/CodeGen/AArch64/arm64-frame-index.ll similarity index 100% rename from test/CodeGen/ARM64/frame-index.ll rename to test/CodeGen/AArch64/arm64-frame-index.ll diff --git a/test/CodeGen/ARM64/frameaddr.ll b/test/CodeGen/AArch64/arm64-frameaddr.ll similarity index 100% rename from test/CodeGen/ARM64/frameaddr.ll rename to test/CodeGen/AArch64/arm64-frameaddr.ll diff --git a/test/CodeGen/ARM64/global-address.ll 
b/test/CodeGen/AArch64/arm64-global-address.ll similarity index 100% rename from test/CodeGen/ARM64/global-address.ll rename to test/CodeGen/AArch64/arm64-global-address.ll diff --git a/test/CodeGen/ARM64/hello.ll b/test/CodeGen/AArch64/arm64-hello.ll similarity index 100% rename from test/CodeGen/ARM64/hello.ll rename to test/CodeGen/AArch64/arm64-hello.ll diff --git a/test/CodeGen/ARM64/i16-subreg-extract.ll b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll similarity index 80% rename from test/CodeGen/ARM64/i16-subreg-extract.ll rename to test/CodeGen/AArch64/arm64-i16-subreg-extract.ll index fc2e8b58ac8..ba759e32aae 100644 --- a/test/CodeGen/ARM64/i16-subreg-extract.ll +++ b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @foo(<4 x i16>* %__a) nounwind { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/ARM64/icmp-opt.ll b/test/CodeGen/AArch64/arm64-icmp-opt.ll similarity index 100% rename from test/CodeGen/ARM64/icmp-opt.ll rename to test/CodeGen/AArch64/arm64-icmp-opt.ll diff --git a/test/CodeGen/ARM64/illegal-float-ops.ll b/test/CodeGen/AArch64/arm64-illegal-float-ops.ll similarity index 100% rename from test/CodeGen/ARM64/illegal-float-ops.ll rename to test/CodeGen/AArch64/arm64-illegal-float-ops.ll diff --git a/test/CodeGen/ARM64/indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll similarity index 99% rename from test/CodeGen/ARM64/indexed-memory.ll rename to test/CodeGen/AArch64/arm64-indexed-memory.ll index e390ed7ece5..e501c6e403b 100644 --- a/test/CodeGen/ARM64/indexed-memory.ll +++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-redzone | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-redzone | FileCheck %s define void @store64(i64** nocapture %out, i64 %index, i64 %spacing) nounwind noinline ssp { ; CHECK-LABEL: store64: diff --git a/test/CodeGen/ARM64/indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll similarity index 100% rename from test/CodeGen/ARM64/indexed-vector-ldst-2.ll rename to test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll diff --git a/test/CodeGen/ARM64/indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll similarity index 75% rename from test/CodeGen/ARM64/indexed-vector-ldst.ll rename to test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 3d8ff53137b..9ee4063658b 100644 --- a/test/CodeGen/ARM64/indexed-vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -615,7 +615,7 @@ define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) { define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld2: ;CHECK: ld2.16b { v0, v1 }, [x0], #32 - %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A) + %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 @@ -624,19 +624,19 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld2: ;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A) + %ld2 = tail call { <16 x i8>, <16 x i8> 
} @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld2: ;CHECK: ld2.8b { v0, v1 }, [x0], #16 - %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A) + %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 16 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 @@ -645,19 +645,19 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) { define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld2: ;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A) + %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld2: ;CHECK: ld2.8h { v0, v1 }, [x0], #32 - %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A) + %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 16 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 @@ -666,19 +666,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) { define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld2: ;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A) + %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 } -declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld2: ;CHECK: ld2.4h { v0, v1 }, [x0], #16 - %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A) + %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 8 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 @@ -687,19 +687,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) { define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld2: ;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A) + %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 } -declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16*) +declare 
{ <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld2: ;CHECK: ld2.4s { v0, v1 }, [x0], #32 - %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A) + %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 8 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 @@ -708,19 +708,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) { define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld2: ;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A) + %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld2: ;CHECK: ld2.2s { v0, v1 }, [x0], #16 - %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A) + %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 @@ -729,19 +729,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) { define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld2: ;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A) + %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 } -declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld2: ;CHECK: ld2.2d { v0, v1 }, [x0], #32 - %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A) + %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 @@ -750,19 +750,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) { define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld2: ;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A) + %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 } -declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) { ;CHECK-LABEL: 
test_v1i64_post_imm_ld2: ;CHECK: ld1.1d { v0, v1 }, [x0], #16 - %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A) + %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 @@ -771,19 +771,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) { define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld2: ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A) + %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 } -declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld2: ;CHECK: ld2.4s { v0, v1 }, [x0], #32 - %ld2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float* %A) + %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 8 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 @@ -792,19 +792,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld2: ;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float* %A) + %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 } -declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*) define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld2: ;CHECK: ld2.2s { v0, v1 }, [x0], #16 - %ld2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float* %A) + %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 @@ -813,19 +813,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld2: ;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float* %A) + %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 } -declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*) define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) { ;CHECK-LABEL: 
test_v2f64_post_imm_ld2: ;CHECK: ld2.2d { v0, v1 }, [x0], #32 - %ld2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double* %A) + %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 @@ -834,19 +834,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, doubl define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld2: ;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double* %A) + %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 } -declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*) define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld2: ;CHECK: ld1.1d { v0, v1 }, [x0], #16 - %ld2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A) + %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 @@ -855,19 +855,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, doubl define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld2: ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A) + %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 } -declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld3: ;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A) + %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 48 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 @@ -876,19 +876,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld3: ;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A) + %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 } -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8>, <16 x 
i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld3: ;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A) + %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 24 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 @@ -897,19 +897,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %pt define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld3: ;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A) + %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 } -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld3: ;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A) + %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 24 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 @@ -918,19 +918,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld3: ;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A) + %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld3: ;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A) + %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 12 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 @@ -939,19 +939,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld3: ;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A) + %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } 
%ld3 } -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld3: ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A) + %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 12 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 @@ -960,19 +960,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld3: ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A) + %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld3: ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A) + %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 6 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 @@ -981,19 +981,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld3: ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A) + %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 } -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld3: ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A) + %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 6 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 @@ -1002,19 +1002,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld3: ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A) + %ld3 = tail 
call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 } -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A) + %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 @@ -1023,19 +1023,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A) + %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 } -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld3: ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %A) + %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 12 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 @@ -1044,19 +1044,19 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld3: ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %A) + %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 } -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*) define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld3: ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float* %A) + %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 6 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 @@ -1065,19 +1065,19 @@ define { <2 x float>, <2 x float>, <2 x float> } 
@test_v2f32_post_imm_ld3(float* define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld3: ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float* %A) + %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 } -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*) define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld3: ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48 - %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double* %A) + %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 6 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 @@ -1086,19 +1086,19 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(dou define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld3: ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double* %A) + %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 } -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*) define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 - %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A) + %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 @@ -1107,19 +1107,19 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(dou define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A) + %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 } -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) define { <16 x i8>, <16 
x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld4: ;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A) + %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 64 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 @@ -1128,19 +1128,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld4: ;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A) + %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 } -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld4: ;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A) + %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 @@ -1149,19 +1149,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld4: ;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A) + %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 } -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld4: ;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A) + %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 32 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 @@ -1170,19 +1170,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld4: ;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } 
@llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A) + %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 } -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld4: ;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A) + %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 16 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 @@ -1191,19 +1191,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld4: ;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A) + %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 } -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld4: ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A) + %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 16 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 @@ -1212,19 +1212,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld4: ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A) + %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 } -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld4: ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A) + %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, 
<2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 8 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 @@ -1233,19 +1233,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld4: ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A) + %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 } -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld4: ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A) + %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 8 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 @@ -1254,19 +1254,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld4: ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A) + %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 } -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A) + %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 @@ -1275,19 +1275,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A) + %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> 
} %ld4 } -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld4: ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float* %A) + %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 16 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 @@ -1296,19 +1296,19 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld4: ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float* %A) + %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 } -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*) define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld4: ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float* %A) + %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 8 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 @@ -1317,19 +1317,19 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld4: ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float* %A) + %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 } -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*) define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld4: ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64 - %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double* %A) + %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 
x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 8 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 @@ -1338,19 +1338,19 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld4: ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double* %A) + %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 } -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*) define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A) + %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 @@ -1359,18 +1359,18 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A) + %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 } -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld1x2: ;CHECK: ld1.16b { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %A) + %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld1x2 @@ -1379,19 +1379,19 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) { define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld1x2: ;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %A) + %ld1x2 = tail call { <16 x i8>, <16 x i8> } 
@llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld1x2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld1x2: ;CHECK: ld1.8b { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %A) + %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 16 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld1x2 @@ -1400,19 +1400,19 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) { define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld1x2: ;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %A) + %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld1x2 } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld1x2: ;CHECK: ld1.8h { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %A) + %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 16 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld1x2 @@ -1421,19 +1421,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld1x2: ;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %A) + %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld1x2 } -declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld1x2: ;CHECK: ld1.4h { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %A) + %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 8 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld1x2 @@ -1442,19 +1442,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld1x2: ;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %A) + %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x 
i16>, <4 x i16> } %ld1x2 } -declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld1x2: ;CHECK: ld1.4s { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %A) + %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 8 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld1x2 @@ -1463,19 +1463,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld1x2: ;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %A) + %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld1x2 } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld1x2: ;CHECK: ld1.2s { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %A) + %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld1x2 @@ -1484,19 +1484,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld1x2: ;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %A) + %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld1x2 } -declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld1x2: ;CHECK: ld1.2d { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %A) + %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld1x2 @@ -1505,19 +1505,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld1x2: ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %A) + %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld1x2 } -declare { <2 x i64>, <2 x i64> } 
@llvm.arm64.neon.ld1x2.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld1x2: ;CHECK: ld1.1d { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %A) + %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld1x2 @@ -1526,19 +1526,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld1x2: ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %A) + %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld1x2 } -declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld1x2: ;CHECK: ld1.4s { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %A) + %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 8 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld1x2 @@ -1547,19 +1547,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float* define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld1x2: ;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %A) + %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld1x2 } -declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld1x2: ;CHECK: ld1.2s { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %A) + %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld1x2 @@ -1568,19 +1568,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float* define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld1x2: ;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %A) + %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { 
<2 x float>, <2 x float> } %ld1x2 } -declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld1x2: ;CHECK: ld1.2d { v0, v1 }, [x0], #32 - %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %A) + %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld1x2 @@ -1589,19 +1589,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, dou define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld1x2: ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %A) + %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld1x2 } -declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld1x2: ;CHECK: ld1.1d { v0, v1 }, [x0], #16 - %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %A) + %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld1x2 @@ -1610,19 +1610,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, dou define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld1x2: ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %A) + %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld1x2 } -declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld1x3: ;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %A) + %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 48 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3 @@ -1631,19 +1631,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld1x3: ;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <16 x 
i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %A) + %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3 } -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld1x3: ;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %A) + %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 24 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3 @@ -1652,19 +1652,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** % define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld1x3: ;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %A) + %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3 } -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v8i16_post_imm_ld1x3: ;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %A) + %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 24 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3 @@ -1673,19 +1673,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld1x3: ;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %A) + %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3 } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld1x3: ;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %A) + %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 12 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3 @@ -1694,19 +1694,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i 
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld1x3: ;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %A) + %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3 } -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld1x3: ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %A) + %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 12 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3 @@ -1715,19 +1715,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld1x3: ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %A) + %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3 } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld1x3: ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %A) + %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 6 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3 @@ -1736,19 +1736,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld1x3: ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %A) + %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3 } -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld1x3: ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %A) + %ld1x3 = tail call { <2 
x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 6 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3 @@ -1757,19 +1757,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld1x3: ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %A) + %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3 } -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld1x3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %A) + %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3 @@ -1778,19 +1778,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld1x3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %A) + %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3 } -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld1x3: ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %A) + %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 12 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3 @@ -1799,19 +1799,19 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(floa define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld1x3: ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %A) + %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3 } -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float*) +declare { <4 
x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld1x3: ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %A) + %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 6 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3 @@ -1820,19 +1820,19 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(floa define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f32_post_reg_ld1x3: ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %A) + %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3 } -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld1x3: ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48 - %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %A) + %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 6 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3 @@ -1841,19 +1841,19 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(d define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld1x3: ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %A) + %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3 } -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld1x3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 - %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %A) + %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3 @@ -1862,19 +1862,19 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(d define { <1 x double>, <1 
x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld1x3: ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %A) + %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3 } -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v16i8_post_imm_ld1x4: ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %A) + %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 64 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4 @@ -1883,19 +1883,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v16i8_post_reg_ld1x4: ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %A) + %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4 } -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) { ;CHECK-LABEL: test_v8i8_post_imm_ld1x4: ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %A) + %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 32 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4 @@ -1904,19 +1904,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i8_post_reg_ld1x4: ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %A) + %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4 } -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8*) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) { 
;CHECK-LABEL: test_v8i16_post_imm_ld1x4: ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %A) + %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 32 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4 @@ -1925,19 +1925,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v8i16_post_reg_ld1x4: ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %A) + %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4 } -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) { ;CHECK-LABEL: test_v4i16_post_imm_ld1x4: ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %A) + %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 16 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4 @@ -1946,19 +1946,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i16_post_reg_ld1x4: ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %A) + %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4 } -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v4i32_post_imm_ld1x4: ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %A) + %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 16 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4 @@ -1967,19 +1967,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4i32_post_reg_ld1x4: ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - 
%ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %A) + %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4 } -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) { ;CHECK-LABEL: test_v2i32_post_imm_ld1x4: ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %A) + %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 8 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4 @@ -1988,19 +1988,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i32_post_reg_ld1x4: ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %A) + %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4 } -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32*) +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v2i64_post_imm_ld1x4: ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %A) + %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 8 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4 @@ -2009,19 +2009,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2i64_post_reg_ld1x4: ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %A) + %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4 } -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64*) +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) { ;CHECK-LABEL: test_v1i64_post_imm_ld1x4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail 
call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %A) + %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4 @@ -2030,19 +2030,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1i64_post_reg_ld1x4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %A) + %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4 } -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64*) +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) { ;CHECK-LABEL: test_v4f32_post_imm_ld1x4: ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %A) + %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 16 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4 @@ -2051,19 +2051,19 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: test_v4f32_post_reg_ld1x4: ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %A) + %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4 } -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float*) +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) { ;CHECK-LABEL: test_v2f32_post_imm_ld1x4: ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %A) + %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 8 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4 @@ -2072,19 +2072,19 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) { ;CHECK-LABEL: 
test_v2f32_post_reg_ld1x4: ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %A) + %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4 } -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float*) +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) { ;CHECK-LABEL: test_v2f64_post_imm_ld1x4: ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64 - %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %A) + %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 8 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4 @@ -2093,19 +2093,19 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v2f64_post_reg_ld1x4: ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %A) + %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4 } -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double*) +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) { ;CHECK-LABEL: test_v1f64_post_imm_ld1x4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 - %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %A) + %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4 @@ -2114,19 +2114,19 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) { ;CHECK-LABEL: test_v1f64_post_reg_ld1x4: ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %A) + %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, 
<1 x double>, <1 x double> } %ld1x4 } -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double*) +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld2r: ;CHECK: ld2r.16b { v0, v1 }, [x0], #2 - %ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A) + %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 2 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 @@ -2135,19 +2135,19 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nou define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld2r: ;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A) + %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld2r: ;CHECK: ld2r.8b { v0, v1 }, [x0], #2 - %ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A) + %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 2 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 @@ -2156,19 +2156,19 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwi define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld2r: ;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A) + %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld2r: ;CHECK: ld2r.8h { v0, v1 }, [x0], #4 - %ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A) + %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 2 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 @@ -2177,19 +2177,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) n define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld2r: ;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A) + %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 } -declare { <8 x 
i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld2r: ;CHECK: ld2r.4h { v0, v1 }, [x0], #4 - %ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A) + %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 2 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 @@ -2198,19 +2198,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) n define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld2r: ;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A) + %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 } -declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly +declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld2r: ;CHECK: ld2r.4s { v0, v1 }, [x0], #8 - %ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A) + %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 2 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 @@ -2219,18 +2219,18 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) n define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld2r: ;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A) + %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld2r: ;CHECK: ld2r.2s { v0, v1 }, [x0], #8 - %ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A) + %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 2 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 @@ -2239,19 +2239,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) n define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld2r: ;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A) + %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 } -declare { <2 x i32>, <2 x i32> } 
@llvm.arm64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld2r: ;CHECK: ld2r.2d { v0, v1 }, [x0], #16 - %ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A) + %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 @@ -2260,18 +2260,18 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) n define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld2r: ;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A) + %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 } -declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld2r: ;CHECK: ld2r.1d { v0, v1 }, [x0], #16 - %ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A) + %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 @@ -2280,19 +2280,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) n define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld2r: ;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A) + %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 } -declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld2r: ;CHECK: ld2r.4s { v0, v1 }, [x0], #8 - %ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float* %A) + %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 2 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 @@ -2301,18 +2301,18 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld2r: ;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float* %A) + %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } 
%ld2 } -declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld2r: ;CHECK: ld2r.2s { v0, v1 }, [x0], #8 - %ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float* %A) + %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 2 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 @@ -2321,19 +2321,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld2r: ;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float* %A) + %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 } -declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld2r: ;CHECK: ld2r.2d { v0, v1 }, [x0], #16 - %ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double* %A) + %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 @@ -2342,18 +2342,18 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, doub define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld2r: ;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double* %A) + %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 } -declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld2r: ;CHECK: ld2r.1d { v0, v1 }, [x0], #16 - %ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double* %A) + %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 @@ -2362,19 +2362,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, doub define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld2r: ;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}} - %ld2 = call { <1 x double>, <1 x 
double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double* %A) + %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 } -declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld3r: ;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3 - %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A) + %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 3 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 @@ -2383,19 +2383,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8* define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld3r: ;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A) + %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 } -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld3r: ;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3 - %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A) + %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 3 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 @@ -2404,19 +2404,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %p define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld3r: ;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A) + %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 } -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld3r: ;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6 - %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A) + %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 3 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 @@ -2425,19 +2425,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } 
@test_v8i16_post_imm_ld3r(i16* %A, i1 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld3r: ;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A) + %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld3r: ;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6 - %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A) + %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 3 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 @@ -2446,19 +2446,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i1 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld3r: ;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A) + %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 } -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld3r: ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12 - %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A) + %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 3 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 @@ -2467,18 +2467,18 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i3 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld3r: ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A) + %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld3r: ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12 - %ld3 = call { <2 x i32>, <2 x 
i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A) + %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 3 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 @@ -2487,19 +2487,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i3 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld3r: ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A) + %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 } -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld3r: ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24 - %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A) + %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 @@ -2508,18 +2508,18 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i6 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld3r: ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A) + %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 } -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld3r: ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24 - %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A) + %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 @@ -2528,19 +2528,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i6 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld3r: ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A) + %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 } -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64*) 
nounwind readonly +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld3r: ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12 - %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float* %A) + %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 3 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 @@ -2549,18 +2549,18 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld3r: ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float* %A) + %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 } -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld3r: ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12 - %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float* %A) + %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 3 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 @@ -2569,19 +2569,19 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld3r: ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float* %A) + %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 } -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld3r: ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24 - %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double* %A) + %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 @@ -2590,18 +2590,18 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(do define { <2 x 
double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld3r: ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double* %A) + %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 } -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld3r: ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24 - %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double* %A) + %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 @@ -2610,19 +2610,19 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(do define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld3r: ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double* %A) + %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 } -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld4r: ;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4 - %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A) + %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 4 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 @@ -2631,19 +2631,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r( define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld4r: ;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A) + %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 } -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } 
@llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld4r: ;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4 - %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A) + %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i32 4 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 @@ -2652,19 +2652,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* % define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld4r: ;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A) + %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 } -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld4r: ;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8 - %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A) + %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 4 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 @@ -2673,19 +2673,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r( define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld4r: ;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A) + %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 } -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld4r: ;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8 - %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A) + %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i32 4 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 @@ -2694,19 +2694,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r( define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) 
nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld4r: ;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A) + %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 } -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld4r: ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 - %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A) + %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 @@ -2715,18 +2715,18 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r( define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld4r: ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A) + %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 } -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld4r: ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 - %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A) + %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 @@ -2735,19 +2735,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r( define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld4r: ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A) + %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 } -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } 
@test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld4r: ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A) + %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 @@ -2756,18 +2756,18 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r( define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld4r: ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A) + %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 } -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld4r: ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A) + %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 @@ -2776,19 +2776,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r( define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld4r: ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A) + %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 } -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld4r: ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 - %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float* %A) + %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 @@ -2797,18 +2797,18 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, 
float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld4r: ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float* %A) + %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 } -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld4r: ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 - %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float* %A) + %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 @@ -2817,19 +2817,19 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld4r: ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float* %A) + %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 } -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld4r: ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double* %A) + %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 @@ -2838,18 +2838,18 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld4r: ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double* %A) + %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x 
double>, <2 x double>, <2 x double> } %ld4 } -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld4r: ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 - %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double* %A) + %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 @@ -2858,19 +2858,19 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld4r: ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double* %A) + %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 } -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld2lane: ;CHECK: ld2.b { v0, v1 }[0], [x0], #2 - %ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) + %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 2 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 @@ -2879,19 +2879,19 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld2lane: ;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) + %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8> } %ld2 } -declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld2lane: ;CHECK: ld2.b { v0, v1 }[0], [x0], #2 - %ld2 = call { <8 x i8>, <8 x i8> } 
@llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) + %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 2 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 @@ -2900,19 +2900,19 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld2lane: ;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) + %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8> } %ld2 } -declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld2lane: ;CHECK: ld2.h { v0, v1 }[0], [x0], #4 - %ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) + %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 2 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 @@ -2921,19 +2921,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld2lane: ;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) + %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16> } %ld2 } -declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld2lane: ;CHECK: ld2.h { v0, v1 }[0], [x0], #4 - %ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) + %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 2 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 @@ -2942,19 +2942,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld2lane: ;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <4 x i16>, <4 x i16> } 
@llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) + %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16> } %ld2 } -declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly +declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], #8 - %ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) + %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 2 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 @@ -2963,19 +2963,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) + %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32> } %ld2 } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], #8 - %ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) + %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 2 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 @@ -2984,19 +2984,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) + %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32> } %ld2 } -declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 
x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], #16 - %ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) + %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 @@ -3005,19 +3005,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) + %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64> } %ld2 } -declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], #16 - %ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) + %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 2 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 @@ -3026,19 +3026,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) + %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64> } %ld2 } -declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], #8 - %ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) + %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i32 2 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 @@ -3047,19 +3047,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, floa define { <4 x float>, <4 x 
float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) + %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float> } %ld2 } -declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], #8 - %ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) + %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i32 2 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 @@ -3068,19 +3068,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, floa define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld2lane: ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) + %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float> } %ld2 } -declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], #16 - %ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) + %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 @@ -3089,19 +3089,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, d define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) + %ld2 = call { <2 x double>, <2 x double> } 
@llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double> } %ld2 } -declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], #16 - %ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) + %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i32 2 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 @@ -3110,19 +3110,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, d define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld2lane: ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - %ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) + %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double> } %ld2 } -declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld3lane: ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3 - %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) + %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 3 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 @@ -3131,19 +3131,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld3lane: ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) + %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 } -declare { <16 x i8>, <16 x i8>, <16 x i8> } 
@llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld3lane: ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3 - %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) + %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 3 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 @@ -3152,19 +3152,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld3lane: ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) + %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 } -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld3lane: ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6 - %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) + %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 3 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 @@ -3173,19 +3173,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld3lane: ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) + %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly define { <4 x i16>, <4 x i16>, <4 
x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld3lane: ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6 - %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) + %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 3 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 @@ -3194,19 +3194,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld3lane: ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) + %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 } -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 - %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) + %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 3 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 @@ -3215,19 +3215,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) + %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], 
[x0], #12 - %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) + %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 3 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 @@ -3236,19 +3236,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) + %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 } -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 - %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) + %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 @@ -3257,19 +3257,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) + %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 } -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 - %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) + %ld3 = call { <1 x i64>, <1 x 
i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 3 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 @@ -3278,19 +3278,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) + %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 } -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 - %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) + %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i32 3 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 @@ -3299,19 +3299,19 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(fl define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) + %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float> } %ld3 } -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12 - %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) + %ld3 = call { <2 x float>, <2 x float>, <2 x 
float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i32 3 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 @@ -3320,19 +3320,19 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(fl define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld3lane: ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) + %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float> } %ld3 } -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 - %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) + %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 @@ -3341,19 +3341,19 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) + %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double> } %ld3 } -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24 - %ld3 = call { <1 x double>, <1 x 
double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) + %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i32 3 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 @@ -3362,19 +3362,19 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld3lane: ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) + %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double> } %ld3 } -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_ld4lane: ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 - %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) + %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 4 store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 @@ -3383,19 +3383,19 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_ld4lane: ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) + %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4 } -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } 
@test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_ld4lane: ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 - %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) + %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 4 store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 @@ -3404,19 +3404,19 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_ld4lane: ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) + %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc store i8* %tmp, i8** %ptr ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4 } -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_ld4lane: ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 - %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) + %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 4 store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 @@ -3425,19 +3425,19 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_ld4lane: ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) + %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4 } -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } 
@llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_ld4lane: ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 - %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) + %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 4 store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 @@ -3446,19 +3446,19 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_ld4lane: ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) + %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc store i16* %tmp, i16** %ptr ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4 } -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 - %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) + %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 @@ -3467,19 +3467,19 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) + %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <4 x i32>, <4 
x i32>, <4 x i32>, <4 x i32> } %ld4 } -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 - %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) + %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 4 store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 @@ -3488,19 +3488,19 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) + %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc store i32* %tmp, i32** %ptr ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 } -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 - %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) + %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 @@ -3509,19 +3509,19 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* 
%A) + %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 } -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 - %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) + %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i32 4 store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 @@ -3530,19 +3530,19 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) + %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc store i64* %tmp, i64** %ptr ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 } -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 - %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) + %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 @@ -3551,19 +3551,19 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** 
%ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) + %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 } -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 - %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) + %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i32 4 store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 @@ -3572,19 +3572,19 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_ld4lane: ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) + %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc store float* %tmp, float** %ptr ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 } -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 - %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } 
@llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) + %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 @@ -3593,19 +3593,19 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) + %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 } -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 - %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) + %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i32 4 store double* %tmp, double** %ptr ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 @@ -3614,19 +3614,19 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_ld4lane: ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) + %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc store double* %tmp, double** %ptr ret { <1 x double>, <1 x 
double>, <1 x double>, <1 x double> } %ld4 } -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st2: ;CHECK: st2.16b { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i32 32 ret i8* %tmp } @@ -3634,18 +3634,18 @@ define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> % define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st2: ;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st2: ;CHECK: st2.8b { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i32 16 ret i8* %tmp } @@ -3653,18 +3653,18 @@ define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st2: ;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st2: ;CHECK: st2.8h { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i32 16 ret i16* %tmp } @@ -3672,18 +3672,18 @@ define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16 define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st2: ;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) define 
i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st2: ;CHECK: st2.4h { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i32 8 ret i16* %tmp } @@ -3691,18 +3691,18 @@ define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16 define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st2: ;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) +declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st2: ;CHECK: st2.4s { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i32 8 ret i32* %tmp } @@ -3710,18 +3710,18 @@ define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32 define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st2: ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) +declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st2: ;CHECK: st2.2s { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i32 4 ret i32* %tmp } @@ -3729,18 +3729,18 @@ define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32 define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st2: ;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st2: ;CHECK: st2.2d { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret 
i64* %tmp } @@ -3748,18 +3748,18 @@ define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64 define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st2: ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) +declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st2: ;CHECK: st1.1d { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 2 ret i64* %tmp } @@ -3767,18 +3767,18 @@ define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64 define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st2: ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) +declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st2: ;CHECK: st2.4s { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) + call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float* %A, i32 8 ret float* %tmp } @@ -3786,18 +3786,18 @@ define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st2: ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) + call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st2: ;CHECK: st2.2s { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) + call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float* %A, i32 4 ret float* %tmp } @@ -3805,18 +3805,18 @@ define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st2: ;CHECK: 
st2.2s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) + call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st2: ;CHECK: st2.2d { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) + call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -3824,18 +3824,18 @@ define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st2: ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) + call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*) +declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*) define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st2: ;CHECK: st1.1d { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) + call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 2 ret double* %tmp } @@ -3843,18 +3843,18 @@ define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st2: ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) + call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st3: ;CHECK: st3.16b { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i32 48 ret i8* %tmp } @@ -3862,18 +3862,18 @@ define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> % define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st3: ;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x 
i8> %C, <16 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st3: ;CHECK: st3.8b { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i32 24 ret i8* %tmp } @@ -3881,18 +3881,18 @@ define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st3: ;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st3: ;CHECK: st3.8h { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i32 24 ret i16* %tmp } @@ -3900,18 +3900,18 @@ define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16 define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st3: ;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st3: ;CHECK: st3.4h { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i32 12 ret i16* %tmp } @@ -3919,18 +3919,18 @@ define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16 define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st3: ;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 
x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) +declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st3: ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i32 12 ret i32* %tmp } @@ -3938,18 +3938,18 @@ define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32 define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st3: ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) +declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st3: ;CHECK: st3.2s { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i32 6 ret i32* %tmp } @@ -3957,18 +3957,18 @@ define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32 define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st3: ;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st3: ;CHECK: st3.2d { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 6 ret i64* %tmp } @@ -3976,18 +3976,18 @@ define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64 define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st3: ;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* 
%A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) +declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 3 ret i64* %tmp } @@ -3995,18 +3995,18 @@ define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64 define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) +declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st3: ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) + call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float* %A, i32 12 ret float* %tmp } @@ -4014,18 +4014,18 @@ define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st3: ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) + call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st3: ;CHECK: st3.2s { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) + call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float* %A, i32 6 ret float* %tmp } @@ -4033,18 +4033,18 @@ define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st3: ;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* 
%A) + call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st3: ;CHECK: st3.2d { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) + call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 6 ret double* %tmp } @@ -4052,18 +4052,18 @@ define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st3: ;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) + call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) +declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) + call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 3 ret double* %tmp } @@ -4071,18 +4071,18 @@ define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) + call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st4: ;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i32 64 ret i8* %tmp } @@ -4090,18 +4090,18 @@ define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> % define i8* 
@test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st4: ;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st4: ;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i32 32 ret i8* %tmp } @@ -4109,18 +4109,18 @@ define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st4: ;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st4: ;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i32 32 ret i16* %tmp } @@ -4128,18 +4128,18 @@ define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16 define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st4: ;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st4: ;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) + call void 
@llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i32 16 ret i16* %tmp } @@ -4147,18 +4147,18 @@ define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16 define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st4: ;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*) +declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*) define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st4: ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i32 16 ret i32* %tmp } @@ -4166,18 +4166,18 @@ define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32 define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st4: ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*) +declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*) define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st4: ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i32 8 ret i32* %tmp } @@ -4185,18 +4185,18 @@ define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32 define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st4: ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) define i64* 
@test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st4: ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 8 ret i64* %tmp } @@ -4204,18 +4204,18 @@ define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64 define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st4: ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*) +declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*) define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -4223,18 +4223,18 @@ define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64 define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*) +declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*) define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st4: ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) + call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float* %A, i32 16 ret float* %tmp } @@ -4242,18 +4242,18 @@ define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st4: ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) + call 
void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st4: ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) + call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float* %A, i32 8 ret float* %tmp } @@ -4261,18 +4261,18 @@ define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st4: ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) + call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st4: ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) + call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 8 ret double* %tmp } @@ -4280,18 +4280,18 @@ define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st4: ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) + call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*) +declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*) define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* 
%A) + call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -4299,18 +4299,18 @@ define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) + call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st1x2: ;CHECK: st1.16b { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i32 32 ret i8* %tmp } @@ -4318,18 +4318,18 @@ define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st1x2: ;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st1x2: ;CHECK: st1.8b { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i32 16 ret i8* %tmp } @@ -4337,18 +4337,18 @@ define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st1x2: ;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) + call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st1x2: ;CHECK: st1.8h { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i32 
16 ret i16* %tmp } @@ -4356,18 +4356,18 @@ define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st1x2: ;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st1x2: ;CHECK: st1.4h { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i32 8 ret i16* %tmp } @@ -4375,18 +4375,18 @@ define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st1x2: ;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) + call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st1x2: ;CHECK: st1.4s { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i32 8 ret i32* %tmp } @@ -4394,18 +4394,18 @@ define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st1x2: ;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st1x2: ;CHECK: st1.2s { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i32 4 ret i32* %tmp } @@ -4413,18 +4413,18 @@ define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st1x2: ;CHECK: st1.2s { v0, v1 }, [x0], 
x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) + call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st1x2: ;CHECK: st1.2d { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -4432,18 +4432,18 @@ define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st1x2: ;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st1x2: ;CHECK: st1.1d { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 2 ret i64* %tmp } @@ -4451,18 +4451,18 @@ define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st1x2: ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) + call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st1x2: ;CHECK: st1.4s { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) + call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float* %A, i32 8 ret float* %tmp } @@ -4470,18 +4470,18 @@ define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st1x2: ;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) + call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void 
@llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st1x2: ;CHECK: st1.2s { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) + call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float* %A, i32 4 ret float* %tmp } @@ -4489,18 +4489,18 @@ define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st1x2: ;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) + call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st1x2: ;CHECK: st1.2d { v0, v1 }, [x0], #32 - call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) + call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -4508,18 +4508,18 @@ define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st1x2: ;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) + call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) +declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st1x2: ;CHECK: st1.1d { v0, v1 }, [x0], #16 - call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) + call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 2 ret double* %tmp } @@ -4527,18 +4527,18 @@ define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st1x2: ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) + call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, 
double*) +declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st1x3: ;CHECK: st1.16b { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i32 48 ret i8* %tmp } @@ -4546,18 +4546,18 @@ define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st1x3: ;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st1x3: ;CHECK: st1.8b { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i32 24 ret i8* %tmp } @@ -4565,18 +4565,18 @@ define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st1x3: ;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) + call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st1x3: ;CHECK: st1.8h { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i32 24 ret i16* %tmp } @@ -4584,18 +4584,18 @@ define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st1x3: ;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) +declare void 
@llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st1x3: ;CHECK: st1.4h { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i32 12 ret i16* %tmp } @@ -4603,18 +4603,18 @@ define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st1x3: ;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) + call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st1x3: ;CHECK: st1.4s { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i32 12 ret i32* %tmp } @@ -4622,18 +4622,18 @@ define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st1x3: ;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st1x3: ;CHECK: st1.2s { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i32 6 ret i32* %tmp } @@ -4641,18 +4641,18 @@ define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st1x3: ;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) + call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, 
<2 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st1x3: ;CHECK: st1.2d { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 6 ret i64* %tmp } @@ -4660,18 +4660,18 @@ define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st1x3: ;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st1x3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 3 ret i64* %tmp } @@ -4679,18 +4679,18 @@ define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st1x3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) + call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st1x3: ;CHECK: st1.4s { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) + call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float* %A, i32 12 ret float* %tmp } @@ -4698,18 +4698,18 @@ define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st1x3: ;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) + call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A) %tmp = getelementptr float* %A, i64 
%inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st1x3: ;CHECK: st1.2s { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) + call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float* %A, i32 6 ret float* %tmp } @@ -4717,18 +4717,18 @@ define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st1x3: ;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) + call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st1x3: ;CHECK: st1.2d { v0, v1, v2 }, [x0], #48 - call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) + call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 6 ret double* %tmp } @@ -4736,18 +4736,18 @@ define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st1x3: ;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) + call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) +declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st1x3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24 - call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) + call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 3 ret double* %tmp } @@ -4755,18 +4755,18 @@ define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { 
;CHECK-LABEL: test_v1f64_post_reg_st1x3: ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) + call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st1x4: ;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i32 64 ret i8* %tmp } @@ -4774,18 +4774,18 @@ define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st1x4: ;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st1x4: ;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i32 32 ret i8* %tmp } @@ -4793,18 +4793,18 @@ define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st1x4: ;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) + call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st1x4: ;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = 
getelementptr i16* %A, i32 32 ret i16* %tmp } @@ -4812,18 +4812,18 @@ define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st1x4: ;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st1x4: ;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i32 16 ret i16* %tmp } @@ -4831,18 +4831,18 @@ define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st1x4: ;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) + call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*) +declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*) define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st1x4: ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i32 16 ret i32* %tmp } @@ -4850,18 +4850,18 @@ define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st1x4: ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*) define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 
x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st1x4: ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i32 8 ret i32* %tmp } @@ -4869,18 +4869,18 @@ define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st1x4: ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) + call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) +declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st1x4: ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 8 ret i64* %tmp } @@ -4888,18 +4888,18 @@ define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st1x4: ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*) define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st1x4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -4907,18 +4907,18 @@ define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st1x4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A) + call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, 
<1 x i64> %D, <1 x i64> %E, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*) +declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*) define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st1x4: ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) + call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float* %A, i32 16 ret float* %tmp } @@ -4926,18 +4926,18 @@ define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st1x4: ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) + call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) +declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st1x4: ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) + call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float* %A, i32 8 ret float* %tmp } @@ -4945,18 +4945,18 @@ define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st1x4: ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) + call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) +declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st1x4: ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64 - call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) + call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x 
double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 8 ret double* %tmp } @@ -4964,18 +4964,18 @@ define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st1x4: ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) + call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*) +declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*) define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st1x4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32 - call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) + call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -4983,33 +4983,33 @@ define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st1x4: ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) + call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) +declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) { - call void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) + call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) %tmp = getelementptr i8* %A, i32 2 ret i8* %tmp } define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) { - call void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) + call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone +declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st2lane: ;CHECK: st2.b { v0, v1 }[0], 
[x0], #2 - call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) + call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 2 ret i8* %tmp } @@ -5017,18 +5017,18 @@ define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st2lane: ;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) + call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st2lane: ;CHECK: st2.b { v0, v1 }[0], [x0], #2 - call void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) + call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 2 ret i8* %tmp } @@ -5036,18 +5036,18 @@ define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st2lane: ;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) + call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st2lane: ;CHECK: st2.h { v0, v1 }[0], [x0], #4 - call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) + call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 2 ret i16* %tmp } @@ -5055,18 +5055,18 @@ define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st2lane: ;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) + call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st2lane: ;CHECK: st2.h { v0, v1 }[0], [x0], #4 - call void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) + call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) 
%tmp = getelementptr i16* %A, i32 2 ret i16* %tmp } @@ -5074,18 +5074,18 @@ define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st2lane: ;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) + call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], #8 - call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) + call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 2 ret i32* %tmp } @@ -5093,18 +5093,18 @@ define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) + call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], #8 - call void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) + call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 2 ret i32* %tmp } @@ -5112,18 +5112,18 @@ define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) + call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], #16 - call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) + call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 2 ret i64* %tmp } @@ -5131,18 +5131,18 @@ define i64* @test_v2i64_post_imm_st2lane(i64* %A, 
i64** %ptr, <2 x i64> %B, <2 x define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) + call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], #16 - call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) + call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 2 ret i64* %tmp } @@ -5150,18 +5150,18 @@ define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) + call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], #8 - call void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) + call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i32 2 ret float* %tmp } @@ -5169,18 +5169,18 @@ define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) + call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) +declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], #8 - call void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) + call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i32 2 ret float* %tmp } @@ -5188,18 +5188,18 @@ define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> define 
float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st2lane: ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) + call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) +declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], #16 - call void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) + call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 2 ret double* %tmp } @@ -5207,18 +5207,18 @@ define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x doub define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) + call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) +declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], #16 - call void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) + call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 2 ret double* %tmp } @@ -5226,18 +5226,18 @@ define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x doub define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st2lane: ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) + call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) +declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st3lane: ;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3 - call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) + call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* 
%A, i32 3 ret i8* %tmp } @@ -5245,18 +5245,18 @@ define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st3lane: ;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) + call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st3lane: ;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3 - call void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) + call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 3 ret i8* %tmp } @@ -5264,18 +5264,18 @@ define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st3lane: ;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) + call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ;CHECK-LABEL: test_v8i16_post_imm_st3lane: ;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6 - call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) + call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 3 ret i16* %tmp } @@ -5283,18 +5283,18 @@ define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st3lane: ;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) + call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st3lane: ;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6 - call void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* 
%A) + call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 3 ret i16* %tmp } @@ -5302,18 +5302,18 @@ define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st3lane: ;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) + call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 - call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) + call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 3 ret i32* %tmp } @@ -5321,18 +5321,18 @@ define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) + call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 - call void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) + call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 3 ret i32* %tmp } @@ -5340,18 +5340,18 @@ define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) + call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, 
<2 x i64> %D) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 - call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) + call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 3 ret i64* %tmp } @@ -5359,18 +5359,18 @@ define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) + call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 - call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) + call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 3 ret i64* %tmp } @@ -5378,18 +5378,18 @@ define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) + call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 - call void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) + call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i32 3 ret float* %tmp } @@ -5397,18 +5397,18 @@ define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) + call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } 
-declare void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) +declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12 - call void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) + call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i32 3 ret float* %tmp } @@ -5416,18 +5416,18 @@ define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st3lane: ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) + call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) +declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 - call void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) + call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 3 ret double* %tmp } @@ -5435,18 +5435,18 @@ define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x doub define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) + call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) +declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24 - call void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) + call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 3 ret double* %tmp } @@ -5454,18 +5454,18 @@ define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x doub define 
double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st3lane: ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) + call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) +declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ;CHECK-LABEL: test_v16i8_post_imm_st4lane: ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4 - call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) + call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 4 ret i8* %tmp } @@ -5473,18 +5473,18 @@ define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v16i8_post_reg_st4lane: ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) + call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ;CHECK-LABEL: test_v8i8_post_imm_st4lane: ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4 - call void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) + call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i32 4 ret i8* %tmp } @@ -5492,18 +5492,18 @@ define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i8_post_reg_st4lane: ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) + call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) %tmp = getelementptr i8* %A, i64 %inc ret i8* %tmp } -declare void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) +declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ;CHECK-LABEL: 
test_v8i16_post_imm_st4lane: ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8 - call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) + call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 4 ret i16* %tmp } @@ -5511,18 +5511,18 @@ define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v8i16_post_reg_st4lane: ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) + call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ;CHECK-LABEL: test_v4i16_post_imm_st4lane: ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8 - call void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) + call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i32 4 ret i16* %tmp } @@ -5530,18 +5530,18 @@ define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i16_post_reg_st4lane: ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) + call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) %tmp = getelementptr i16* %A, i64 %inc ret i16* %tmp } -declare void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) +declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ;CHECK-LABEL: test_v4i32_post_imm_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 - call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) + call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 4 ret i32* %tmp } @@ -5549,18 +5549,18 @@ define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4i32_post_reg_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> 
%E, i64 0, i32* %A) + call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ;CHECK-LABEL: test_v2i32_post_imm_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 - call void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) + call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i32 4 ret i32* %tmp } @@ -5568,18 +5568,18 @@ define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i32_post_reg_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) + call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) %tmp = getelementptr i32* %A, i64 %inc ret i32* %tmp } -declare void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) +declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ;CHECK-LABEL: test_v2i64_post_imm_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 - call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) + call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -5587,18 +5587,18 @@ define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2i64_post_reg_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) + call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ;CHECK-LABEL: test_v1i64_post_imm_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 - call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) + call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> 
%B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 4 ret i64* %tmp } @@ -5606,18 +5606,18 @@ define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1i64_post_reg_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) + call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) %tmp = getelementptr i64* %A, i64 %inc ret i64* %tmp } -declare void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ;CHECK-LABEL: test_v4f32_post_imm_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 - call void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) + call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i32 4 ret float* %tmp } @@ -5625,18 +5625,18 @@ define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v4f32_post_reg_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) + call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) +declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ;CHECK-LABEL: test_v2f32_post_imm_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 - call void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) + call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i32 4 ret float* %tmp } @@ -5644,18 +5644,18 @@ define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f32_post_reg_st4lane: ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) + call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> 
%C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) %tmp = getelementptr float* %A, i64 %inc ret float* %tmp } -declare void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) +declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ;CHECK-LABEL: test_v2f64_post_imm_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 - call void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) + call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -5663,18 +5663,18 @@ define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x doub define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v2f64_post_reg_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) + call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) +declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ;CHECK-LABEL: test_v1f64_post_imm_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 - call void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) + call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 4 ret double* %tmp } @@ -5682,12 +5682,12 @@ define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x doub define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ;CHECK-LABEL: test_v1f64_post_reg_st4lane: ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} - call void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) + call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) %tmp = getelementptr double* %A, i64 %inc ret double* %tmp } -declare void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) +declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) { ; CHECK-LABEL: test_v16i8_post_imm_ld1r: diff --git a/test/CodeGen/ARM64/inline-asm-error-I.ll 
b/test/CodeGen/AArch64/arm64-inline-asm-error-I.ll
similarity index 100%
rename from test/CodeGen/ARM64/inline-asm-error-I.ll
rename to test/CodeGen/AArch64/arm64-inline-asm-error-I.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-J.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-J.ll
similarity index 100%
rename from test/CodeGen/ARM64/inline-asm-error-J.ll
rename to test/CodeGen/AArch64/arm64-inline-asm-error-J.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-K.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-K.ll
similarity index 100%
rename from test/CodeGen/ARM64/inline-asm-error-K.ll
rename to test/CodeGen/AArch64/arm64-inline-asm-error-K.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-L.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-L.ll
similarity index 100%
rename from test/CodeGen/ARM64/inline-asm-error-L.ll
rename to test/CodeGen/AArch64/arm64-inline-asm-error-L.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-M.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-M.ll
similarity index 100%
rename from test/CodeGen/ARM64/inline-asm-error-M.ll
rename to test/CodeGen/AArch64/arm64-inline-asm-error-M.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-N.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-N.ll
similarity index 100%
rename from test/CodeGen/ARM64/inline-asm-error-N.ll
rename to test/CodeGen/AArch64/arm64-inline-asm-error-N.ll
diff --git a/test/CodeGen/ARM64/inline-asm-zero-reg-error.ll b/test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll
similarity index 100%
rename from test/CodeGen/ARM64/inline-asm-zero-reg-error.ll
rename to test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll
diff --git a/test/CodeGen/ARM64/inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll
similarity index 98%
rename from test/CodeGen/ARM64/inline-asm.ll
rename to test/CodeGen/AArch64/arm64-inline-asm.ll
index e64507870fb..d76cca3f21c 100644
--- a/test/CodeGen/ARM64/inline-asm.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -no-integrated-as | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s
 ; rdar://9167275
diff --git a/test/CodeGen/ARM64/join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll
similarity index 100%
rename from test/CodeGen/ARM64/join-reserved.ll
rename to test/CodeGen/AArch64/arm64-join-reserved.ll
diff --git a/test/CodeGen/ARM64/jumptable.ll b/test/CodeGen/AArch64/arm64-jumptable.ll
similarity index 100%
rename from test/CodeGen/ARM64/jumptable.ll
rename to test/CodeGen/AArch64/arm64-jumptable.ll
diff --git a/test/CodeGen/ARM64/aarch64-large-frame.ll b/test/CodeGen/AArch64/arm64-large-frame.ll
similarity index 100%
rename from test/CodeGen/ARM64/aarch64-large-frame.ll
rename to test/CodeGen/AArch64/arm64-large-frame.ll
diff --git a/test/CodeGen/ARM64/ld1.ll b/test/CodeGen/AArch64/arm64-ld1.ll
similarity index 67%
rename from test/CodeGen/ARM64/ld1.ll
rename to test/CodeGen/AArch64/arm64-ld1.ll
index 61836a10a80..72d808ccc34 100644
--- a/test/CodeGen/ARM64/ld1.ll
+++ b/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
 %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
 %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
@@ -10,7 +10,7 @@ define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
 ; and from the argument of
the function also defined by ABI (i.e., x0) ; CHECK ld2.8b { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x2_t %tmp2 } @@ -19,7 +19,7 @@ define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.8b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x3_t %tmp2 } @@ -28,13 +28,13 @@ define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.8b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x4_t %tmp2 } -declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly %struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> } %struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> } @@ -45,7 +45,7 @@ define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.16b { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x2_t %tmp2 } @@ -54,7 +54,7 @@ define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.16b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x3_t %tmp2 } @@ -63,13 +63,13 @@ define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.16b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x4_t %tmp2 } -declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } 
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -80,7 +80,7 @@ define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.4h { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x2_t %tmp2 } @@ -89,7 +89,7 @@ define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.4h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x3_t %tmp2 } @@ -98,13 +98,13 @@ define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.4h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x4_t %tmp2 } -declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } %struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } @@ -115,7 +115,7 @@ define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.8h { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x2_t %tmp2 } @@ -124,7 +124,7 @@ define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.8h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x3_t %tmp2 } @@ -133,13 +133,13 @@ define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.8h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x4_t %tmp2 } -declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly +declare 
%struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } @@ -150,7 +150,7 @@ define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.2s { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x2_t %tmp2 } @@ -159,7 +159,7 @@ define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.2s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x3_t %tmp2 } @@ -168,13 +168,13 @@ define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.2s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x4_t %tmp2 } -declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly %struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } %struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } @@ -185,7 +185,7 @@ define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.4s { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x2_t %tmp2 } @@ -194,7 +194,7 @@ define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.4s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x3_t %tmp2 } @@ -203,13 +203,13 @@ define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.4s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x4_t %tmp2 } -declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x4x3_t 
@llvm.arm64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly %struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> } %struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> } @@ -220,7 +220,7 @@ define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2.2d { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x2_t %tmp2 } @@ -229,7 +229,7 @@ define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3.2d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x3_t %tmp2 } @@ -238,13 +238,13 @@ define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4.2d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x4_t %tmp2 } -declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly %struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> } %struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> } @@ -256,7 +256,7 @@ define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x2_t %tmp2 } @@ -265,7 +265,7 @@ define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x3_t %tmp2 } @@ -274,14 +274,14 @@ define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A) + 
%tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x4_t %tmp2 } -declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly %struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> } %struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> } @@ -293,7 +293,7 @@ define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A) + %tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A) ret %struct.__neon_float64x1x2_t %tmp2 } @@ -302,7 +302,7 @@ define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A) + %tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A) ret %struct.__neon_float64x1x3_t %tmp2 } @@ -311,13 +311,13 @@ define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A) + %tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A) ret %struct.__neon_float64x1x4_t %tmp2 } -declare %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld2.v1f64.p0f64(double*) nounwind readonly -declare %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld3.v1f64.p0f64(double*) nounwind readonly -declare %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld4.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind { @@ -325,7 +325,7 @@ define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* ; CHECK: ld2lane_16b ; CHECK ld2.b { v0, v1 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A) + %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A) ret %struct.__neon_int8x16x2_t %tmp2 } @@ -334,7 +334,7 @@ define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 ; CHECK: ld3lane_16b ; CHECK ld3.b { v0, v1, v2 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 
1, i8* %A) + %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A) ret %struct.__neon_int8x16x3_t %tmp2 } @@ -343,20 +343,20 @@ define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 ; CHECK: ld4lane_16b ; CHECK ld4.b { v0, v1, v2, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A) + %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A) ret %struct.__neon_int8x16x4_t %tmp2 } -declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK: ld2lane_8h ; CHECK ld2.h { v0, v1 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A) ret %struct.__neon_int16x8x2_t %tmp2 } @@ -365,7 +365,7 @@ define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x ; CHECK: ld3lane_8h ; CHECK ld3.h { v0, v1, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A) ret %struct.__neon_int16x8x3_t %tmp2 } @@ -374,20 +374,20 @@ define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x ; CHECK: ld4lane_8h ; CHECK ld4.h { v0, v1, v2, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A) ret %struct.__neon_int16x8x4_t %tmp2 } -declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x 
i16>, <8 x i16>, i64, i16*) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK: ld2lane_4s ; CHECK ld2.s { v0, v1 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A) ret %struct.__neon_int32x4x2_t %tmp2 } @@ -396,7 +396,7 @@ define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x ; CHECK: ld3lane_4s ; CHECK ld3.s { v0, v1, v2 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A) ret %struct.__neon_int32x4x3_t %tmp2 } @@ -405,20 +405,20 @@ define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x ; CHECK: ld4lane_4s ; CHECK ld4.s { v0, v1, v2, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A) ret %struct.__neon_int32x4x4_t %tmp2 } -declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK: ld2lane_2d ; CHECK ld2.d { v0, v1 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A) + %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A) ret %struct.__neon_int64x2x2_t %tmp2 } @@ -427,7 +427,7 @@ define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x ; CHECK: ld3lane_2d ; CHECK ld3.d { v0, v1, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A) + %tmp2 = call 
%struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A) ret %struct.__neon_int64x2x3_t %tmp2 } @@ -436,13 +436,13 @@ define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x ; CHECK: ld4lane_2d ; CHECK ld4.d { v0, v1, v2, v3 }[1], [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A) + %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A) ret %struct.__neon_int64x2x4_t %tmp2 } -declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly -declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly -declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly +declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly define <8 x i8> @ld1r_8b(i8* %bar) { ; CHECK: ld1r_8b @@ -556,7 +556,7 @@ define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.8b { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x2_t %tmp2 } @@ -565,7 +565,7 @@ define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.8b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x3_t %tmp2 } @@ -574,20 +574,20 @@ define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.8b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A) ret %struct.__neon_int8x8x4_t %tmp2 } -declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind { ; CHECK: ld2r_16b ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.16b { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x2_t 
@llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x2_t %tmp2 } @@ -596,7 +596,7 @@ define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.16b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x3_t %tmp2 } @@ -605,20 +605,20 @@ define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.16b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A) + %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A) ret %struct.__neon_int8x16x4_t %tmp2 } -declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind { ; CHECK: ld2r_4h ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.4h { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x2_t %tmp2 } @@ -627,7 +627,7 @@ define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.4h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x3_t %tmp2 } @@ -636,20 +636,20 @@ define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.4h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A) ret %struct.__neon_int16x4x4_t %tmp2 } -declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind { ; CHECK: ld2r_8h ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.8h { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x2_t 
@llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x2_t %tmp2 } @@ -658,7 +658,7 @@ define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.8h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x3_t %tmp2 } @@ -667,20 +667,20 @@ define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.8h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A) + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A) ret %struct.__neon_int16x8x4_t %tmp2 } -declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind { ; CHECK: ld2r_2s ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.2s { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x2_t %tmp2 } @@ -689,7 +689,7 @@ define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.2s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x3_t %tmp2 } @@ -698,20 +698,20 @@ define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.2s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A) ret %struct.__neon_int32x2x4_t %tmp2 } -declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind { ; CHECK: ld2r_4s ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.4s { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call 
%struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x2_t %tmp2 } @@ -720,7 +720,7 @@ define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.4s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x3_t %tmp2 } @@ -729,20 +729,20 @@ define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.4s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A) + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A) ret %struct.__neon_int32x4x4_t %tmp2 } -declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind { ; CHECK: ld2r_1d ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.1d { v0, v1 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x2_t %tmp2 } @@ -751,7 +751,7 @@ define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.1d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x3_t %tmp2 } @@ -760,20 +760,20 @@ define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.1d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) ret %struct.__neon_int64x1x4_t %tmp2 } -declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind { ; CHECK: ld2r_2d ; Make sure we are using the operands defined by the ABI ; CHECK ld2r.2d { v0, v1 }, [x0] ; CHECK-NEXT ret - 
%tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x2_t %tmp2 } @@ -782,7 +782,7 @@ define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld3r.2d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x3_t %tmp2 } @@ -791,13 +791,13 @@ define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind { ; Make sure we are using the operands defined by the ABI ; CHECK ld4r.2d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret - %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A) + %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) ret %struct.__neon_int64x2x4_t %tmp2 } -declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) { ; CHECK-LABEL: ld1_16b @@ -1041,52 +1041,52 @@ entry: %struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } %struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } -declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x2x2_t @llvm.arm64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) { ; CHECK-LABEL: ld1_x2_v8i8: ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %addr) ret %struct.__neon_int8x8x2_t %val } define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) { ; CHECK-LABEL: ld1_x2_v4i16: ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call 
%struct.__neon_int16x4x2_t @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %addr) ret %struct.__neon_int16x4x2_t %val } define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) { ; CHECK-LABEL: ld1_x2_v2i32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %addr) ret %struct.__neon_int32x2x2_t %val } define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) { ; CHECK-LABEL: ld1_x2_v2f32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x2x2_t @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr) ret %struct.__neon_float32x2x2_t %val } define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) { ; CHECK-LABEL: ld1_x2_v1i64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %addr) ret %struct.__neon_int64x1x2_t %val } define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) { ; CHECK-LABEL: ld1_x2_v1f64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %addr) ret %struct.__neon_float64x1x2_t %val } @@ -1099,247 +1099,247 @@ define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) { %struct.__neon_float64x2x3_t = type { <2 x double>, <2 x double>, <2 x double> } %struct.__neon_float64x2x4_t = type { <2 x double>, <2 x double>, <2 x double>, <2 x double> } -declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x4x2_t @llvm.arm64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x2x2_t @llvm.arm64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) { ; CHECK-LABEL: ld1_x2_v16i8: ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %addr) ret %struct.__neon_int8x16x2_t %val } define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) { ; CHECK-LABEL: ld1_x2_v8i16: ; CHECK: 
ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %addr) ret %struct.__neon_int16x8x2_t %val } define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) { ; CHECK-LABEL: ld1_x2_v4i32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %addr) ret %struct.__neon_int32x4x2_t %val } define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) { ; CHECK-LABEL: ld1_x2_v4f32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x4x2_t @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %addr) ret %struct.__neon_float32x4x2_t %val } define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) { ; CHECK-LABEL: ld1_x2_v2i64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %addr) ret %struct.__neon_int64x2x2_t %val } define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) { ; CHECK-LABEL: ld1_x2_v2f64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x2x2_t @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %addr) ret %struct.__neon_float64x2x2_t %val } -declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) { ; CHECK-LABEL: ld1_x3_v8i8: ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %addr) ret %struct.__neon_int8x8x3_t %val } define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) { ; CHECK-LABEL: ld1_x3_v4i16: ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x4x3_t 
@llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %addr) ret %struct.__neon_int16x4x3_t %val } define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) { ; CHECK-LABEL: ld1_x3_v2i32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %addr) ret %struct.__neon_int32x2x3_t %val } define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) { ; CHECK-LABEL: ld1_x3_v2f32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x2x3_t @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr) ret %struct.__neon_float32x2x3_t %val } define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) { ; CHECK-LABEL: ld1_x3_v1i64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %addr) ret %struct.__neon_int64x1x3_t %val } define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) { ; CHECK-LABEL: ld1_x3_v1f64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %addr) ret %struct.__neon_float64x1x3_t %val } -declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x2x3_t @llvm.arm64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) { ; CHECK-LABEL: ld1_x3_v16i8: ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %addr) ret %struct.__neon_int8x16x3_t %val } define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) { ; CHECK-LABEL: ld1_x3_v8i16: ; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %addr) ret %struct.__neon_int16x8x3_t %val } define %struct.__neon_int32x4x3_t 
@ld1_x3_v4i32(i32* %addr) { ; CHECK-LABEL: ld1_x3_v4i32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %addr) ret %struct.__neon_int32x4x3_t %val } define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) { ; CHECK-LABEL: ld1_x3_v4f32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x4x3_t @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %addr) ret %struct.__neon_float32x4x3_t %val } define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) { ; CHECK-LABEL: ld1_x3_v2i64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %addr) ret %struct.__neon_int64x2x3_t %val } define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) { ; CHECK-LABEL: ld1_x3_v2f64: ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x2x3_t @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %addr) ret %struct.__neon_float64x2x3_t %val } -declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x2x4_t @llvm.arm64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) { ; CHECK-LABEL: ld1_x4_v8i8: ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %addr) ret %struct.__neon_int8x8x4_t %val } define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) { ; CHECK-LABEL: ld1_x4_v4i16: ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %addr) ret %struct.__neon_int16x4x4_t %val } define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) { ; CHECK-LABEL: ld1_x4_v2i32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 
{{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %addr) + %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %addr) ret %struct.__neon_int32x2x4_t %val } define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) { ; CHECK-LABEL: ld1_x4_v2f32: ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float32x2x4_t @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %addr) + %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr) ret %struct.__neon_float32x2x4_t %val } define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) { ; CHECK-LABEL: ld1_x4_v1i64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %addr) + %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %addr) ret %struct.__neon_int64x1x4_t %val } define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) { ; CHECK-LABEL: ld1_x4_v1f64: ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %addr) + %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %addr) ret %struct.__neon_float64x1x4_t %val } -declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly -declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly -declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly -declare %struct.__neon_float32x4x4_t @llvm.arm64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly -declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly -declare %struct.__neon_float64x2x4_t @llvm.arm64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly +declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly +declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly +declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) { ; CHECK-LABEL: ld1_x4_v16i8: ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %addr) + %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %addr) ret %struct.__neon_int8x16x4_t %val } define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) { ; CHECK-LABEL: ld1_x4_v8i16: ; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %addr) + %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %addr) ret %struct.__neon_int16x8x4_t %val } define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) { ; CHECK-LABEL: ld1_x4_v4i32: ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - %val = call 
%struct.__neon_int32x4x4_t @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %addr)
+  %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %addr)
   ret %struct.__neon_int32x4x4_t %val
 }
 
 define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) {
 ; CHECK-LABEL: ld1_x4_v4f32:
 ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
-  %val = call %struct.__neon_float32x4x4_t @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %addr)
+  %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %addr)
   ret %struct.__neon_float32x4x4_t %val
 }
 
 define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) {
 ; CHECK-LABEL: ld1_x4_v2i64:
 ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
-  %val = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %addr)
+  %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %addr)
   ret %struct.__neon_int64x2x4_t %val
 }
 
 define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) {
 ; CHECK-LABEL: ld1_x4_v2f64:
 ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
-  %val = call %struct.__neon_float64x2x4_t @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %addr)
+  %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %addr)
   ret %struct.__neon_float64x2x4_t %val
 }
diff --git a/test/CodeGen/ARM64/ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll
similarity index 98%
rename from test/CodeGen/ARM64/ldp.ll
rename to test/CodeGen/AArch64/arm64-ldp.ll
index 9444385f8ab..5a986261b31 100644
--- a/test/CodeGen/ARM64/ldp.ll
+++ b/test/CodeGen/AArch64/arm64-ldp.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-unscaled-mem-op=true\
+; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
 ; RUN: -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s
 
 ; CHECK: ldp_int
diff --git a/test/CodeGen/ARM64/ldur.ll b/test/CodeGen/AArch64/arm64-ldur.ll
similarity index 100%
rename from test/CodeGen/ARM64/ldur.ll
rename to test/CodeGen/AArch64/arm64-ldur.ll
diff --git a/test/CodeGen/ARM64/ldxr-stxr.ll b/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
similarity index 69%
rename from test/CodeGen/ARM64/ldxr-stxr.ll
rename to test/CodeGen/AArch64/arm64-ldxr-stxr.ll
index ed53a14ca8c..9093df27cdd 100644
--- a/test/CodeGen/ARM64/ldxr-stxr.ll
+++ b/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -6,7 +6,7 @@ define i128 @f0(i8* %p) nounwind readonly {
 ; CHECK-LABEL: f0:
 ; CHECK: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 entry:
-  %ldrexd = tail call %0 @llvm.arm64.ldxp(i8* %p)
+  %ldrexd = tail call %0 @llvm.aarch64.ldxp(i8* %p)
   %0 = extractvalue %0 %ldrexd, 1
   %1 = extractvalue %0 %ldrexd, 0
   %2 = zext i64 %0 to i128
@@ -23,12 +23,12 @@ entry:
   %tmp4 = trunc i128 %val to i64
   %tmp6 = lshr i128 %val, 64
   %tmp7 = trunc i128 %tmp6 to i64
-  %strexd = tail call i32 @llvm.arm64.stxp(i64 %tmp4, i64 %tmp7, i8* %ptr)
+  %strexd = tail call i32 @llvm.aarch64.stxp(i64 %tmp4, i64 %tmp7, i8* %ptr)
   ret i32 %strexd
 }
 
-declare %0 @llvm.arm64.ldxp(i8*) nounwind
-declare i32 @llvm.arm64.stxp(i64, i64, i8*) nounwind
+declare %0 @llvm.aarch64.ldxp(i8*) nounwind
+declare i32 @llvm.aarch64.stxp(i64, i64, i8*) nounwind
 
 @var = global i64 0, align 8
 
@@ -39,7 +39,7 @@ define void @test_load_i8(i8* %addr) {
 ; CHECK-NOT: and
 ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
 
-  %val = call i64 @llvm.arm64.ldxr.p0i8(i8* %addr)
+  %val = call i64 @llvm.aarch64.ldxr.p0i8(i8* %addr)
   %shortval = trunc i64
%val to i8 %extval = zext i8 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -53,7 +53,7 @@ define void @test_load_i16(i16* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldxr.p0i16(i16* %addr) + %val = call i64 @llvm.aarch64.ldxr.p0i16(i16* %addr) %shortval = trunc i64 %val to i16 %extval = zext i16 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -67,7 +67,7 @@ define void @test_load_i32(i32* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldxr.p0i32(i32* %addr) + %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) %shortval = trunc i64 %val to i32 %extval = zext i32 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -79,16 +79,16 @@ define void @test_load_i64(i64* %addr) { ; CHECK: ldxr x[[LOADVAL:[0-9]+]], [x0] ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldxr.p0i64(i64* %addr) + %val = call i64 @llvm.aarch64.ldxr.p0i64(i64* %addr) store i64 %val, i64* @var, align 8 ret void } -declare i64 @llvm.arm64.ldxr.p0i8(i8*) nounwind -declare i64 @llvm.arm64.ldxr.p0i16(i16*) nounwind -declare i64 @llvm.arm64.ldxr.p0i32(i32*) nounwind -declare i64 @llvm.arm64.ldxr.p0i64(i64*) nounwind +declare i64 @llvm.aarch64.ldxr.p0i8(i8*) nounwind +declare i64 @llvm.aarch64.ldxr.p0i16(i16*) nounwind +declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind +declare i64 @llvm.aarch64.ldxr.p0i64(i64*) nounwind define i32 @test_store_i8(i32, i8 %val, i8* %addr) { ; CHECK-LABEL: test_store_i8: @@ -96,7 +96,7 @@ define i32 @test_store_i8(i32, i8 %val, i8* %addr) { ; CHECK-NOT: and ; CHECK: stxrb w0, w1, [x2] %extval = zext i8 %val to i64 - %res = call i32 @llvm.arm64.stxr.p0i8(i64 %extval, i8* %addr) + %res = call i32 @llvm.aarch64.stxr.p0i8(i64 %extval, i8* %addr) ret i32 %res } @@ -106,7 +106,7 @@ define i32 @test_store_i16(i32, i16 %val, i16* %addr) { ; CHECK-NOT: and ; CHECK: stxrh w0, w1, [x2] %extval = zext i16 %val to i64 - %res = call i32 @llvm.arm64.stxr.p0i16(i64 %extval, i16* %addr) + %res = call i32 @llvm.aarch64.stxr.p0i16(i64 %extval, i16* %addr) ret i32 %res } @@ -116,36 +116,36 @@ define i32 @test_store_i32(i32, i32 %val, i32* %addr) { ; CHECK-NOT: and ; CHECK: stxr w0, w1, [x2] %extval = zext i32 %val to i64 - %res = call i32 @llvm.arm64.stxr.p0i32(i64 %extval, i32* %addr) + %res = call i32 @llvm.aarch64.stxr.p0i32(i64 %extval, i32* %addr) ret i32 %res } define i32 @test_store_i64(i32, i64 %val, i64* %addr) { ; CHECK-LABEL: test_store_i64: ; CHECK: stxr w0, x1, [x2] - %res = call i32 @llvm.arm64.stxr.p0i64(i64 %val, i64* %addr) + %res = call i32 @llvm.aarch64.stxr.p0i64(i64 %val, i64* %addr) ret i32 %res } -declare i32 @llvm.arm64.stxr.p0i8(i64, i8*) nounwind -declare i32 @llvm.arm64.stxr.p0i16(i64, i16*) nounwind -declare i32 @llvm.arm64.stxr.p0i32(i64, i32*) nounwind -declare i32 @llvm.arm64.stxr.p0i64(i64, i64*) nounwind +declare i32 @llvm.aarch64.stxr.p0i8(i64, i8*) nounwind +declare i32 @llvm.aarch64.stxr.p0i16(i64, i16*) nounwind +declare i32 @llvm.aarch64.stxr.p0i32(i64, i32*) nounwind +declare i32 @llvm.aarch64.stxr.p0i64(i64, i64*) nounwind ; CHECK: test_clear: ; CHECK: clrex define void @test_clear() { - call void @llvm.arm64.clrex() + call void @llvm.aarch64.clrex() ret void } -declare void @llvm.arm64.clrex() nounwind +declare void @llvm.aarch64.clrex() nounwind define i128 @test_load_acquire_i128(i8* %p) nounwind readonly { ; CHECK-LABEL: test_load_acquire_i128: ; CHECK: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0] 
entry: - %ldrexd = tail call %0 @llvm.arm64.ldaxp(i8* %p) + %ldrexd = tail call %0 @llvm.aarch64.ldaxp(i8* %p) %0 = extractvalue %0 %ldrexd, 1 %1 = extractvalue %0 %ldrexd, 0 %2 = zext i64 %0 to i128 @@ -162,12 +162,12 @@ entry: %tmp4 = trunc i128 %val to i64 %tmp6 = lshr i128 %val, 64 %tmp7 = trunc i128 %tmp6 to i64 - %strexd = tail call i32 @llvm.arm64.stlxp(i64 %tmp4, i64 %tmp7, i8* %ptr) + %strexd = tail call i32 @llvm.aarch64.stlxp(i64 %tmp4, i64 %tmp7, i8* %ptr) ret i32 %strexd } -declare %0 @llvm.arm64.ldaxp(i8*) nounwind -declare i32 @llvm.arm64.stlxp(i64, i64, i8*) nounwind +declare %0 @llvm.aarch64.ldaxp(i8*) nounwind +declare i32 @llvm.aarch64.stlxp(i64, i64, i8*) nounwind define void @test_load_acquire_i8(i8* %addr) { ; CHECK-LABEL: test_load_acquire_i8: @@ -176,7 +176,7 @@ define void @test_load_acquire_i8(i8* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldaxr.p0i8(i8* %addr) + %val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr) %shortval = trunc i64 %val to i8 %extval = zext i8 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -190,7 +190,7 @@ define void @test_load_acquire_i16(i16* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldaxr.p0i16(i16* %addr) + %val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr) %shortval = trunc i64 %val to i16 %extval = zext i16 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -204,7 +204,7 @@ define void @test_load_acquire_i32(i32* %addr) { ; CHECK-NOT: and ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldaxr.p0i32(i32* %addr) + %val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr) %shortval = trunc i64 %val to i32 %extval = zext i32 %shortval to i64 store i64 %extval, i64* @var, align 8 @@ -216,16 +216,16 @@ define void @test_load_acquire_i64(i64* %addr) { ; CHECK: ldaxr x[[LOADVAL:[0-9]+]], [x0] ; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] - %val = call i64 @llvm.arm64.ldaxr.p0i64(i64* %addr) + %val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr) store i64 %val, i64* @var, align 8 ret void } -declare i64 @llvm.arm64.ldaxr.p0i8(i8*) nounwind -declare i64 @llvm.arm64.ldaxr.p0i16(i16*) nounwind -declare i64 @llvm.arm64.ldaxr.p0i32(i32*) nounwind -declare i64 @llvm.arm64.ldaxr.p0i64(i64*) nounwind +declare i64 @llvm.aarch64.ldaxr.p0i8(i8*) nounwind +declare i64 @llvm.aarch64.ldaxr.p0i16(i16*) nounwind +declare i64 @llvm.aarch64.ldaxr.p0i32(i32*) nounwind +declare i64 @llvm.aarch64.ldaxr.p0i64(i64*) nounwind define i32 @test_store_release_i8(i32, i8 %val, i8* %addr) { ; CHECK-LABEL: test_store_release_i8: @@ -233,7 +233,7 @@ define i32 @test_store_release_i8(i32, i8 %val, i8* %addr) { ; CHECK-NOT: and ; CHECK: stlxrb w0, w1, [x2] %extval = zext i8 %val to i64 - %res = call i32 @llvm.arm64.stlxr.p0i8(i64 %extval, i8* %addr) + %res = call i32 @llvm.aarch64.stlxr.p0i8(i64 %extval, i8* %addr) ret i32 %res } @@ -243,7 +243,7 @@ define i32 @test_store_release_i16(i32, i16 %val, i16* %addr) { ; CHECK-NOT: and ; CHECK: stlxrh w0, w1, [x2] %extval = zext i16 %val to i64 - %res = call i32 @llvm.arm64.stlxr.p0i16(i64 %extval, i16* %addr) + %res = call i32 @llvm.aarch64.stlxr.p0i16(i64 %extval, i16* %addr) ret i32 %res } @@ -253,18 +253,18 @@ define i32 @test_store_release_i32(i32, i32 %val, i32* %addr) { ; CHECK-NOT: and ; CHECK: stlxr w0, w1, [x2] %extval = zext i32 %val to i64 - %res = call i32 @llvm.arm64.stlxr.p0i32(i64 %extval, i32* %addr) + %res = call i32 
@llvm.aarch64.stlxr.p0i32(i64 %extval, i32* %addr)
   ret i32 %res
 }
 
 define i32 @test_store_release_i64(i32, i64 %val, i64* %addr) {
 ; CHECK-LABEL: test_store_release_i64:
 ; CHECK: stlxr w0, x1, [x2]
-  %res = call i32 @llvm.arm64.stlxr.p0i64(i64 %val, i64* %addr)
+  %res = call i32 @llvm.aarch64.stlxr.p0i64(i64 %val, i64* %addr)
   ret i32 %res
 }
 
-declare i32 @llvm.arm64.stlxr.p0i8(i64, i8*) nounwind
-declare i32 @llvm.arm64.stlxr.p0i16(i64, i16*) nounwind
-declare i32 @llvm.arm64.stlxr.p0i32(i64, i32*) nounwind
-declare i32 @llvm.arm64.stlxr.p0i64(i64, i64*) nounwind
+declare i32 @llvm.aarch64.stlxr.p0i8(i64, i8*) nounwind
+declare i32 @llvm.aarch64.stlxr.p0i16(i64, i16*) nounwind
+declare i32 @llvm.aarch64.stlxr.p0i32(i64, i32*) nounwind
+declare i32 @llvm.aarch64.stlxr.p0i64(i64, i64*) nounwind
diff --git a/test/CodeGen/ARM64/leaf.ll b/test/CodeGen/AArch64/arm64-leaf.ll
similarity index 100%
rename from test/CodeGen/ARM64/leaf.ll
rename to test/CodeGen/AArch64/arm64-leaf.ll
diff --git a/test/CodeGen/ARM64/long-shift.ll b/test/CodeGen/AArch64/arm64-long-shift.ll
similarity index 100%
rename from test/CodeGen/ARM64/long-shift.ll
rename to test/CodeGen/AArch64/arm64-long-shift.ll
diff --git a/test/CodeGen/ARM64/memcpy-inline.ll b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
similarity index 100%
rename from test/CodeGen/ARM64/memcpy-inline.ll
rename to test/CodeGen/AArch64/arm64-memcpy-inline.ll
diff --git a/test/CodeGen/ARM64/memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll
similarity index 100%
rename from test/CodeGen/ARM64/memset-inline.ll
rename to test/CodeGen/AArch64/arm64-memset-inline.ll
diff --git a/test/CodeGen/ARM64/memset-to-bzero.ll b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
similarity index 100%
rename from test/CodeGen/ARM64/memset-to-bzero.ll
rename to test/CodeGen/AArch64/arm64-memset-to-bzero.ll
diff --git a/test/CodeGen/ARM64/misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
similarity index 96%
rename from test/CodeGen/ARM64/misched-basic-A53.ll
rename to test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index d69b097a9b5..f88bd6a4fe3 100644
--- a/test/CodeGen/ARM64/misched-basic-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -115,10 +115,10 @@ attributes #1 = { nounwind }
 ;
 ; Nothing explicit to check other than llc not crashing.
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
-  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A)
+  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
   %tmp = getelementptr i8* %A, i32 32
   store i8* %tmp, i8** %ptr
   ret { <16 x i8>, <16 x i8> } %ld2
 }
 
-declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8*)
+declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
diff --git a/test/CodeGen/ARM64/misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
similarity index 100%
rename from test/CodeGen/ARM64/misched-forwarding-A53.ll
rename to test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
diff --git a/test/CodeGen/ARM64/movi.ll b/test/CodeGen/AArch64/arm64-movi.ll
similarity index 100%
rename from test/CodeGen/ARM64/movi.ll
rename to test/CodeGen/AArch64/arm64-movi.ll
diff --git a/test/CodeGen/ARM64/mul.ll b/test/CodeGen/AArch64/arm64-mul.ll
similarity index 100%
rename from test/CodeGen/ARM64/mul.ll
rename to test/CodeGen/AArch64/arm64-mul.ll
diff --git a/test/CodeGen/ARM64/named-reg-alloc.ll b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
similarity index 100%
rename from test/CodeGen/ARM64/named-reg-alloc.ll
rename to test/CodeGen/AArch64/arm64-named-reg-alloc.ll
diff --git a/test/CodeGen/ARM64/named-reg-notareg.ll b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
similarity index 100%
rename from test/CodeGen/ARM64/named-reg-notareg.ll
rename to test/CodeGen/AArch64/arm64-named-reg-notareg.ll
diff --git a/test/CodeGen/ARM64/neg.ll b/test/CodeGen/AArch64/arm64-neg.ll
similarity index 100%
rename from test/CodeGen/ARM64/neg.ll
rename to test/CodeGen/AArch64/arm64-neg.ll
diff --git a/test/CodeGen/ARM64/aarch64-neon-2velem-high.ll b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
similarity index 82%
rename from test/CodeGen/ARM64/aarch64-neon-2velem-high.ll
rename to test/CodeGen/AArch64/arm64-neon-2velem-high.ll
index 2013747713b..58df094d192 100644
--- a/test/CodeGen/ARM64/aarch64-neon-2velem-high.ll
+++ b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
@@ -4,25 +4,25 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
 
 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
 
-declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
 
-declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
 
-declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
 
-declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
 
-declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
 
-declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
 
-declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
 
-declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
 
-declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
+declare <2 x i64>
@llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) { ; CHECK-LABEL: test_vmull_high_n_s16: @@ -34,7 +34,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) ret <4 x i32> %vmull15.i.i } @@ -46,7 +46,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) ret <2 x i64> %vmull9.i.i } @@ -60,7 +60,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) ret <4 x i32> %vmull15.i.i } @@ -72,7 +72,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) ret <2 x i64> %vmull9.i.i } @@ -86,7 +86,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vqdmull15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) ret <4 x i32> %vqdmull15.i.i } @@ -98,7 +98,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vqdmull9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) ret <2 x i64> %vqdmull9.i.i } @@ -112,7 +112,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> 
%shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } @@ -125,7 +125,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } @@ -140,7 +140,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } @@ -153,7 +153,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } @@ -167,8 +167,8 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) + %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) ret <4 x i32> %vqdmlal17.i.i } @@ -179,8 +179,8 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) + %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) ret <2 x i64> %vqdmlal11.i.i } @@ -193,7 +193,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x 
i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } @@ -205,7 +205,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } @@ -219,7 +219,7 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } @@ -231,7 +231,7 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } @@ -245,8 +245,8 @@ entry: %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) + %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) ret <4 x i32> %vqdmlsl17.i.i } @@ -257,8 +257,8 @@ entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) + %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) ret <2 x i64> %vqdmlsl11.i.i } diff --git a/test/CodeGen/ARM64/aarch64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll similarity index 84% rename from test/CodeGen/ARM64/aarch64-neon-2velem.ll rename to test/CodeGen/AArch64/arm64-neon-2velem.ll index 4c6b72d55c3..869966caa3a 100644 --- 
a/test/CodeGen/ARM64/aarch64-neon-2velem.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -1,46 +1,46 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) -declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) -declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) -declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) -declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmla_lane_s16: @@ -563,7 +563,7 @@ define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = 
shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -574,7 +574,7 @@ define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -585,7 +585,7 @@ define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -596,7 +596,7 @@ define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -608,7 +608,7 @@ define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -620,7 +620,7 @@ define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -632,7 +632,7 @@ define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -644,7 +644,7 @@ define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -655,7 +655,7 @@ define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -666,7 +666,7 @@ define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -677,7 +677,7 @@ define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -688,7 +688,7 @@ define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -700,7 +700,7 @@ define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -712,7 +712,7 @@ define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -724,7 +724,7 @@ define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -736,7 +736,7 @@ define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32 entry: %shuffle.i = 
shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -747,7 +747,7 @@ define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -758,7 +758,7 @@ define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -769,7 +769,7 @@ define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -780,7 +780,7 @@ define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -792,7 +792,7 @@ define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -804,7 +804,7 @@ define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -816,7 +816,7 @@ define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call 
<4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -828,7 +828,7 @@ define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -839,7 +839,7 @@ define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -850,7 +850,7 @@ define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -861,7 +861,7 @@ define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -872,7 +872,7 @@ define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -884,7 +884,7 @@ define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -896,7 +896,7 @@ define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -908,7 +908,7 @@ define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16 entry: 
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -920,7 +920,7 @@ define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -931,7 +931,7 @@ define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -941,7 +941,7 @@ define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -951,7 +951,7 @@ define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -961,7 +961,7 @@ define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -972,7 +972,7 @@ define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -983,7 +983,7 @@ define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -994,7 +994,7 @@ define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { 
entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1005,7 +1005,7 @@ define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1015,7 +1015,7 @@ define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1025,7 +1025,7 @@ define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1035,7 +1035,7 @@ define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1045,7 +1045,7 @@ define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1056,7 +1056,7 @@ define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1067,7 +1067,7 @@ define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1078,7 +1078,7 @@ define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x 
i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -1089,7 +1089,7 @@ define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -1099,8 +1099,8 @@ define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -1110,8 +1110,8 @@ define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -1122,8 +1122,8 @@ define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -1134,8 +1134,8 @@ define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -1145,8 +1145,8 @@ define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 
x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -1156,8 +1156,8 @@ define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -1168,8 +1168,8 @@ define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -1180,8 +1180,8 @@ define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -1191,7 +1191,7 @@ define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -1201,7 +1201,7 @@ define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -1211,7 +1211,7 @@ define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector 
<8 x i16> %v, <8 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -1221,7 +1221,7 @@ define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -1232,7 +1232,7 @@ define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -1243,7 +1243,7 @@ define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -1254,7 +1254,7 @@ define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -1265,7 +1265,7 @@ define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -1275,7 +1275,7 @@ define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i16> %vqdmulh2.i } @@ -1285,7 +1285,7 @@ define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %vqdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) ret <8 x i16> %vqdmulh2.i } @@ -1295,7 +1295,7 @@ define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> 
%v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i32> %vqdmulh2.i } @@ -1305,7 +1305,7 @@ define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %vqdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) ret <4 x i32> %vqdmulh2.i } @@ -1315,7 +1315,7 @@ define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> - %vqrdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i16> %vqrdmulh2.i } @@ -1325,7 +1325,7 @@ define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> - %vqrdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) ret <8 x i16> %vqrdmulh2.i } @@ -1335,7 +1335,7 @@ define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> - %vqrdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i32> %vqrdmulh2.i } @@ -1345,7 +1345,7 @@ define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> - %vqrdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) ret <4 x i32> %vqrdmulh2.i } @@ -1441,7 +1441,7 @@ define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> - %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) ret <2 x float> %vmulx2.i } @@ -1451,7 +1451,7 @@ define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> - %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) ret <4 x float> %vmulx2.i } @@ -1461,7 +1461,7 @@ define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> 
@llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } @@ -1471,7 +1471,7 @@ define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> - %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) ret <2 x float> %vmulx2.i } @@ -1481,7 +1481,7 @@ define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> - %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) ret <4 x float> %vmulx2.i } @@ -1491,7 +1491,7 @@ define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> - %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } @@ -1942,7 +1942,7 @@ define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -1953,7 +1953,7 @@ define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -1964,7 +1964,7 @@ define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -1975,7 +1975,7 @@ define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -1987,7 +1987,7 @@ define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector 
<4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -1999,7 +1999,7 @@ define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2011,7 +2011,7 @@ define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2023,7 +2023,7 @@ define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2034,7 +2034,7 @@ define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2045,7 +2045,7 @@ define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2056,7 +2056,7 @@ define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2067,7 +2067,7 @@ define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> 
@llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2079,7 +2079,7 @@ define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2091,7 +2091,7 @@ define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2103,7 +2103,7 @@ define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2115,7 +2115,7 @@ define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2126,7 +2126,7 @@ define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2137,7 +2137,7 @@ define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2148,7 +2148,7 @@ define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> 
%shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2159,7 +2159,7 @@ define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2171,7 +2171,7 @@ define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2183,7 +2183,7 @@ define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2195,7 +2195,7 @@ define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %add = add <4 x i32> %vmull2.i, %a ret <4 x i32> %add } @@ -2207,7 +2207,7 @@ define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %add = add <2 x i64> %vmull2.i, %a ret <2 x i64> %add } @@ -2218,7 +2218,7 @@ define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2229,7 +2229,7 @@ define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> 
@llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2240,7 +2240,7 @@ define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2251,7 +2251,7 @@ define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> % ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2263,7 +2263,7 @@ define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i1 entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2275,7 +2275,7 @@ define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i3 entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2287,7 +2287,7 @@ define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) %sub = sub <4 x i32> %a, %vmull2.i ret <4 x i32> %sub } @@ -2299,7 +2299,7 @@ define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) %sub = sub <2 x i64> %a, %vmull2.i ret <2 x i64> %sub } @@ -2310,7 +2310,7 @@ define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 
ret <4 x i32> %vmull2.i } @@ -2320,7 +2320,7 @@ define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2330,7 +2330,7 @@ define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2340,7 +2340,7 @@ define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2351,7 +2351,7 @@ define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2362,7 +2362,7 @@ define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2373,7 +2373,7 @@ define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2384,7 +2384,7 @@ define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2394,7 +2394,7 @@ define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2404,7 +2404,7 @@ define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2414,7 +2414,7 @@ define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2424,7 +2424,7 @@ define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2435,7 +2435,7 @@ define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2446,7 +2446,7 @@ define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2457,7 +2457,7 @@ define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vmull2.i } @@ -2468,7 +2468,7 @@ define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vmull2.i } @@ -2478,8 +2478,8 @@ define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x 
i32> zeroinitializer - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -2489,8 +2489,8 @@ define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -2501,8 +2501,8 @@ define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -2513,8 +2513,8 @@ define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -2524,8 +2524,8 @@ define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -2535,8 +2535,8 @@ define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> 
@llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -2547,8 +2547,8 @@ define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -2559,8 +2559,8 @@ define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -2570,7 +2570,7 @@ define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -2580,7 +2580,7 @@ define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -2590,7 +2590,7 @@ define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -2600,7 +2600,7 @@ define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -2611,7 +2611,7 @@ define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> 
%v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -2622,7 +2622,7 @@ define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -2633,7 +2633,7 @@ define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) ret <4 x i32> %vqdmull2.i } @@ -2644,7 +2644,7 @@ define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) ret <2 x i64> %vqdmull2.i } @@ -2654,7 +2654,7 @@ define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i16> %vqdmulh2.i } @@ -2664,7 +2664,7 @@ define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %vqdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) ret <8 x i16> %vqdmulh2.i } @@ -2674,7 +2674,7 @@ define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i32> %vqdmulh2.i } @@ -2684,7 +2684,7 @@ define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %vqdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + %vqdmulh2.i = tail call 
<4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) ret <4 x i32> %vqdmulh2.i } @@ -2694,7 +2694,7 @@ define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqrdmulh2.i = tail call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) ret <4 x i16> %vqrdmulh2.i } @@ -2704,7 +2704,7 @@ define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %vqrdmulh2.i = tail call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) ret <8 x i16> %vqrdmulh2.i } @@ -2714,7 +2714,7 @@ define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqrdmulh2.i = tail call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) ret <2 x i32> %vqrdmulh2.i } @@ -2724,7 +2724,7 @@ define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %vqrdmulh2.i = tail call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) ret <4 x i32> %vqrdmulh2.i } @@ -2797,7 +2797,7 @@ define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) ret <2 x float> %vmulx2.i } @@ -2807,7 +2807,7 @@ define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer - %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) ret <4 x float> %vmulx2.i } @@ -2817,7 +2817,7 @@ define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } @@ -2827,7 +2827,7 @@ define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + %vmulx2.i = tail call <2 x 
float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) ret <2 x float> %vmulx2.i } @@ -2837,7 +2837,7 @@ define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer - %vmulx2.i = tail call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) ret <4 x float> %vmulx2.i } @@ -2847,7 +2847,7 @@ define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } diff --git a/test/CodeGen/ARM64/aarch64-neon-3vdiff.ll b/test/CodeGen/AArch64/arm64-neon-3vdiff.ll similarity index 85% rename from test/CodeGen/ARM64/aarch64-neon-3vdiff.ll rename to test/CodeGen/AArch64/arm64-neon-3vdiff.ll index a479844de8d..cb9b36c4c18 100644 --- a/test/CodeGen/ARM64/aarch64-neon-3vdiff.ll +++ b/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -1,54 +1,54 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s -declare <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) +declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) -declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8>, <8 x i8>) +declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) -declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) +declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8>, <8 x i8>) +declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) -declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> 
@llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) -declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) -declare <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) +declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) -declare <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) -declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) +declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) +declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) -declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) -declare <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) +declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vaddl_s8: @@ -690,7 +690,7 @@ define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vraddhn_s16: ; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) ret <8 x i8> %vraddhn2.i } @@ -698,7 +698,7 @@ define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vraddhn_s32: ; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) ret <4 x i16> %vraddhn2.i } @@ -706,7 +706,7 @@ define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vraddhn_s64: ; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) ret <2 x i32> %vraddhn2.i } @@ -714,7 +714,7 @@ define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vraddhn_u16: ; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) ret <8 x i8> %vraddhn2.i } @@ -722,7 +722,7 @@ define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vraddhn_u32: ; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) ret <4 x i16> 
%vraddhn2.i } @@ -730,7 +730,7 @@ define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vraddhn_u64: ; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) ret <2 x i32> %vraddhn2.i } @@ -738,7 +738,7 @@ define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; CHECK-LABEL: test_vraddhn_high_s16: ; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vraddhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -750,7 +750,7 @@ define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b ; CHECK-LABEL: test_vraddhn_high_s32: ; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vraddhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -762,7 +762,7 @@ define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b ; CHECK-LABEL: test_vraddhn_high_s64: ; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vraddhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -774,7 +774,7 @@ define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; CHECK-LABEL: test_vraddhn_high_u16: ; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vraddhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -786,7 +786,7 @@ define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b ; CHECK-LABEL: test_vraddhn_high_u32: ; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vraddhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -798,7 +798,7 @@ define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b ; CHECK-LABEL: test_vraddhn_high_u64: ; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vraddhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vraddhn2.i.i = tail call <2 x i32> 
@llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -954,7 +954,7 @@ define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vrsubhn_s16: ; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) ret <8 x i8> %vrsubhn2.i } @@ -962,7 +962,7 @@ define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vrsubhn_s32: ; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) ret <4 x i16> %vrsubhn2.i } @@ -970,7 +970,7 @@ define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vrsubhn_s64: ; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) ret <2 x i32> %vrsubhn2.i } @@ -978,7 +978,7 @@ define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vrsubhn_u16: ; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) ret <8 x i8> %vrsubhn2.i } @@ -986,7 +986,7 @@ define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vrsubhn_u32: ; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) ret <4 x i16> %vrsubhn2.i } @@ -994,7 +994,7 @@ define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vrsubhn_u64: ; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) ret <2 x i32> %vrsubhn2.i } @@ -1002,7 +1002,7 @@ define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; CHECK-LABEL: test_vrsubhn_high_s16: ; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1014,7 +1014,7 @@ define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b ; CHECK-LABEL: test_vrsubhn_high_s32: ; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vrsubhn2.i.i = tail call <4 x i16> 
@llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1026,7 +1026,7 @@ define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b ; CHECK-LABEL: test_vrsubhn_high_s64: ; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1038,7 +1038,7 @@ define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) ; CHECK-LABEL: test_vrsubhn_high_u16: ; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: - %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1050,7 +1050,7 @@ define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b ; CHECK-LABEL: test_vrsubhn_high_u32: ; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: - %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1062,7 +1062,7 @@ define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b ; CHECK-LABEL: test_vrsubhn_high_u64: ; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: - %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -1074,7 +1074,7 @@ define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vabdl_s8: ; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vabd.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) + %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> ret <8 x i16> %vmovl.i.i } @@ -1083,7 +1083,7 @@ define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vabdl_s16: ; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vabd2.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) + %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> ret <4 x i32> %vmovl.i.i } @@ -1092,7 +1092,7 @@ define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vabdl_s32: ; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vabd2.i.i = tail call <2 x 
i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) + %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> ret <2 x i64> %vmovl.i.i } @@ -1101,7 +1101,7 @@ define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vabdl_u8: ; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vabd.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) + %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> ret <8 x i16> %vmovl.i.i } @@ -1110,7 +1110,7 @@ define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vabdl_u16: ; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vabd2.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) + %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> ret <4 x i32> %vmovl.i.i } @@ -1119,7 +1119,7 @@ define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vabdl_u32: ; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vabd2.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) + %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> ret <2 x i64> %vmovl.i.i } @@ -1128,7 +1128,7 @@ define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vabal_s8: ; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) + %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> %add.i = add <8 x i16> %vmovl.i.i.i, %a ret <8 x i16> %add.i @@ -1138,7 +1138,7 @@ define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vabal_s16: ; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) + %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> %add.i = add <4 x i32> %vmovl.i.i.i, %a ret <4 x i32> %add.i @@ -1148,7 +1148,7 @@ define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vabal_s32: ; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) + %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> %add.i = add <2 x i64> %vmovl.i.i.i, %a ret <2 x i64> %add.i @@ -1158,7 +1158,7 @@ define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vabal_u8: ; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) + %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> %add.i = add <8 x i16> %vmovl.i.i.i, %a ret <8 x i16> %add.i @@ 
-1168,7 +1168,7 @@ define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vabal_u16: ; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) + %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> %add.i = add <4 x i32> %vmovl.i.i.i, %a ret <4 x i32> %add.i @@ -1178,7 +1178,7 @@ define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vabal_u32: ; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) + %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> %add.i = add <2 x i64> %vmovl.i.i.i, %a ret <2 x i64> %add.i @@ -1190,7 +1190,7 @@ define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> ret <8 x i16> %vmovl.i.i.i } @@ -1201,7 +1201,7 @@ define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> ret <4 x i32> %vmovl.i.i.i } @@ -1212,7 +1212,7 @@ define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> ret <2 x i64> %vmovl.i.i.i } @@ -1223,7 +1223,7 @@ define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vabd.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> ret <8 x i16> %vmovl.i.i.i } @@ -1234,7 +1234,7 @@ define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + 
%vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> ret <4 x i32> %vmovl.i.i.i } @@ -1245,7 +1245,7 @@ define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> ret <2 x i64> %vmovl.i.i.i } @@ -1256,7 +1256,7 @@ define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a ret <8 x i16> %add.i.i @@ -1268,7 +1268,7 @@ define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a ret <4 x i32> %add.i.i @@ -1280,7 +1280,7 @@ define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a ret <2 x i64> %add.i.i @@ -1292,7 +1292,7 @@ define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a ret <8 x i16> %add.i.i @@ -1304,7 +1304,7 @@ define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + 
%vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a ret <4 x i32> %add.i.i @@ -1316,7 +1316,7 @@ define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a ret <2 x i64> %add.i.i @@ -1326,7 +1326,7 @@ define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vmull_s8: ; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) + %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) ret <8 x i16> %vmull.i } @@ -1334,7 +1334,7 @@ define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vmull_s16: ; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) ret <4 x i32> %vmull2.i } @@ -1342,7 +1342,7 @@ define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vmull_s32: ; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) ret <2 x i64> %vmull2.i } @@ -1350,7 +1350,7 @@ define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vmull_u8: ; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) + %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) ret <8 x i16> %vmull.i } @@ -1358,7 +1358,7 @@ define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vmull_u16: ; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) ret <4 x i32> %vmull2.i } @@ -1366,7 +1366,7 @@ define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vmull_u32: ; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) ret <2 x i64> %vmull2.i } @@ -1376,7 +1376,7 @@ define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + 
%vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) ret <8 x i16> %vmull.i.i } @@ -1386,7 +1386,7 @@ define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) ret <4 x i32> %vmull2.i.i } @@ -1396,7 +1396,7 @@ define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) ret <2 x i64> %vmull2.i.i } @@ -1406,7 +1406,7 @@ define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) ret <8 x i16> %vmull.i.i } @@ -1416,7 +1416,7 @@ define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) ret <4 x i32> %vmull2.i.i } @@ -1426,7 +1426,7 @@ define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) ret <2 x i64> %vmull2.i.i } @@ -1434,7 +1434,7 @@ define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vmlal_s8: ; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) %add.i = add <8 x i16> %vmull.i.i, %a ret <8 x i16> %add.i } @@ -1443,7 +1443,7 @@ define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vmlal_s16: ; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) %add.i = add <4 x i32> %vmull2.i.i, %a ret <4 x i32> %add.i } @@ -1452,7 +1452,7 @@ define <2 x i64> @test_vmlal_s32(<2 
x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vmlal_s32: ; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) %add.i = add <2 x i64> %vmull2.i.i, %a ret <2 x i64> %add.i } @@ -1461,7 +1461,7 @@ define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vmlal_u8: ; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) %add.i = add <8 x i16> %vmull.i.i, %a ret <8 x i16> %add.i } @@ -1470,7 +1470,7 @@ define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vmlal_u16: ; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) %add.i = add <4 x i32> %vmull2.i.i, %a ret <4 x i32> %add.i } @@ -1479,7 +1479,7 @@ define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vmlal_u32: ; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) %add.i = add <2 x i64> %vmull2.i.i, %a ret <2 x i64> %add.i } @@ -1490,7 +1490,7 @@ define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %add.i.i = add <8 x i16> %vmull.i.i.i, %a ret <8 x i16> %add.i.i } @@ -1501,7 +1501,7 @@ define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } @@ -1512,7 +1512,7 @@ define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } @@ -1523,7 +1523,7 @@ define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x 
i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %add.i.i = add <8 x i16> %vmull.i.i.i, %a ret <8 x i16> %add.i.i } @@ -1534,7 +1534,7 @@ define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %add.i.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i.i } @@ -1545,7 +1545,7 @@ define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %add.i.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i.i } @@ -1554,7 +1554,7 @@ define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vmlsl_s8: ; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) %sub.i = sub <8 x i16> %a, %vmull.i.i ret <8 x i16> %sub.i } @@ -1563,7 +1563,7 @@ define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vmlsl_s16: ; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) %sub.i = sub <4 x i32> %a, %vmull2.i.i ret <4 x i32> %sub.i } @@ -1572,7 +1572,7 @@ define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vmlsl_s32: ; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) %sub.i = sub <2 x i64> %a, %vmull2.i.i ret <2 x i64> %sub.i } @@ -1581,7 +1581,7 @@ define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: test_vmlsl_u8: ; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) %sub.i = sub <8 x i16> %a, %vmull.i.i ret <8 x i16> %sub.i } @@ -1590,7 +1590,7 @@ define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vmlsl_u16: ; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vmull2.i.i 
= tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) %sub.i = sub <4 x i32> %a, %vmull2.i.i ret <4 x i32> %sub.i } @@ -1599,7 +1599,7 @@ define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vmlsl_u32: ; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) %sub.i = sub <2 x i64> %a, %vmull2.i.i ret <2 x i64> %sub.i } @@ -1610,7 +1610,7 @@ define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i ret <8 x i16> %sub.i.i } @@ -1621,7 +1621,7 @@ define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } @@ -1632,7 +1632,7 @@ define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } @@ -1643,7 +1643,7 @@ define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { entry: %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i ret <8 x i16> %sub.i.i } @@ -1654,7 +1654,7 @@ define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i ret <4 x i32> %sub.i.i } @@ -1665,7 +1665,7 @@ define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x 
i32> %c, <4 x i32> undef, <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i ret <2 x i64> %sub.i.i } @@ -1674,7 +1674,7 @@ define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vqdmull_s16: ; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vqdmull2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) + %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) ret <4 x i32> %vqdmull2.i } @@ -1682,7 +1682,7 @@ define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vqdmull_s32: ; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vqdmull2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) + %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) ret <2 x i64> %vqdmull2.i } @@ -1690,8 +1690,8 @@ define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vqdmlal_s16: ; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vqdmlal2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) ret <4 x i32> %vqdmlal4.i } @@ -1699,8 +1699,8 @@ define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vqdmlal_s32: ; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vqdmlal2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) ret <2 x i64> %vqdmlal4.i } @@ -1708,8 +1708,8 @@ define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: test_vqdmlsl_s16: ; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) ret <4 x i32> %vqdmlsl4.i } @@ -1717,8 +1717,8 @@ define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; CHECK-LABEL: test_vqdmlsl_s32: ; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) + 
%vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) ret <2 x i64> %vqdmlsl4.i } @@ -1728,7 +1728,7 @@ define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) { entry: %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vqdmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vqdmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) ret <4 x i32> %vqdmull2.i.i } @@ -1738,7 +1738,7 @@ define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) { entry: %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vqdmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vqdmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) ret <2 x i64> %vqdmull2.i.i } @@ -1748,8 +1748,8 @@ define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i) + %vqdmlal2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vqdmlal4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i) ret <4 x i32> %vqdmlal4.i.i } @@ -1759,8 +1759,8 @@ define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> - %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i) + %vqdmlal2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vqdmlal4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i) ret <2 x i64> %vqdmlal4.i.i } @@ -1770,8 +1770,8 @@ define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c entry: %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> - %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i) + %vqdmlsl2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vqdmlsl4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i) ret <4 x i32> %vqdmlsl4.i.i } @@ -1781,8 +1781,8 @@ define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c entry: %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle.i3.i = shufflevector 
<4 x i32> %c, <4 x i32> undef, <2 x i32> - %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i) + %vqdmlsl2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vqdmlsl4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i) ret <2 x i64> %vqdmlsl4.i.i } @@ -1790,7 +1790,7 @@ define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vmull_p8: ; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: - %vmull.i = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) + %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) ret <8 x i16> %vmull.i } @@ -1800,7 +1800,7 @@ define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) { entry: %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) ret <8 x i16> %vmull.i.i } @@ -1808,7 +1808,7 @@ define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { ; CHECK-LABEL: test_vmull_p64 ; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d entry: - %vmull2.i = tail call <16 x i8> @llvm.arm64.neon.pmull64(i64 %a, i64 %b) + %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 ret i128 %vmull3.i } @@ -1819,11 +1819,11 @@ define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { entry: %0 = extractelement <2 x i64> %a, i32 1 %1 = extractelement <2 x i64> %b, i32 1 - %vmull2.i.i = tail call <16 x i8> @llvm.arm64.neon.pmull64(i64 %0, i64 %1) #1 + %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %0, i64 %1) #1 %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128 ret i128 %vmull3.i.i } -declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64) #5 +declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5 diff --git a/test/CodeGen/ARM64/aarch64-neon-aba-abd.ll b/test/CodeGen/AArch64/arm64-neon-aba-abd.ll similarity index 58% rename from test/CodeGen/ARM64/aarch64-neon-aba-abd.ll rename to test/CodeGen/AArch64/arm64-neon-aba-abd.ll index 9d321b23976..6404ab72801 100644 --- a/test/CodeGen/ARM64/aarch64-neon-aba-abd.ll +++ b/test/CodeGen/AArch64/arm64-neon-aba-abd.ll @@ -1,18 +1,18 @@ ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK: test_uabd_v8i8: - %abd = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; CHECK: uabd v0.8b, v0.8b, v1.8b ret <8 x i8> %abd } define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK: test_uaba_v8i8: - %abd = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %abd = call <8 x i8> 
@llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) %aba = add <8 x i8> %lhs, %abd ; CHECK: uaba v0.8b, v0.8b, v1.8b ret <8 x i8> %aba @@ -20,32 +20,32 @@ define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK: test_sabd_v8i8: - %abd = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; CHECK: sabd v0.8b, v0.8b, v1.8b ret <8 x i8> %abd } define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK: test_saba_v8i8: - %abd = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) %aba = add <8 x i8> %lhs, %abd ; CHECK: saba v0.8b, v0.8b, v1.8b ret <8 x i8> %aba } -declare <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_uabd_v16i8: - %abd = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; CHECK: uabd v0.16b, v0.16b, v1.16b ret <16 x i8> %abd } define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_uaba_v16i8: - %abd = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) %aba = add <16 x i8> %lhs, %abd ; CHECK: uaba v0.16b, v0.16b, v1.16b ret <16 x i8> %aba @@ -53,32 +53,32 @@ define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_sabd_v16i8: - %abd = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; CHECK: sabd v0.16b, v0.16b, v1.16b ret <16 x i8> %abd } define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_saba_v16i8: - %abd = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) %aba = add <16 x i8> %lhs, %abd ; CHECK: saba v0.16b, v0.16b, v1.16b ret <16 x i8> %aba } -declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_uabd_v4i16: - %abd = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: uabd v0.4h, v0.4h, v1.4h ret <4 x i16> %abd } define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_uaba_v4i16: - %abd = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) %aba = add <4 x i16> %lhs, %abd ; CHECK: uaba v0.4h, v0.4h, v1.4h ret <4 x i16> %aba @@ -86,32 +86,32 @@ 
define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_sabd_v4i16: - %abd = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: sabd v0.4h, v0.4h, v1.4h ret <4 x i16> %abd } define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_saba_v4i16: - %abd = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) %aba = add <4 x i16> %lhs, %abd ; CHECK: saba v0.4h, v0.4h, v1.4h ret <4 x i16> %aba } -declare <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_uabd_v8i16: - %abd = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: uabd v0.8h, v0.8h, v1.8h ret <8 x i16> %abd } define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_uaba_v8i16: - %abd = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) %aba = add <8 x i16> %lhs, %abd ; CHECK: uaba v0.8h, v0.8h, v1.8h ret <8 x i16> %aba @@ -119,32 +119,32 @@ define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_sabd_v8i16: - %abd = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: sabd v0.8h, v0.8h, v1.8h ret <8 x i16> %abd } define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_saba_v8i16: - %abd = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) %aba = add <8 x i16> %lhs, %abd ; CHECK: saba v0.8h, v0.8h, v1.8h ret <8 x i16> %aba } -declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_uabd_v2i32: - %abd = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: uabd v0.2s, v0.2s, v1.2s ret <2 x i32> %abd } define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_uaba_v2i32: - %abd = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) %aba = add <2 x i32> %lhs, %abd ; CHECK: uaba v0.2s, v0.2s, v1.2s ret <2 x i32> %aba @@ -152,7 +152,7 @@ define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; 
CHECK: test_sabd_v2i32: - %abd = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: sabd v0.2s, v0.2s, v1.2s ret <2 x i32> %abd } @@ -161,7 +161,7 @@ define <2 x i32> @test_sabd_v2i32_const() { ; CHECK: test_sabd_v2i32_const: ; CHECK: movi d1, #0x00ffffffff0000 ; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s - %1 = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32( + %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32( <2 x i32> , <2 x i32> ) ret <2 x i32> %1 @@ -169,25 +169,25 @@ define <2 x i32> @test_sabd_v2i32_const() { define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_saba_v2i32: - %abd = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) %aba = add <2 x i32> %lhs, %abd ; CHECK: saba v0.2s, v0.2s, v1.2s ret <2 x i32> %aba } -declare <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_uabd_v4i32: - %abd = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: uabd v0.4s, v0.4s, v1.4s ret <4 x i32> %abd } define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_uaba_v4i32: - %abd = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) %aba = add <4 x i32> %lhs, %abd ; CHECK: uaba v0.4s, v0.4s, v1.4s ret <4 x i32> %aba @@ -195,42 +195,42 @@ define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_sabd_v4i32: - %abd = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: sabd v0.4s, v0.4s, v1.4s ret <4 x i32> %abd } define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_saba_v4i32: - %abd = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) %aba = add <4 x i32> %lhs, %abd ; CHECK: saba v0.4s, v0.4s, v1.4s ret <4 x i32> %aba } -declare <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) { ; CHECK: test_fabd_v2f32: - %abd = call <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs) + %abd = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs) ; CHECK: fabd v0.2s, v0.2s, v1.2s ret <2 x float> %abd } -declare <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) { ; CHECK: test_fabd_v4f32: - %abd = call <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs) + %abd = call <4 x 
float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs) ; CHECK: fabd v0.4s, v0.4s, v1.4s ret <4 x float> %abd } -declare <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK: test_fabd_v2f64: - %abd = call <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs) + %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs) ; CHECK: fabd v0.2d, v0.2d, v1.2d ret <2 x double> %abd } diff --git a/test/CodeGen/ARM64/aarch64-neon-across.ll b/test/CodeGen/AArch64/arm64-neon-across.ll similarity index 56% rename from test/CodeGen/ARM64/aarch64-neon-across.ll rename to test/CodeGen/AArch64/arm64-neon-across.ll index 5a3538ba88e..3a63673f120 100644 --- a/test/CodeGen/ARM64/aarch64-neon-across.ll +++ b/test/CodeGen/AArch64/arm64-neon-across.ll @@ -1,88 +1,88 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s -declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>) -declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>) -declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>) -declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>) -declare i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.uminv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.umaxv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16>) +declare i32 
@llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>) -declare i64 @llvm.arm64.neon.uaddlv.i64.v4i32(<4 x i32>) +declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.uaddlv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>) -declare i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32>) +declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.saddlv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.saddlv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.uaddlv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.uaddlv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.saddlv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.saddlv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>) define i16 @test_vaddlv_s8(<8 x i8> %a) { ; CHECK: test_vaddlv_s8: ; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v8i8(<8 x i8> %a) + %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %saddlvv.i to i16 ret i16 %0 } @@ -91,7 +91,7 @@ define i32 @test_vaddlv_s16(<4 x i16> %a) { ; CHECK: test_vaddlv_s16: ; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v4i16(<4 x i16> %a) + %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) ret i32 %saddlvv.i } @@ -99,7 +99,7 @@ define i16 @test_vaddlv_u8(<8 x i8> %a) { ; CHECK: test_vaddlv_u8: ; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v8i8(<8 x i8> %a) + %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %uaddlvv.i to i16 ret i16 %0 } @@ -108,7 +108,7 @@ define i32 @test_vaddlv_u16(<4 x i16> %a) { ; CHECK: test_vaddlv_u16: ; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v4i16(<4 x i16> %a) + %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) ret i32 %uaddlvv.i } @@ -116,7 +116,7 @@ define i16 @test_vaddlvq_s8(<16 x i8> %a) { ; CHECK: test_vaddlvq_s8: ; CHECK: saddlv h{{[0-9]+}}, 
{{v[0-9]+}}.16b entry: - %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v16i8(<16 x i8> %a) + %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %saddlvv.i to i16 ret i16 %0 } @@ -125,7 +125,7 @@ define i32 @test_vaddlvq_s16(<8 x i16> %a) { ; CHECK: test_vaddlvq_s16: ; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %saddlvv.i = tail call i32 @llvm.arm64.neon.saddlv.i32.v8i16(<8 x i16> %a) + %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) ret i32 %saddlvv.i } @@ -133,7 +133,7 @@ define i64 @test_vaddlvq_s32(<4 x i32> %a) { ; CHECK: test_vaddlvq_s32: ; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %saddlvv.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v4i32(<4 x i32> %a) + %saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) ret i64 %saddlvv.i } @@ -141,7 +141,7 @@ define i16 @test_vaddlvq_u8(<16 x i8> %a) { ; CHECK: test_vaddlvq_u8: ; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8> %a) + %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %uaddlvv.i to i16 ret i16 %0 } @@ -150,7 +150,7 @@ define i32 @test_vaddlvq_u16(<8 x i16> %a) { ; CHECK: test_vaddlvq_u16: ; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %uaddlvv.i = tail call i32 @llvm.arm64.neon.uaddlv.i32.v8i16(<8 x i16> %a) + %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) ret i32 %uaddlvv.i } @@ -158,7 +158,7 @@ define i64 @test_vaddlvq_u32(<4 x i32> %a) { ; CHECK: test_vaddlvq_u32: ; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %uaddlvv.i = tail call i64 @llvm.arm64.neon.uaddlv.i64.v4i32(<4 x i32> %a) + %uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) ret i64 %uaddlvv.i } @@ -166,7 +166,7 @@ define i8 @test_vmaxv_s8(<8 x i8> %a) { ; CHECK: test_vmaxv_s8: ; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %smaxv.i to i8 ret i8 %0 } @@ -175,7 +175,7 @@ define i16 @test_vmaxv_s16(<4 x i16> %a) { ; CHECK: test_vmaxv_s16: ; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %smaxv.i to i16 ret i16 %0 } @@ -184,7 +184,7 @@ define i8 @test_vmaxv_u8(<8 x i8> %a) { ; CHECK: test_vmaxv_u8: ; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %umaxv.i to i8 ret i8 %0 } @@ -193,7 +193,7 @@ define i16 @test_vmaxv_u16(<4 x i16> %a) { ; CHECK: test_vmaxv_u16: ; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %umaxv.i to i16 ret i16 %0 } @@ -202,7 +202,7 @@ define i8 @test_vmaxvq_s8(<16 x i8> %a) { ; CHECK: test_vmaxvq_s8: ; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %smaxv.i to i8 ret i8 %0 } @@ -211,7 +211,7 @@ define i16 @test_vmaxvq_s16(<8 x i16> %a) { ; 
CHECK: test_vmaxvq_s16: ; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %smaxv.i to i16 ret i16 %0 } @@ -220,7 +220,7 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a) { ; CHECK: test_vmaxvq_s32: ; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %smaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32> %a) + %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) ret i32 %smaxv.i } @@ -228,7 +228,7 @@ define i8 @test_vmaxvq_u8(<16 x i8> %a) { ; CHECK: test_vmaxvq_u8: ; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %umaxv.i to i8 ret i8 %0 } @@ -237,7 +237,7 @@ define i16 @test_vmaxvq_u16(<8 x i16> %a) { ; CHECK: test_vmaxvq_u16: ; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %umaxv.i to i16 ret i16 %0 } @@ -246,7 +246,7 @@ define i32 @test_vmaxvq_u32(<4 x i32> %a) { ; CHECK: test_vmaxvq_u32: ; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %umaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i32(<4 x i32> %a) + %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) ret i32 %umaxv.i } @@ -254,7 +254,7 @@ define i8 @test_vminv_s8(<8 x i8> %a) { ; CHECK: test_vminv_s8: ; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %sminv.i to i8 ret i8 %0 } @@ -263,7 +263,7 @@ define i16 @test_vminv_s16(<4 x i16> %a) { ; CHECK: test_vminv_s16: ; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %sminv.i to i16 ret i16 %0 } @@ -272,7 +272,7 @@ define i8 @test_vminv_u8(<8 x i8> %a) { ; CHECK: test_vminv_u8: ; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %uminv.i to i8 ret i8 %0 } @@ -281,7 +281,7 @@ define i16 @test_vminv_u16(<4 x i16> %a) { ; CHECK: test_vminv_u16: ; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %uminv.i to i16 ret i16 %0 } @@ -290,7 +290,7 @@ define i8 @test_vminvq_s8(<16 x i8> %a) { ; CHECK: test_vminvq_s8: ; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %sminv.i to i8 ret i8 %0 } @@ -299,7 +299,7 @@ define i16 @test_vminvq_s16(<8 x i16> %a) { ; CHECK: test_vminvq_s16: ; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %sminv.i to i16 ret i16 %0 } @@ -308,7 +308,7 @@ define i32 
@test_vminvq_s32(<4 x i32> %a) { ; CHECK: test_vminvq_s32: ; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %sminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32> %a) + %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) ret i32 %sminv.i } @@ -316,7 +316,7 @@ define i8 @test_vminvq_u8(<16 x i8> %a) { ; CHECK: test_vminvq_u8: ; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %uminv.i to i8 ret i8 %0 } @@ -325,7 +325,7 @@ define i16 @test_vminvq_u16(<8 x i16> %a) { ; CHECK: test_vminvq_u16: ; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %uminv.i to i16 ret i16 %0 } @@ -334,7 +334,7 @@ define i32 @test_vminvq_u32(<4 x i32> %a) { ; CHECK: test_vminvq_u32: ; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %uminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i32(<4 x i32> %a) + %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) ret i32 %uminv.i } @@ -342,7 +342,7 @@ define i8 @test_vaddv_s8(<8 x i8> %a) { ; CHECK: test_vaddv_s8: ; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -351,7 +351,7 @@ define i16 @test_vaddv_s16(<4 x i16> %a) { ; CHECK: test_vaddv_s16: ; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -360,7 +360,7 @@ define i8 @test_vaddv_u8(<8 x i8> %a) { ; CHECK: test_vaddv_u8: ; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -369,7 +369,7 @@ define i16 @test_vaddv_u16(<4 x i16> %a) { ; CHECK: test_vaddv_u16: ; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -378,7 +378,7 @@ define i8 @test_vaddvq_s8(<16 x i8> %a) { ; CHECK: test_vaddvq_s8: ; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -387,7 +387,7 @@ define i16 @test_vaddvq_s16(<8 x i16> %a) { ; CHECK: test_vaddvq_s16: ; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -396,7 +396,7 @@ define i32 @test_vaddvq_s32(<4 x i32> %a) { ; CHECK: test_vaddvq_s32: ; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) ret i32 %vaddv.i } @@ -404,7 +404,7 @@ define i8 
@test_vaddvq_u8(<16 x i8> %a) { ; CHECK: test_vaddvq_u8: ; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -413,7 +413,7 @@ define i16 @test_vaddvq_u16(<8 x i16> %a) { ; CHECK: test_vaddvq_u16: ; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -422,7 +422,7 @@ define i32 @test_vaddvq_u32(<4 x i32> %a) { ; CHECK: test_vaddvq_u32: ; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) ret i32 %vaddv.i } @@ -430,7 +430,7 @@ define float @test_vmaxvq_f32(<4 x float> %a) { ; CHECK: test_vmaxvq_f32: ; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %0 = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %a) + %0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) ret float %0 } @@ -438,7 +438,7 @@ define float @test_vminvq_f32(<4 x float> %a) { ; CHECK: test_vminvq_f32: ; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %0 = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %a) + %0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) ret float %0 } @@ -446,7 +446,7 @@ define float @test_vmaxnmvq_f32(<4 x float> %a) { ; CHECK: test_vmaxnmvq_f32: ; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %0 = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) + %0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) ret float %0 } @@ -454,7 +454,7 @@ define float @test_vminnmvq_f32(<4 x float> %a) { ; CHECK: test_vminnmvq_f32: ; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %0 = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %a) + %0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) ret float %0 } diff --git a/test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll b/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll similarity index 51% rename from test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll rename to test/CodeGen/AArch64/arm64-neon-add-pairwise.ll index 9cd76ff1b7e..d3dc1b8d010 100644 --- a/test/CodeGen/ARM64/aarch64-neon-add-pairwise.ll +++ b/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll @@ -1,91 +1,91 @@ ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -declare <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>) define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; Using registers other than v0, v1 are possible, but would be odd. 
; CHECK: test_addp_v8i8: - %tmp1 = call <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %tmp1 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; CHECK: addp v0.8b, v0.8b, v1.8b ret <8 x i8> %tmp1 } -declare <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>) define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK: test_addp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %tmp1 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; CHECK: addp v0.16b, v0.16b, v1.16b ret <16 x i8> %tmp1 } -declare <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>) define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK: test_addp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %tmp1 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: addp v0.4h, v0.4h, v1.4h ret <4 x i16> %tmp1 } -declare <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>) define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK: test_addp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %tmp1 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: addp v0.8h, v0.8h, v1.8h ret <8 x i16> %tmp1 } -declare <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>) define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_addp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %tmp1 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: addp v0.2s, v0.2s, v1.2s ret <2 x i32> %tmp1 } -declare <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>) define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK: test_addp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %tmp1 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: addp v0.4s, v0.4s, v1.4s ret <4 x i32> %tmp1 } -declare <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; CHECK: test_addp_v2i64: - %val = call <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) + %val = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ; CHECK: addp v0.2d, v0.2d, v1.2d ret <2 x i64> %val } -declare <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double>, <2 x double>) +declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) +declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { ; 
CHECK: test_faddp_v2f32: - %val = call <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs) + %val = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs) ; CHECK: faddp v0.2s, v0.2s, v1.2s ret <2 x float> %val } define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { ; CHECK: test_faddp_v4f32: - %val = call <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs) + %val = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs) ; CHECK: faddp v0.4s, v0.4s, v1.4s ret <4 x float> %val } define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK: test_faddp_v2f64: - %val = call <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs) + %val = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs) ; CHECK: faddp v0.2d, v0.2d, v1.2d ret <2 x double> %val } @@ -93,8 +93,8 @@ define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { define i32 @test_vaddv.v2i32(<2 x i32> %a) { ; CHECK-LABEL: test_vaddv.v2i32 ; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32> %a) + %1 = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a) ret i32 %1 } -declare i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>) diff --git a/test/CodeGen/ARM64/aarch64-neon-add-sub.ll b/test/CodeGen/AArch64/arm64-neon-add-sub.ll similarity index 89% rename from test/CodeGen/ARM64/aarch64-neon-add-sub.ll rename to test/CodeGen/AArch64/arm64-neon-add-sub.ll index 241025eca33..fbde606538c 100644 --- a/test/CodeGen/ARM64/aarch64-neon-add-sub.ll +++ b/test/CodeGen/AArch64/arm64-neon-add-sub.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -arm64-simd-scalar| FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -aarch64-simd-scalar| FileCheck %s define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) { ;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b @@ -182,35 +182,35 @@ define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) { define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vabd_f64 ; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vmax_f64 ; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vmin_f64 ; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vmaxnm_f64 ; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> 
@llvm.arm64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) { ; CHECK-LABEL: test_vminnm_f64 ; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b) + %1 = tail call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b) ret <1 x double> %1 } @@ -229,9 +229,9 @@ define <1 x double> @test_vneg_f64(<1 x double> %a) { } declare <1 x double> @llvm.fabs.v1f64(<1 x double>) -declare <1 x double> @llvm.arm64.neon.fminnm.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.fmaxnm.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.fmin.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.fmax.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.fabd.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double>, <1 x double>) declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>) diff --git a/test/CodeGen/ARM64/neon-compare-instructions.ll b/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll similarity index 100% rename from test/CodeGen/ARM64/neon-compare-instructions.ll rename to test/CodeGen/AArch64/arm64-neon-compare-instructions.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll similarity index 99% rename from test/CodeGen/ARM64/aarch64-neon-copy.ll rename to test/CodeGen/AArch64/arm64-neon-copy.ll index 9493cad3345..cfc2ebf0a2e 100644 --- a/test/CodeGen/ARM64/aarch64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1030,7 +1030,7 @@ define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s ; CHECK-NEXT: ret entry: - %0 = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %a) + %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) %1 = insertelement <1 x float> undef, float %0, i32 0 %2 = extractelement <1 x float> %1, i32 0 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 @@ -1042,14 +1042,14 @@ define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s ; CHECK-NEXT: ret entry: - %0 = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %a) + %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) %1 = insertelement <1 x float> undef, float %0, i32 0 %2 = extractelement <1 x float> %1, i32 0 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 ret <4 x float> %vecinit1.i } -declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>) +declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { ; CHECK-LABEL: test_concat_undef_v1i32: @@ -1060,14 +1060,14 @@ entry: ret <2 x i32> %vecinit1.i } -declare i32 @llvm.arm64.neon.sqabs.i32(i32) #4 +declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 define <2 x i32> @test_concat_v1i32_undef(i32 %a) { ; 
CHECK-LABEL: test_concat_v1i32_undef: ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} ; CHECK-NEXT: ret entry: - %b = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %a) + %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 ret <2 x i32> %vecinit.i432 } @@ -1088,9 +1088,9 @@ define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} ; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: - %c = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %a) + %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) %d = insertelement <2 x i32> undef, i32 %c, i32 0 - %e = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %b) + %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) %f = insertelement <2 x i32> undef, i32 %e, i32 0 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> ret <2 x i32> %h diff --git a/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll b/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll similarity index 56% rename from test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll rename to test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll index f24392bb8fc..276ac13da40 100644 --- a/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll +++ b/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll @@ -7,9 +7,9 @@ define <4 x i32> @copyTuple.QPair(i32* %a, i32* %b) { ; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b ; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] entry: - %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> , <4 x i32> , i64 1, i32* %a) + %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> , <4 x i32> , i64 1, i32* %a) %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , i64 1, i32* %b) + %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , i64 1, i32* %b) %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0 ret <4 x i32> %vld1.fca.0.extract } @@ -21,9 +21,9 @@ define <4 x i32> @copyTuple.QTriple(i32* %a, i32* %b, <4 x i32> %c) { ; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b ; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] entry: - %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a) + %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a) %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, i64 1, i32* %b) + %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, i64 1, i32* %b) %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 ret <4 x i32> %vld1.fca.0.extract } @@ -36,13 +36,13 @@ define <4 x i32> @copyTuple.QQuad(i32* %a, i32* %b, <4 x i32> %c) { ; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b ; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] entry: - %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } 
@llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a) + %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a) %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %b) + %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %b) %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 ret <4 x i32> %vld1.fca.0.extract } -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) diff --git a/test/CodeGen/ARM64/aarch64-neon-mul-div.ll b/test/CodeGen/AArch64/arm64-neon-mul-div.ll similarity index 92% rename from test/CodeGen/ARM64/aarch64-neon-mul-div.ll rename to test/CodeGen/AArch64/arm64-neon-mul-div.ll index f3a97663197..720f3eb6a4b 100644 --- a/test/CodeGen/ARM64/aarch64-neon-mul-div.ll +++ b/test/CodeGen/AArch64/arm64-neon-mul-div.ll @@ -684,98 +684,98 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { ret <2 x double> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.pmul.v8i8(<8 x i8>, <8 x i8>) -declare <16 x i8> @llvm.arm64.neon.pmul.v16i8(<16 x i8>, <16 x i8>) +declare <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8>, <16 x i8>) define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK-LABEL: poly_mulv8i8: - %prod = call <8 x i8> @llvm.arm64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) + %prod = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; CHECK: pmul v0.8b, v0.8b, v1.8b ret <8 x i8> %prod } define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK-LABEL: poly_mulv16i8: - %prod = call <16 x i8> @llvm.arm64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) + %prod = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; CHECK: pmul v0.16b, v0.16b, v1.16b ret <16 x i8> %prod } -declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> 
@llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_sqdmulh_v4i16: - %prod = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %prod = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: sqdmulh v0.4h, v0.4h, v1.4h ret <4 x i16> %prod } define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_sqdmulh_v8i16: - %prod = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %prod = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: sqdmulh v0.8h, v0.8h, v1.8h ret <8 x i16> %prod } define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_sqdmulh_v2i32: - %prod = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %prod = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: sqdmulh v0.2s, v0.2s, v1.2s ret <2 x i32> %prod } define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqdmulh_v4i32: - %prod = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %prod = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: sqdmulh v0.4s, v0.4s, v1.4s ret <4 x i32> %prod } -declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v4i16: - %prod = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) + %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h ret <4 x i16> %prod } define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v8i16: - %prod = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) + %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h ret <8 x i16> %prod } define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v2i32: - %prod = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s ret <2 x i32> %prod } define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v4i32: - %prod = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) + %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s ret <4 x i32> %prod } -declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x 
float>) -declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) +declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) +declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) { ; CHECK-LABEL: fmulx_v2f32: ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: fmulx v0.2s, v0.2s, v1.2s - %val = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs) + %val = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs) ret <2 x float> %val } @@ -783,7 +783,7 @@ define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) { ; CHECK-LABEL: fmulx_v4f32: ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: fmulx v0.4s, v0.4s, v1.4s - %val = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs) + %val = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs) ret <4 x float> %val } @@ -791,7 +791,7 @@ define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK-LABEL: fmulx_v2f64: ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: fmulx v0.2d, v0.2d, v1.2d - %val = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) + %val = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) ret <2 x double> %val } diff --git a/test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll similarity index 85% rename from test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll rename to test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll index 18e6e0fe8b6..92ed2399509 100644 --- a/test/CodeGen/ARM64/aarch64-neon-scalar-by-elem-mul.ll +++ b/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll @@ -61,13 +61,13 @@ define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) { ret double %tmp2; } -declare float @llvm.arm64.neon.fmulx.f32(float, float) +declare float @llvm.aarch64.neon.fmulx.f32(float, float) define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { ; CHECK-LABEL: test_fmulx_lane_f32 ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 - %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %a, float %tmp1) + %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %tmp1) ret float %tmp2; } @@ -75,7 +75,7 @@ define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { ; CHECK-LABEL: test_fmulx_laneq_f32 ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %a, float %tmp1) + %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %tmp1) ret float %tmp2; } @@ -83,17 +83,17 @@ define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) { ; CHECK-LABEL: test_fmulx_laneq_f32_swap ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = call float @llvm.arm64.neon.fmulx.f32(float %tmp1, float %a) + %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %tmp1, float %a) ret float %tmp2; } -declare double @llvm.arm64.neon.fmulx.f64(double, double) +declare double @llvm.aarch64.neon.fmulx.f64(double, double) define double 
@test_fmulx_lane_f64(double %a, <1 x double> %v) { ; CHECK-LABEL: test_fmulx_lane_f64 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0]|d[0-9]+}} %tmp1 = extractelement <1 x double> %v, i32 0 - %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1) + %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1) ret double %tmp2; } @@ -101,7 +101,7 @@ define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { ; CHECK-LABEL: test_fmulx_laneq_f64_0 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <2 x double> %v, i32 0 - %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1) + %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1) ret double %tmp2; } @@ -110,7 +110,7 @@ define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { ; CHECK-LABEL: test_fmulx_laneq_f64_1 ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %a, double %tmp1) + %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1) ret double %tmp2; } @@ -118,7 +118,7 @@ define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) { ; CHECK-LABEL: test_fmulx_laneq_f64_1_swap ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = call double @llvm.arm64.neon.fmulx.f64(double %tmp1, double %a) + %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %tmp1, double %a) ret double %tmp2; } diff --git a/test/CodeGen/ARM64/aarch64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll similarity index 100% rename from test/CodeGen/ARM64/aarch64-neon-select_cc.ll rename to test/CodeGen/AArch64/arm64-neon-select_cc.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll b/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll similarity index 100% rename from test/CodeGen/ARM64/aarch64-neon-simd-ldst-one.ll rename to test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll b/test/CodeGen/AArch64/arm64-neon-simd-shift.ll similarity index 81% rename from test/CodeGen/ARM64/aarch64-neon-simd-shift.ll rename to test/CodeGen/AArch64/arm64-neon-simd-shift.ll index 2fd2c1e35ce..447fb6307f2 100644 --- a/test/CodeGen/ARM64/aarch64-neon-simd-shift.ll +++ b/test/CodeGen/AArch64/arm64-neon-simd-shift.ll @@ -333,7 +333,7 @@ define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqshrun_high_n_s16 ; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrun = tail call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3) + %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -344,7 +344,7 @@ define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqshrun_high_n_s32 ; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrun = tail call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9) + %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqshrun to <1 x i64> %shuffle.i = shufflevector <1 
x i64> %1, <1 x i64> %2, <2 x i32> @@ -356,7 +356,7 @@ define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqshrun_high_n_s64 ; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrun = tail call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19) + %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -366,7 +366,7 @@ define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vrshrn_high_n_s16 ; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vrshrn = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %b, i32 3) + %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -377,7 +377,7 @@ define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vrshrn_high_n_s32 ; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vrshrn = tail call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %b, i32 9) + %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -389,7 +389,7 @@ define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vrshrn_high_n_s64 ; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vrshrn = tail call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %b, i32 19) + %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -399,7 +399,7 @@ define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqrshrun_high_n_s16 ; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrun = tail call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3) + %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqrshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -410,7 +410,7 @@ define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqrshrun_high_n_s32 ; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrun = tail call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9) + %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqrshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -422,7 +422,7 @@ define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqrshrun_high_n_s64 ; CHECK: sqrshrun2 {{v[0-9]+}}.4s, 
{{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrun = tail call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19) + %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqrshrun to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -432,7 +432,7 @@ define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqshrn_high_n_s16 ; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3) + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -443,7 +443,7 @@ define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqshrn_high_n_s32 ; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9) + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -455,7 +455,7 @@ define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqshrn_high_n_s64 ; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrn = tail call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19) + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -465,7 +465,7 @@ define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqshrn_high_n_u16 ; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3) + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -476,7 +476,7 @@ define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqshrn_high_n_u32 ; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9) + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -488,7 +488,7 @@ define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqshrn_high_n_u64 ; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrn = tail call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19) + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 
19) %2 = bitcast <2 x i32> %vqshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -498,7 +498,7 @@ define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqrshrn_high_n_s16 ; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3) + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -509,7 +509,7 @@ define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqrshrn_high_n_s32 ; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9) + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -521,7 +521,7 @@ define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqrshrn_high_n_s64 ; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrn = tail call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19) + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -531,7 +531,7 @@ define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { ; CHECK: test_vqrshrn_high_n_u16 ; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3) + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3) %1 = bitcast <8 x i8> %a to <1 x i64> %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -542,7 +542,7 @@ define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { ; CHECK: test_vqrshrn_high_n_u32 ; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9) + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9) %1 = bitcast <4 x i16> %a to <1 x i64> %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -554,7 +554,7 @@ define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { ; CHECK: test_vqrshrn_high_n_u64 ; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrn = tail call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19) + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19) %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> @@ -563,101 +563,101 @@ 
define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { -declare <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) -declare <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16>, i32) +declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) -declare <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32>, i32) +declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) -declare <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64>, i32) +declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) -declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) +declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) -declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) +declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) -declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) +declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) -declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) +declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) -declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) +declare <4 
x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) -declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) +declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) -declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) +declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) -declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) +declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) -declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) +declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) -declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) +declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) -declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) +declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) -declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) +declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvt_n_s64_f64 ; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x i64> @llvm.arm64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64) + %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64) ret <1 x i64> %1 } define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) { ; CHECK-LABEL: test_vcvt_n_u64_f64 ; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x i64> @llvm.arm64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64) + %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64) ret <1 x i64> %1 } define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) { ; CHECK-LABEL: test_vcvt_n_f64_s64 ; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x double> @llvm.arm64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64) + %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64) ret <1 x double> %1 } define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) { ; CHECK-LABEL: test_vcvt_n_f64_u64 ; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x double> @llvm.arm64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64) + %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64) ret <1 x double> %1 } -declare <1 x i64> @llvm.arm64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32) -declare <1 x i64> @llvm.arm64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32) -declare <1 x double> @llvm.arm64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32) -declare <1 x double> @llvm.arm64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32) +declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32) +declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32) +declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32) +declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32) diff --git a/test/CodeGen/ARM64/aarch64-neon-simd-vget.ll b/test/CodeGen/AArch64/arm64-neon-simd-vget.ll similarity index 100% rename from test/CodeGen/ARM64/aarch64-neon-simd-vget.ll rename to test/CodeGen/AArch64/arm64-neon-simd-vget.ll diff --git a/test/CodeGen/ARM64/neon-v1i1-setcc.ll b/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll similarity index 100% rename from 
test/CodeGen/ARM64/neon-v1i1-setcc.ll rename to test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll diff --git a/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll b/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll similarity index 77% rename from test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll rename to test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll index 9e69ac025f9..8262fe43a66 100644 --- a/test/CodeGen/ARM64/aarch64-neon-vector-list-spill.ll +++ b/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll @@ -10,7 +10,7 @@ define i32 @spill.DPairReg(i32* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %arg1) + %vld = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -30,7 +30,7 @@ define i16 @spill.DTripleReg(i16* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %arg1) + %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -50,7 +50,7 @@ define i16 @spill.DQuadReg(i16* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %arg1) + %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -70,7 +70,7 @@ define i32 @spill.QPairReg(i32* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %arg1) + %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -90,7 +90,7 @@ define float @spill.QTripleReg(float* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %arg1) + %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %arg1) %cmp = icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -110,7 +110,7 @@ define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) { ; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] ; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] entry: - %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %arg1) + %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %arg1) %cmp 
= icmp eq i32 %arg2, 0 br i1 %cmp, label %if.then, label %if.end @@ -124,12 +124,12 @@ if.end: ret i8 %res } -declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32*) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16*) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) -declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32*) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float*) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) declare void @foo() @@ -139,7 +139,7 @@ declare void @foo() ; then we can delete it. ; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo define <8 x i16> @test_2xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { - tail call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) + tail call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) tail call void @foo() %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> %1 = bitcast <2 x i64> %sv to <8 x i16> @@ -150,7 +150,7 @@ define <8 x i16> @test_2xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { ; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo define <8 x i16> @test_3xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { - tail call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) + tail call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) tail call void @foo() %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> %1 = bitcast <2 x i64> %sv to <8 x i16> @@ -161,7 +161,7 @@ define <8 x i16> @test_3xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { ; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo define <8 x i16> @test_4xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { - tail call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) + tail call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr) tail call void @foo() %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> %1 = bitcast <2 x i64> %sv to <8 x i16> @@ -170,6 +170,6 @@ define <8 x i16> @test_4xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) { ret <8 x i16> %3 } -declare void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) -declare void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) -declare void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) +declare 
void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) +declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) diff --git a/test/CodeGen/ARM64/patchpoint.ll b/test/CodeGen/AArch64/arm64-patchpoint.ll similarity index 100% rename from test/CodeGen/ARM64/patchpoint.ll rename to test/CodeGen/AArch64/arm64-patchpoint.ll diff --git a/test/CodeGen/ARM64/pic-local-symbol.ll b/test/CodeGen/AArch64/arm64-pic-local-symbol.ll similarity index 100% rename from test/CodeGen/ARM64/pic-local-symbol.ll rename to test/CodeGen/AArch64/arm64-pic-local-symbol.ll diff --git a/test/CodeGen/ARM64/platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll similarity index 100% rename from test/CodeGen/ARM64/platform-reg.ll rename to test/CodeGen/AArch64/arm64-platform-reg.ll diff --git a/test/CodeGen/ARM64/popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll similarity index 93% rename from test/CodeGen/ARM64/popcnt.ll rename to test/CodeGen/AArch64/arm64-popcnt.ll index 9bbba09c250..2afade2ee75 100644 --- a/test/CodeGen/ARM64/popcnt.ll +++ b/test/CodeGen/AArch64/arm64-popcnt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @cnt32_advsimd(i32 %x) nounwind readnone { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) diff --git a/test/CodeGen/ARM64/prefetch.ll b/test/CodeGen/AArch64/arm64-prefetch.ll similarity index 100% rename from test/CodeGen/ARM64/prefetch.ll rename to test/CodeGen/AArch64/arm64-prefetch.ll diff --git a/test/CodeGen/ARM64/promote-const.ll b/test/CodeGen/AArch64/arm64-promote-const.ll similarity index 98% rename from test/CodeGen/ARM64/promote-const.ll rename to test/CodeGen/AArch64/arm64-promote-const.ll index 9e7a215f64c..380ff55d683 100644 --- a/test/CodeGen/ARM64/promote-const.ll +++ b/test/CodeGen/AArch64/arm64-promote-const.ll @@ -1,9 +1,9 @@ ; Disable machine cse to stress the different path of the algorithm. ; Otherwise, we always fall in the simple case, i.e., only one definition. -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -arm64-stress-promote-const -mcpu=cyclone | FileCheck -check-prefix=PROMOTED %s +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-stress-promote-const -mcpu=cyclone | FileCheck -check-prefix=PROMOTED %s ; The REGULAR run just checks that the inputs passed to promote const expose ; the appropriate patterns. 
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -arm64-promote-const=false -mcpu=cyclone | FileCheck -check-prefix=REGULAR %s +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-promote-const=false -mcpu=cyclone | FileCheck -check-prefix=REGULAR %s %struct.uint8x16x4_t = type { [4 x <16 x i8>] } diff --git a/test/CodeGen/ARM64/redzone.ll b/test/CodeGen/AArch64/arm64-redzone.ll similarity index 88% rename from test/CodeGen/ARM64/redzone.ll rename to test/CodeGen/AArch64/arm64-redzone.ll index b89d7b1de3f..9b0c384c4d9 100644 --- a/test/CodeGen/ARM64/redzone.ll +++ b/test/CodeGen/AArch64/arm64-redzone.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-redzone | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-redzone | FileCheck %s define i32 @foo(i32 %a, i32 %b) nounwind ssp { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/ARM64/reg-copy-noneon.ll b/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll similarity index 100% rename from test/CodeGen/ARM64/reg-copy-noneon.ll rename to test/CodeGen/AArch64/arm64-reg-copy-noneon.ll diff --git a/test/CodeGen/ARM64/register-offset-addressing.ll b/test/CodeGen/AArch64/arm64-register-offset-addressing.ll similarity index 100% rename from test/CodeGen/ARM64/register-offset-addressing.ll rename to test/CodeGen/AArch64/arm64-register-offset-addressing.ll diff --git a/test/CodeGen/ARM64/register-pairing.ll b/test/CodeGen/AArch64/arm64-register-pairing.ll similarity index 100% rename from test/CodeGen/ARM64/register-pairing.ll rename to test/CodeGen/AArch64/arm64-register-pairing.ll diff --git a/test/CodeGen/ARM64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll similarity index 100% rename from test/CodeGen/ARM64/regress-f128csel-flags.ll rename to test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll diff --git a/test/CodeGen/ARM64/regress-interphase-shift.ll b/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll similarity index 100% rename from test/CodeGen/ARM64/regress-interphase-shift.ll rename to test/CodeGen/AArch64/arm64-regress-interphase-shift.ll diff --git a/test/CodeGen/ARM64/return-vector.ll b/test/CodeGen/AArch64/arm64-return-vector.ll similarity index 100% rename from test/CodeGen/ARM64/return-vector.ll rename to test/CodeGen/AArch64/arm64-return-vector.ll diff --git a/test/CodeGen/ARM64/returnaddr.ll b/test/CodeGen/AArch64/arm64-returnaddr.ll similarity index 100% rename from test/CodeGen/ARM64/returnaddr.ll rename to test/CodeGen/AArch64/arm64-returnaddr.ll diff --git a/test/CodeGen/ARM64/rev.ll b/test/CodeGen/AArch64/arm64-rev.ll similarity index 99% rename from test/CodeGen/ARM64/rev.ll rename to test/CodeGen/AArch64/arm64-rev.ll index 1da59e42f6b..30d9f4f3e67 100644 --- a/test/CodeGen/ARM64/rev.ll +++ b/test/CodeGen/AArch64/arm64-rev.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @test_rev_w(i32 %a) nounwind { entry: diff --git a/test/CodeGen/ARM64/rounding.ll b/test/CodeGen/AArch64/arm64-rounding.ll similarity index 100% rename from test/CodeGen/ARM64/rounding.ll rename to test/CodeGen/AArch64/arm64-rounding.ll diff --git a/test/CodeGen/ARM64/scaled_iv.ll b/test/CodeGen/AArch64/arm64-scaled_iv.ll similarity index 100% rename from test/CodeGen/ARM64/scaled_iv.ll rename to test/CodeGen/AArch64/arm64-scaled_iv.ll diff --git a/test/CodeGen/ARM64/scvt.ll b/test/CodeGen/AArch64/arm64-scvt.ll similarity index 99% rename from 
test/CodeGen/ARM64/scvt.ll rename to test/CodeGen/AArch64/arm64-scvt.ll index b4d4add1e8a..2e006cff159 100644 --- a/test/CodeGen/ARM64/scvt.ll +++ b/test/CodeGen/AArch64/arm64-scvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; rdar://13082402 define float @t1(i32* nocapture %src) nounwind ssp { diff --git a/test/CodeGen/ARM64/shifted-sext.ll b/test/CodeGen/AArch64/arm64-shifted-sext.ll similarity index 100% rename from test/CodeGen/ARM64/shifted-sext.ll rename to test/CodeGen/AArch64/arm64-shifted-sext.ll diff --git a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll new file mode 100644 index 00000000000..aed39e7ed8c --- /dev/null +++ b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST + +define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp { +; CHECK: uaddlv.16b h0, v0 +; CHECK: rshrn.8b v0, v0, #4 +; CHECK: dup.16b v0, v0[0] +; CHECK: ret + +; CHECK-FAST: uaddlv.16b +; CHECK-FAST: rshrn.8b +; CHECK-FAST: dup.16b + %tmp = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) nounwind + %tmp1 = trunc i32 %tmp to i16 + %tmp2 = insertelement <8 x i16> undef, i16 %tmp1, i32 0 + %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp2, i32 4) + %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %tmp4 +} + +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM64/simplest-elf.ll b/test/CodeGen/AArch64/arm64-simplest-elf.ll similarity index 100% rename from test/CodeGen/ARM64/simplest-elf.ll rename to test/CodeGen/AArch64/arm64-simplest-elf.ll diff --git a/test/CodeGen/ARM64/sincos.ll b/test/CodeGen/AArch64/arm64-sincos.ll similarity index 100% rename from test/CodeGen/ARM64/sincos.ll rename to test/CodeGen/AArch64/arm64-sincos.ll diff --git a/test/CodeGen/ARM64/sitofp-combine-chains.ll b/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll similarity index 100% rename from test/CodeGen/ARM64/sitofp-combine-chains.ll rename to test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll diff --git a/test/CodeGen/ARM64/sli-sri-opt.ll b/test/CodeGen/AArch64/arm64-sli-sri-opt.ll similarity index 95% rename from test/CodeGen/ARM64/sli-sri-opt.ll rename to test/CodeGen/AArch64/arm64-sli-sri-opt.ll index 725dcd51fd1..7fec53993bc 100644 --- a/test/CodeGen/ARM64/sli-sri-opt.ll +++ b/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc -arm64-shift-insert-generation=true -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -aarch64-shift-insert-generation=true -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind { ; CHECK-LABEL: testLeftGood: diff --git a/test/CodeGen/ARM64/smaxv.ll b/test/CodeGen/AArch64/arm64-smaxv.ll similarity index 61% rename from test/CodeGen/ARM64/smaxv.ll rename to test/CodeGen/AArch64/arm64-smaxv.ll index 4f6e01b31ea..183e667643c 100644 --- a/test/CodeGen/ARM64/smaxv.ll +++ b/test/CodeGen/AArch64/arm64-smaxv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 
-arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define signext i8 @test_vmaxv_s8(<8 x i8> %a1) { ; CHECK: test_vmaxv_s8 @@ -6,7 +6,7 @@ define signext i8 @test_vmaxv_s8(<8 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a1) %0 = trunc i32 %vmaxv.i to i8 ret i8 %0 } @@ -17,7 +17,7 @@ define signext i16 @test_vmaxv_s16(<4 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a1) %0 = trunc i32 %vmaxv.i to i16 ret i16 %0 } @@ -29,7 +29,7 @@ define i32 @test_vmaxv_s32(<2 x i32> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v2i32(<2 x i32> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a1) ret i32 %vmaxv.i } @@ -39,7 +39,7 @@ define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a1) %0 = trunc i32 %vmaxv.i to i8 ret i8 %0 } @@ -50,7 +50,7 @@ define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a1) %0 = trunc i32 %vmaxv.i to i16 ret i16 %0 } @@ -61,14 +61,14 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a1) { ; CHECK-NEXT: fmov w0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32> %a1) + %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a1) ret i32 %vmaxv.i } -declare i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.smaxv.i32.v2i32(<2 x i32>) -declare i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>) diff --git a/test/CodeGen/ARM64/sminv.ll b/test/CodeGen/AArch64/arm64-sminv.ll similarity index 61% rename from test/CodeGen/ARM64/sminv.ll rename to test/CodeGen/AArch64/arm64-sminv.ll index a246868d2f2..195c4e59dc4 100644 --- a/test/CodeGen/ARM64/sminv.ll +++ b/test/CodeGen/AArch64/arm64-sminv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define signext i8 @test_vminv_s8(<8 x i8> %a1) { ; CHECK: test_vminv_s8 @@ -6,7 +6,7 @@ define signext i8 @test_vminv_s8(<8 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8> %a1) + %vminv.i = tail call i32 
@llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a1) %0 = trunc i32 %vminv.i to i8 ret i8 %0 } @@ -17,7 +17,7 @@ define signext i16 @test_vminv_s16(<4 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a1) %0 = trunc i32 %vminv.i to i16 ret i16 %0 } @@ -29,7 +29,7 @@ define i32 @test_vminv_s32(<2 x i32> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v2i32(<2 x i32> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a1) ret i32 %vminv.i } @@ -39,7 +39,7 @@ define signext i8 @test_vminvq_s8(<16 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a1) %0 = trunc i32 %vminv.i to i8 ret i8 %0 } @@ -50,7 +50,7 @@ define signext i16 @test_vminvq_s16(<8 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a1) %0 = trunc i32 %vminv.i to i16 ret i16 %0 } @@ -61,14 +61,14 @@ define i32 @test_vminvq_s32(<4 x i32> %a1) { ; CHECK-NEXT: fmov w0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32> %a1) + %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a1) ret i32 %vminv.i } -declare i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.sminv.i32.v2i32(<2 x i32>) -declare i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>) diff --git a/test/CodeGen/ARM64/spill-lr.ll b/test/CodeGen/AArch64/arm64-spill-lr.ll similarity index 100% rename from test/CodeGen/ARM64/spill-lr.ll rename to test/CodeGen/AArch64/arm64-spill-lr.ll diff --git a/test/CodeGen/ARM64/spill.ll b/test/CodeGen/AArch64/arm64-spill.ll similarity index 88% rename from test/CodeGen/ARM64/spill.ll rename to test/CodeGen/AArch64/arm64-spill.ll index 5a5da9723cb..47cdc2bd95e 100644 --- a/test/CodeGen/ARM64/spill.ll +++ b/test/CodeGen/AArch64/arm64-spill.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -arm64-neon-syntax=apple -verify-machineinstrs +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -verify-machineinstrs ; CHECK: fpr128 ; CHECK: ld1.2d diff --git a/test/CodeGen/ARM64/st1.ll b/test/CodeGen/AArch64/arm64-st1.ll similarity index 50% rename from test/CodeGen/ARM64/st1.ll rename to test/CodeGen/AArch64/arm64-st1.ll index b9aafc60e7b..4370484478c 100644 --- a/test/CodeGen/ARM64/st1.ll +++ b/test/CodeGen/AArch64/arm64-st1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple 
-verify-machineinstrs | FileCheck %s define void @st1lane_16b(<16 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane_16b @@ -83,594 +83,594 @@ define void @st1lane_2s_float(<2 x float> %A, float* %D) { define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) { ; CHECK-LABEL: st2lane_16b ; CHECK: st2.b - call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D) + call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D) ret void } define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) { ; CHECK-LABEL: st2lane_8h ; CHECK: st2.h - call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D) + call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D) ret void } define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) { ; CHECK-LABEL: st2lane_4s ; CHECK: st2.s - call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D) + call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D) ret void } define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) { ; CHECK-LABEL: st2lane_2d ; CHECK: st2.d - call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D) + call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D) ret void } -declare void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone -declare void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone -declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone -declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone +declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone +declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone +declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone +declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) { ; CHECK-LABEL: st3lane_16b ; CHECK: st3.b - call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D) + call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D) ret void } define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) { ; CHECK-LABEL: st3lane_8h ; CHECK: st3.h - call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D) + call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D) ret void } define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) { ; CHECK-LABEL: st3lane_4s ; CHECK: st3.s - call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D) + call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D) ret void } define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) { ; CHECK-LABEL: st3lane_2d ; CHECK: st3.d - call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D) + call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> 
%B, <2 x i64> %C, i64 1, i64* %D) ret void } -declare void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone -declare void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone -declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone -declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone +declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone +declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone +declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone +declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) { ; CHECK-LABEL: st4lane_16b ; CHECK: st4.b - call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E) + call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E) ret void } define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) { ; CHECK-LABEL: st4lane_8h ; CHECK: st4.h - call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E) + call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E) ret void } define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) { ; CHECK-LABEL: st4lane_4s ; CHECK: st4.s - call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E) + call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E) ret void } define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) { ; CHECK-LABEL: st4lane_2d ; CHECK: st4.d - call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E) + call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E) ret void } -declare void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone -declare void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone -declare void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone -declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone +declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone +declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone +declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone +declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) 
nounwind { ; CHECK-LABEL: st2_8b ; CHECK st2.8b - call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P) + call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P) ret void } define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind { ; CHECK-LABEL: st3_8b ; CHECK st3.8b - call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) + call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) ret void } define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind { ; CHECK-LABEL: st4_8b ; CHECK st4.8b - call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) + call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) ret void } -declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind { ; CHECK-LABEL: st2_16b ; CHECK st2.16b - call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P) + call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P) ret void } define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind { ; CHECK-LABEL: st3_16b ; CHECK st3.16b - call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) + call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) ret void } define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind { ; CHECK-LABEL: st4_16b ; CHECK st4.16b - call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) + call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) ret void } -declare void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind { ; CHECK-LABEL: st2_4h ; CHECK st2.4h - call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P) + call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P) ret void } define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind { ; CHECK-LABEL: st3_4h ; CHECK st3.4h - call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) + call 
void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) ret void } define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind { ; CHECK-LABEL: st4_4h ; CHECK st4.4h - call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) + call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) ret void } -declare void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind { ; CHECK-LABEL: st2_8h ; CHECK st2.8h - call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P) + call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P) ret void } define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind { ; CHECK-LABEL: st3_8h ; CHECK st3.8h - call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) + call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) ret void } define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind { ; CHECK-LABEL: st4_8h ; CHECK st4.8h - call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) + call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) ret void } -declare void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind { ; CHECK-LABEL: st2_2s ; CHECK st2.2s - call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P) + call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P) ret void } define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind { ; CHECK-LABEL: st3_2s ; CHECK st3.2s - call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) + call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) ret void } define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind { ; CHECK-LABEL: st4_2s ; CHECK st4.2s - call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) + 
call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) ret void } -declare void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind { ; CHECK-LABEL: st2_4s ; CHECK st2.4s - call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P) + call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P) ret void } define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind { ; CHECK-LABEL: st3_4s ; CHECK st3.4s - call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) + call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) ret void } define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind { ; CHECK-LABEL: st4_4s ; CHECK st4.4s - call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) + call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) ret void } -declare void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind { ; CHECK-LABEL: st2_1d ; CHECK st1.2d - call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P) + call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P) ret void } define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind { ; CHECK-LABEL: st3_1d ; CHECK st1.3d - call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) + call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) ret void } define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind { ; CHECK-LABEL: st4_1d ; CHECK st1.4d - call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) + call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) ret void } -declare void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void 
@llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind { ; CHECK-LABEL: st2_2d ; CHECK st2.2d - call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P) + call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P) ret void } define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind { ; CHECK-LABEL: st3_2d ; CHECK st2.3d - call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) + call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) ret void } define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind { ; CHECK-LABEL: st4_2d ; CHECK st2.4d - call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) + call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) ret void } -declare void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) { ; CHECK-LABEL: st1_x2_v8i8: ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) + call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) ret void } define void @st1_x2_v4i16(<4 x i16> %A, 
<4 x i16> %B, i16* %addr) { ; CHECK-LABEL: st1_x2_v4i16: ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) + call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) ret void } define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) { ; CHECK-LABEL: st1_x2_v2i32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) + call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) ret void } define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) { ; CHECK-LABEL: st1_x2_v2f32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr) + call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr) ret void } define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) { ; CHECK-LABEL: st1_x2_v1i64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) + call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) ret void } define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) { ; CHECK-LABEL: st1_x2_v1f64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr) + call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) { ; CHECK-LABEL: st1_x2_v16i8: ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) + call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) ret void } define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) { ; CHECK-LABEL: st1_x2_v8i16: ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) + call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) ret void } define void @st1_x2_v4i32(<4 x 
i32> %A, <4 x i32> %B, i32* %addr) { ; CHECK-LABEL: st1_x2_v4i32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) + call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) ret void } define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) { ; CHECK-LABEL: st1_x2_v4f32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr) + call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr) ret void } define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) { ; CHECK-LABEL: st1_x2_v2i64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) + call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) ret void } define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) { ; CHECK-LABEL: st1_x2_v2f64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr) + call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) { ; CHECK-LABEL: st1_x3_v8i8: ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) + call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) ret void } define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) { ; CHECK-LABEL: st1_x3_v4i16: ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) + call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) ret void } define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) { ; CHECK-LABEL: st1_x3_v2i32: ; CHECK: st1.2s 
{ {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) + call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) ret void } define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) { ; CHECK-LABEL: st1_x3_v2f32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) + call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) ret void } define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) { ; CHECK-LABEL: st1_x3_v1i64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) + call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) ret void } define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) { ; CHECK-LABEL: st1_x3_v1f64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) + call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) { ; CHECK-LABEL: st1_x3_v16i8: ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) + call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) ret void } define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) { ; CHECK-LABEL: st1_x3_v8i16: ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) + call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 
x i16> %C, i16* %addr) ret void } define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) { ; CHECK-LABEL: st1_x3_v4i32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) + call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) ret void } define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) { ; CHECK-LABEL: st1_x3_v4f32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) + call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) ret void } define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) { ; CHECK-LABEL: st1_x3_v2i64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) + call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) ret void } define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) { ; CHECK-LABEL: st1_x3_v2f64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) + call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) { ; CHECK-LABEL: st1_x4_v8i8: ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) + call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) ret void } define void 
@st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) { ; CHECK-LABEL: st1_x4_v4i16: ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) + call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) ret void } define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) { ; CHECK-LABEL: st1_x4_v2i32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) + call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) ret void } define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) { ; CHECK-LABEL: st1_x4_v2f32: ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) + call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) ret void } define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) { ; CHECK-LABEL: st1_x4_v1i64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) + call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) ret void } define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) { ; CHECK-LABEL: st1_x4_v1f64: ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) + call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) ret void } -declare void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly -declare void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x 
i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly +declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) { ; CHECK-LABEL: st1_x4_v16i8: ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) + call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) ret void } define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) { ; CHECK-LABEL: st1_x4_v8i16: ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) + call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) ret void } define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) { ; CHECK-LABEL: st1_x4_v4i32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) + call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) ret void } define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) { ; CHECK-LABEL: st1_x4_v4f32: ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) + call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) ret void } define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) { ; CHECK-LABEL: st1_x4_v2i64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) + call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) ret void } define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) { ; CHECK-LABEL: st1_x4_v2f64: ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] - call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) + call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) ret void } diff --git a/test/CodeGen/ARM64/stack-no-frame.ll b/test/CodeGen/AArch64/arm64-stack-no-frame.ll similarity index 100% rename from test/CodeGen/ARM64/stack-no-frame.ll rename to test/CodeGen/AArch64/arm64-stack-no-frame.ll diff --git a/test/CodeGen/ARM64/stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll similarity index 100% rename from test/CodeGen/ARM64/stackmap.ll rename to test/CodeGen/AArch64/arm64-stackmap.ll diff --git a/test/CodeGen/ARM64/stackpointer.ll b/test/CodeGen/AArch64/arm64-stackpointer.ll similarity index 100% rename from test/CodeGen/ARM64/stackpointer.ll rename to 
test/CodeGen/AArch64/arm64-stackpointer.ll
diff --git a/test/CodeGen/ARM64/stacksave.ll b/test/CodeGen/AArch64/arm64-stacksave.ll
similarity index 100%
rename from test/CodeGen/ARM64/stacksave.ll
rename to test/CodeGen/AArch64/arm64-stacksave.ll
diff --git a/test/CodeGen/ARM64/stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
similarity index 94%
rename from test/CodeGen/ARM64/stp.ll
rename to test/CodeGen/AArch64/arm64-stp.ll
index 3a58396e61b..40bdf22c995 100644
--- a/test/CodeGen/ARM64/stp.ll
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm64 -arm64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-unscaled-mem-op=true\
+; RUN: llc < %s -march=arm64 -aarch64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
 ; RUN: -verify-machineinstrs -mcpu=cyclone | FileCheck -check-prefix=STUR_CHK %s
 
 ; CHECK: stp_int
diff --git a/test/CodeGen/ARM64/strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll
similarity index 75%
rename from test/CodeGen/ARM64/strict-align.ll
rename to test/CodeGen/AArch64/arm64-strict-align.ll
index 48a1528b5cd..5d137043a69 100644
--- a/test/CodeGen/ARM64/strict-align.ll
+++ b/test/CodeGen/AArch64/arm64-strict-align.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -arm64-no-strict-align | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -arm64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
 
 define i32 @f0(i32* nocapture %p) nounwind {
 ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
diff --git a/test/CodeGen/ARM64/stur.ll b/test/CodeGen/AArch64/arm64-stur.ll
similarity index 96%
rename from test/CodeGen/ARM64/stur.ll
rename to test/CodeGen/AArch64/arm64-stur.ll
index dc67b60000d..a2e684dc952 100644
--- a/test/CodeGen/ARM64/stur.ll
+++ b/test/CodeGen/AArch64/arm64-stur.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
 %struct.X = type <{ i32, i64, i64 }>
 
 define void @foo1(i32* %p, i64 %val) nounwind {
diff --git a/test/CodeGen/ARM64/subsections.ll b/test/CodeGen/AArch64/arm64-subsections.ll
similarity index 100%
rename from test/CodeGen/ARM64/subsections.ll
rename to test/CodeGen/AArch64/arm64-subsections.ll
diff --git a/test/CodeGen/ARM64/subvector-extend.ll b/test/CodeGen/AArch64/arm64-subvector-extend.ll
similarity index 97%
rename from test/CodeGen/ARM64/subvector-extend.ll
rename to test/CodeGen/AArch64/arm64-subvector-extend.ll
index ad2f06ce7ba..d5a178a9e65 100644
--- a/test/CodeGen/ARM64/subvector-extend.ll
+++ b/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
 
 ; Test efficient codegen of vector extends up from legal type to 128 bit
 ; and 256 bit vector types.
diff --git a/test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll b/test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll
similarity index 100%
rename from test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll
rename to test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll
diff --git a/test/CodeGen/AArch64/arm64-tbl.ll b/test/CodeGen/AArch64/arm64-tbl.ll
new file mode 100644
index 00000000000..b1ce15a1e19
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-tbl.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
+; CHECK: tbl1_8b
+; CHECK: tbl.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
+; CHECK: tbl1_16b
+; CHECK: tbl.16b
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
+; CHECK: tbl2_8b
+; CHECK: tbl.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
+; CHECK: tbl2_16b
+; CHECK: tbl.16b
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
+; CHECK: tbl3_8b
+; CHECK: tbl.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
+; CHECK: tbl3_16b
+; CHECK: tbl.16b
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
+; CHECK: tbl4_8b
+; CHECK: tbl.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
+; CHECK: tbl4_16b
+; CHECK: tbl.16b
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
+  ret <16 x i8> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
+; CHECK: tbx1_8b
+; CHECK: tbx.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
+; CHECK: tbx1_16b
+; CHECK: tbx.16b
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
+; CHECK: tbx2_8b
+; CHECK: tbx.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
+; CHECK: tbx2_16b
+; CHECK: tbx.16b
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
+; CHECK: tbx3_8b
+; CHECK: tbx.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
+; CHECK: tbx3_16b
+; CHECK: tbx.16b
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
+; CHECK: tbx4_8b
+; CHECK: tbx.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
+; CHECK: tbx4_16b
+; CHECK: tbx.16b
+  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
+  ret <16 x i8> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
diff --git a/test/CodeGen/ARM64/this-return.ll b/test/CodeGen/AArch64/arm64-this-return.ll
similarity index 100%
rename from test/CodeGen/ARM64/this-return.ll
rename to test/CodeGen/AArch64/arm64-this-return.ll
diff --git a/test/CodeGen/ARM64/tls-darwin.ll b/test/CodeGen/AArch64/arm64-tls-darwin.ll
similarity index 100%
rename from test/CodeGen/ARM64/tls-darwin.ll
rename to test/CodeGen/AArch64/arm64-tls-darwin.ll
diff --git a/test/CodeGen/ARM64/tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
similarity index 100%
rename from test/CodeGen/ARM64/tls-dynamic-together.ll
rename to test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
diff --git
a/test/CodeGen/ARM64/tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll similarity index 100% rename from test/CodeGen/ARM64/tls-dynamics.ll rename to test/CodeGen/AArch64/arm64-tls-dynamics.ll diff --git a/test/CodeGen/ARM64/tls-execs.ll b/test/CodeGen/AArch64/arm64-tls-execs.ll similarity index 100% rename from test/CodeGen/ARM64/tls-execs.ll rename to test/CodeGen/AArch64/arm64-tls-execs.ll diff --git a/test/CodeGen/ARM64/trap.ll b/test/CodeGen/AArch64/arm64-trap.ll similarity index 100% rename from test/CodeGen/ARM64/trap.ll rename to test/CodeGen/AArch64/arm64-trap.ll diff --git a/test/CodeGen/ARM64/trn.ll b/test/CodeGen/AArch64/arm64-trn.ll similarity index 98% rename from test/CodeGen/ARM64/trn.ll rename to test/CodeGen/AArch64/arm64-trn.ll index f46798490f6..2db7a14e754 100644 --- a/test/CodeGen/ARM64/trn.ll +++ b/test/CodeGen/AArch64/arm64-trn.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vtrni8: diff --git a/test/CodeGen/ARM64/trunc-store.ll b/test/CodeGen/AArch64/arm64-trunc-store.ll similarity index 100% rename from test/CodeGen/ARM64/trunc-store.ll rename to test/CodeGen/AArch64/arm64-trunc-store.ll diff --git a/test/CodeGen/ARM64/umaxv.ll b/test/CodeGen/AArch64/arm64-umaxv.ll similarity index 75% rename from test/CodeGen/ARM64/umaxv.ll rename to test/CodeGen/AArch64/arm64-umaxv.ll index 15277d32f03..d523f317d08 100644 --- a/test/CodeGen/ARM64/umaxv.ll +++ b/test/CodeGen/AArch64/arm64-umaxv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp { ; CHECK-LABEL: vmax_u8x8: @@ -7,7 +7,7 @@ define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %a) nounwind + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) nounwind %tmp = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -30,7 +30,7 @@ define i32 @vmax_u4x16(<4 x i16> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16> %a) nounwind + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) nounwind %tmp = trunc i32 %vmaxv.i to i16 %tobool = icmp eq i16 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -51,7 +51,7 @@ define i32 @vmax_u8x16(<8 x i16> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16> %a) nounwind + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) nounwind %tmp = trunc i32 %vmaxv.i to i16 %tobool = icmp eq i16 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -72,7 +72,7 @@ define i32 @vmax_u16x8(<16 x i8> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %a) nounwind + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) nounwind %tmp = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -86,7 +86,7 @@ return: ret i32 %retval.0 } -declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind readnone 
-declare i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind readnone -declare i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind readnone -declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM64/uminv.ll b/test/CodeGen/AArch64/arm64-uminv.ll similarity index 75% rename from test/CodeGen/ARM64/uminv.ll rename to test/CodeGen/AArch64/arm64-uminv.ll index 440522f1693..3bade4b28b8 100644 --- a/test/CodeGen/ARM64/uminv.ll +++ b/test/CodeGen/AArch64/arm64-uminv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp { ; CHECK-LABEL: vmin_u8x8: @@ -7,7 +7,7 @@ define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %a) nounwind + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) nounwind %tmp = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -30,7 +30,7 @@ define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16> %a) nounwind + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) nounwind %tmp = trunc i32 %vminv.i to i16 %tobool = icmp eq i16 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -51,7 +51,7 @@ define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16> %a) nounwind + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) nounwind %tmp = trunc i32 %vminv.i to i16 %tobool = icmp eq i16 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -72,7 +72,7 @@ define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp { ; CHECK-NOT: and ; CHECK: cbz [[REG2]], entry: - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %a) nounwind + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) nounwind %tmp = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %tmp, 0 br i1 %tobool, label %return, label %if.then @@ -86,7 +86,7 @@ return: ret i32 %retval.0 } -declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone -declare i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone -declare i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone -declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone +declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM64/umov.ll b/test/CodeGen/AArch64/arm64-umov.ll similarity index 90% rename from test/CodeGen/ARM64/umov.ll rename to test/CodeGen/AArch64/arm64-umov.ll index 19fd91b6c3d..a1ef9908646 100644 --- a/test/CodeGen/ARM64/umov.ll +++ 
b/test/CodeGen/AArch64/arm64-umov.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define zeroext i8 @f1(<16 x i8> %a) { ; CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM64/unaligned_ldst.ll b/test/CodeGen/AArch64/arm64-unaligned_ldst.ll similarity index 100% rename from test/CodeGen/ARM64/unaligned_ldst.ll rename to test/CodeGen/AArch64/arm64-unaligned_ldst.ll diff --git a/test/CodeGen/ARM64/uzp.ll b/test/CodeGen/AArch64/arm64-uzp.ll similarity index 98% rename from test/CodeGen/ARM64/uzp.ll rename to test/CodeGen/AArch64/arm64-uzp.ll index 60e16d0d686..cdd8d31c998 100644 --- a/test/CodeGen/ARM64/uzp.ll +++ b/test/CodeGen/AArch64/arm64-uzp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vuzpi8: diff --git a/test/CodeGen/ARM64/vaargs.ll b/test/CodeGen/AArch64/arm64-vaargs.ll similarity index 100% rename from test/CodeGen/ARM64/vaargs.ll rename to test/CodeGen/AArch64/arm64-vaargs.ll diff --git a/test/CodeGen/ARM64/vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll similarity index 68% rename from test/CodeGen/ARM64/vabs.ll rename to test/CodeGen/AArch64/arm64-vabs.ll index 0d8aa24e1b4..5afc8d9f3f4 100644 --- a/test/CodeGen/ARM64/vabs.ll +++ b/test/CodeGen/AArch64/arm64-vabs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -6,7 +6,7 @@ define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: sabdl.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -16,7 +16,7 @@ define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sabdl.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -26,7 +26,7 @@ define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sabdl.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -38,7 +38,7 @@ define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { %load2 = load <16 x i8>* %B %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> - %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -50,7 +50,7 @@ define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { %load2 = load <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 
= shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -62,7 +62,7 @@ define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { %load2 = load <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -72,7 +72,7 @@ define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uabdl.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -82,7 +82,7 @@ define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uabdl.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -92,7 +92,7 @@ define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uabdl.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -105,7 +105,7 @@ define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> - %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 } @@ -117,7 +117,7 @@ define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { %load2 = load <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } @@ -129,7 +129,7 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { %load2 = load <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 } @@ -139,7 +139,7 @@ define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: fabd.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 
x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -148,7 +148,7 @@ define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fabd.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -157,20 +157,20 @@ define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fabd.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sabd_8b: ;CHECK: sabd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -179,7 +179,7 @@ define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sabd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -188,7 +188,7 @@ define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sabd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -197,7 +197,7 @@ define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sabd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -206,7 +206,7 @@ define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sabd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -215,23 +215,23 @@ define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sabd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> 
%tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: uabd_8b: ;CHECK: uabd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -240,7 +240,7 @@ define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uabd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -249,7 +249,7 @@ define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uabd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -258,7 +258,7 @@ define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uabd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -267,7 +267,7 @@ define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uabd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -276,22 +276,22 @@ define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uabd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind 
readnone -declare <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sqabs_8b: ;CHECK: sqabs.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8> %tmp1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp3 } @@ -299,7 +299,7 @@ define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sqabs_16b: ;CHECK: sqabs.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8> %tmp1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp3 } @@ -307,7 +307,7 @@ define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: sqabs_4h: ;CHECK: sqabs.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp3 } @@ -315,7 +315,7 @@ define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqabs_8h: ;CHECK: sqabs.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp3 } @@ -323,7 +323,7 @@ define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: sqabs_2s: ;CHECK: sqabs.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -331,22 +331,22 @@ define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqabs_4s: ;CHECK: sqabs.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone define 
<8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sqneg_8b: ;CHECK: sqneg.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8> %tmp1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp3 } @@ -354,7 +354,7 @@ define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sqneg_16b: ;CHECK: sqneg.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8> %tmp1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp3 } @@ -362,7 +362,7 @@ define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: sqneg_4h: ;CHECK: sqneg.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp3 } @@ -370,7 +370,7 @@ define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqneg_8h: ;CHECK: sqneg.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp3 } @@ -378,7 +378,7 @@ define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: sqneg_2s: ;CHECK: sqneg.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -386,22 +386,22 @@ define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqneg_4s: ;CHECK: sqneg.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: abs_8b: ;CHECK: abs.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8> %tmp1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp3 } @@ -409,7 +409,7 @@ define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: abs_16b: ;CHECK: abs.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8> %tmp1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp3 } @@ -417,7 +417,7 @@ define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: abs_4h: ;CHECK: abs.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16> %tmp1) + %tmp3 = call <4 x i16> 
@llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp3 } @@ -425,7 +425,7 @@ define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: abs_8h: ;CHECK: abs.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp3 } @@ -433,7 +433,7 @@ define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: abs_2s: ;CHECK: abs.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -441,32 +441,32 @@ define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: abs_4s: ;CHECK: abs.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } define <1 x i64> @abs_1d(<1 x i64> %A) nounwind { ; CHECK-LABEL: abs_1d: ; CHECK: abs d0, d0 - %abs = call <1 x i64> @llvm.arm64.neon.abs.v1i64(<1 x i64> %A) + %abs = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %A) ret <1 x i64> %abs } define i64 @abs_1d_honestly(i64 %A) nounwind { ; CHECK-LABEL: abs_1d_honestly: ; CHECK: abs d0, d0 - %abs = call i64 @llvm.arm64.neon.abs.i64(i64 %A) + %abs = call i64 @llvm.aarch64.neon.abs.i64(i64 %A) ret i64 %abs } -declare <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.abs.v1i64(<1 x i64>) nounwind readnone -declare i64 @llvm.arm64.neon.abs.i64(i64) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.abs.i64(i64) nounwind readnone define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: sabal8h: @@ -474,7 +474,7 @@ define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4.1 ret <8 x i16> %tmp5 @@ -486,7 +486,7 @@ define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4.1 = zext <4 x i16> %tmp4 to <4 x 
i32> %tmp5 = add <4 x i32> %tmp3, %tmp4.1 ret <4 x i32> %tmp5 @@ -498,7 +498,7 @@ define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp4.1.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4.1 @@ -513,7 +513,7 @@ define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwin %tmp3 = load <8 x i16>* %C %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> - %tmp4 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4.1 ret <8 x i16> %tmp5 @@ -527,7 +527,7 @@ define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwin %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4.1 ret <4 x i32> %tmp5 @@ -541,7 +541,7 @@ define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwin %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4.1 ret <2 x i64> %tmp5 @@ -553,7 +553,7 @@ define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind { %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = load <8 x i16>* %C - %tmp4 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4.1 ret <8 x i16> %tmp5 @@ -565,7 +565,7 @@ define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4.1 ret <4 x i32> %tmp5 @@ -577,7 +577,7 @@ define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4.1 ret <2 x i64> %tmp5 @@ -591,7 +591,7 @@ define <8 x i16> 
@uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwin %tmp3 = load <8 x i16>* %C %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> - %tmp4 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4.1 ret <8 x i16> %tmp5 @@ -605,7 +605,7 @@ define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwin %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4.1 ret <4 x i32> %tmp5 @@ -619,7 +619,7 @@ define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwin %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4.1 ret <2 x i64> %tmp5 @@ -630,7 +630,7 @@ define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: saba.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = load <8 x i8>* %C %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 @@ -641,7 +641,7 @@ define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind ;CHECK: saba.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) %tmp4 = load <16 x i8>* %C %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 @@ -652,7 +652,7 @@ define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind ;CHECK: saba.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = load <4 x i16>* %C %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -663,7 +663,7 @@ define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind ;CHECK: saba.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) %tmp4 = load <8 x i16>* %C %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -674,7 +674,7 @@ define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind ;CHECK: saba.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> 
@llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = load <2 x i32>* %C %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -685,7 +685,7 @@ define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind ;CHECK: saba.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) %tmp4 = load <4 x i32>* %C %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -696,7 +696,7 @@ define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: uaba.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = load <8 x i8>* %C %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 @@ -707,7 +707,7 @@ define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind ;CHECK: uaba.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) %tmp4 = load <16 x i8>* %C %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 @@ -718,7 +718,7 @@ define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind ;CHECK: uaba.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = load <4 x i16>* %C %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -729,7 +729,7 @@ define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind ;CHECK: uaba.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) %tmp4 = load <8 x i16>* %C %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -740,7 +740,7 @@ define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind ;CHECK: uaba.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = load <2 x i32>* %C %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -751,7 +751,7 @@ define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind ;CHECK: uaba.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) %tmp4 = load <4 x i32>* %C %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -761,19 +761,19 @@ define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind define float @fabds(float %a, float %b) nounwind { ; CHECK-LABEL: fabds: ; CHECK: fabd s0, s0, s1 - %vabd.i = tail call float @llvm.arm64.sisd.fabd.f32(float %a, float %b) nounwind + %vabd.i = tail call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) nounwind ret float %vabd.i } define double @fabdd(double %a, 
double %b) nounwind { ; CHECK-LABEL: fabdd: ; CHECK: fabd d0, d0, d1 - %vabd.i = tail call double @llvm.arm64.sisd.fabd.f64(double %a, double %b) nounwind + %vabd.i = tail call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) nounwind ret double %vabd.i } -declare double @llvm.arm64.sisd.fabd.f64(double, double) nounwind readnone -declare float @llvm.arm64.sisd.fabd.f32(float, float) nounwind readnone +declare double @llvm.aarch64.sisd.fabd.f64(double, double) nounwind readnone +declare float @llvm.aarch64.sisd.fabd.f32(float, float) nounwind readnone define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { ; CHECK-LABEL: uabdl_from_extract_dup: @@ -784,7 +784,7 @@ define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind + %res = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind %res1 = zext <2 x i32> %res to <2 x i64> ret <2 x i64> %res1 } @@ -798,7 +798,7 @@ define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind + %res = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind %res1 = zext <2 x i32> %res to <2 x i64> ret <2 x i64> %res1 } diff --git a/test/CodeGen/ARM64/vadd.ll b/test/CodeGen/AArch64/arm64-vadd.ll similarity index 80% rename from test/CodeGen/ARM64/vadd.ll rename to test/CodeGen/AArch64/arm64-vadd.ll index f674c6de339..9ed8aa6d7c5 100644 --- a/test/CodeGen/ARM64/vadd.ll +++ b/test/CodeGen/AArch64/arm64-vadd.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: addhn8b: ;CHECK: addhn.8b %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: addhn.4h %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @addhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: addhn.2s %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -31,8 +31,8 @@ define <16 x i8> @addhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind { ;CHECK-LABEL: addhn2_16b: ;CHECK: addhn.8b ;CHECK-NEXT: addhn2.16b - %vaddhn2.i = tail call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind - %vaddhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vaddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) 
nounwind + %vaddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind %res = shufflevector <8 x i8> %vaddhn2.i, <8 x i8> %vaddhn_high2.i, <16 x i32> ret <16 x i8> %res } @@ -41,8 +41,8 @@ define <8 x i16> @addhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind { ;CHECK-LABEL: addhn2_8h: ;CHECK: addhn.4h ;CHECK-NEXT: addhn2.8h - %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind - %vaddhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vaddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind %res = shufflevector <4 x i16> %vaddhn2.i, <4 x i16> %vaddhn_high3.i, <8 x i32> ret <8 x i16> %res } @@ -51,15 +51,15 @@ define <4 x i32> @addhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind { ;CHECK-LABEL: addhn2_4s: ;CHECK: addhn.2s ;CHECK-NEXT: addhn2.4s - %vaddhn2.i = tail call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind - %vaddhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vaddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vaddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind %res = shufflevector <2 x i32> %vaddhn2.i, <2 x i32> %vaddhn_high3.i, <4 x i32> ret <4 x i32> %res } -declare <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { @@ -67,7 +67,7 @@ define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: raddhn.8b %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -76,7 +76,7 @@ define <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: raddhn.4h %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -85,7 +85,7 @@ define <2 x i32> @raddhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: raddhn.2s %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -93,8 +93,8 @@ define <16 x i8> @raddhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind { ;CHECK-LABEL: raddhn2_16b: ;CHECK: raddhn.8b ;CHECK-NEXT: raddhn2.16b - %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind - %vraddhn_high2.i = tail call <8 x i8> 
@llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vraddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind %res = shufflevector <8 x i8> %vraddhn2.i, <8 x i8> %vraddhn_high2.i, <16 x i32> ret <16 x i8> %res } @@ -103,8 +103,8 @@ define <8 x i16> @raddhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind { ;CHECK-LABEL: raddhn2_8h: ;CHECK: raddhn.4h ;CHECK-NEXT: raddhn2.8h - %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind - %vraddhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vraddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind %res = shufflevector <4 x i16> %vraddhn2.i, <4 x i16> %vraddhn_high3.i, <8 x i32> ret <8 x i16> %res } @@ -113,15 +113,15 @@ define <4 x i32> @raddhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind { ;CHECK-LABEL: raddhn2_4s: ;CHECK: raddhn.2s ;CHECK-NEXT: raddhn2.4s - %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind - %vraddhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vraddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind %res = shufflevector <2 x i32> %vraddhn2.i, <2 x i32> %vraddhn_high3.i, <4 x i32> ret <4 x i32> %res } -declare <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: saddl8h: @@ -428,7 +428,7 @@ define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind { ;CHECK-LABEL: saddlp4h: ;CHECK: saddlp.4h %tmp1 = load <8 x i8>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1) ret <4 x i16> %tmp3 } @@ -436,7 +436,7 @@ define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind { ;CHECK-LABEL: saddlp2s: ;CHECK: saddlp.2s %tmp1 = load <4 x i16>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1) ret <2 x i32> %tmp3 } @@ -444,7 +444,7 @@ define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind { ;CHECK-LABEL: saddlp1d: ;CHECK: saddlp.1d %tmp1 = load <2 x i32>* %A - %tmp3 = call <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmp1) + %tmp3 = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmp1) ret <1 x i64> %tmp3 } @@ -452,7 +452,7 @@ define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind { ;CHECK-LABEL: saddlp8h: ;CHECK: saddlp.8h %tmp1 = load <16 x i8>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1) 
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1) ret <8 x i16> %tmp3 } @@ -460,7 +460,7 @@ define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind { ;CHECK-LABEL: saddlp4s: ;CHECK: saddlp.4s %tmp1 = load <8 x i16>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1) ret <4 x i32> %tmp3 } @@ -468,23 +468,23 @@ define <2 x i64> @saddlp2d(<4 x i32>* %A) nounwind { ;CHECK-LABEL: saddlp2d: ;CHECK: saddlp.2d %tmp1 = load <4 x i32>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1) ret <2 x i64> %tmp3 } -declare <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind readnone define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind { ;CHECK-LABEL: uaddlp4h: ;CHECK: uaddlp.4h %tmp1 = load <8 x i8>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1) ret <4 x i16> %tmp3 } @@ -492,7 +492,7 @@ define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind { ;CHECK-LABEL: uaddlp2s: ;CHECK: uaddlp.2s %tmp1 = load <4 x i16>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1) ret <2 x i32> %tmp3 } @@ -500,7 +500,7 @@ define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind { ;CHECK-LABEL: uaddlp1d: ;CHECK: uaddlp.1d %tmp1 = load <2 x i32>* %A - %tmp3 = call <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmp1) + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmp1) ret <1 x i64> %tmp3 } @@ -508,7 +508,7 @@ define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind { ;CHECK-LABEL: uaddlp8h: ;CHECK: uaddlp.8h %tmp1 = load <16 x i8>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1) ret <8 x i16> %tmp3 } @@ -516,7 +516,7 @@ define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind { ;CHECK-LABEL: uaddlp4s: ;CHECK: uaddlp.4s %tmp1 = load <8 x i16>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1) ret <4 x i32> %tmp3 } @@ -524,23 +524,23 @@ define <2 x i64> @uaddlp2d(<4 x i32>* %A) nounwind { ;CHECK-LABEL: uaddlp2d: ;CHECK: uaddlp.2d %tmp1 = load <4 x i32>* %A - %tmp3 = call <2 x i64> 
@llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1) ret <2 x i64> %tmp3 } -declare <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind readnone define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: sadalp4h: ;CHECK: sadalp.4h %tmp1 = load <8 x i8>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1) %tmp4 = load <4 x i16>* %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -550,7 +550,7 @@ define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: sadalp2s: ;CHECK: sadalp.2s %tmp1 = load <4 x i16>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1) %tmp4 = load <2 x i32>* %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -560,7 +560,7 @@ define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: sadalp8h: ;CHECK: sadalp.8h %tmp1 = load <16 x i8>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1) %tmp4 = load <8 x i16>* %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -570,7 +570,7 @@ define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: sadalp4s: ;CHECK: sadalp.4s %tmp1 = load <8 x i16>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1) %tmp4 = load <4 x i32>* %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -580,7 +580,7 @@ define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: sadalp2d: ;CHECK: sadalp.2d %tmp1 = load <4 x i32>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1) %tmp4 = load <2 x i64>* %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 @@ -590,7 +590,7 @@ define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: uadalp4h: ;CHECK: uadalp.4h %tmp1 = load <8 x i8>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1) %tmp4 = load <4 x i16>* %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -600,7 +600,7 @@ define <2 x 
i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: uadalp2s: ;CHECK: uadalp.2s %tmp1 = load <4 x i16>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1) %tmp4 = load <2 x i32>* %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -610,7 +610,7 @@ define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: uadalp8h: ;CHECK: uadalp.8h %tmp1 = load <16 x i8>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1) %tmp4 = load <8 x i16>* %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -620,7 +620,7 @@ define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: uadalp4s: ;CHECK: uadalp.4s %tmp1 = load <8 x i16>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1) %tmp4 = load <4 x i32>* %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -630,7 +630,7 @@ define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: uadalp2d: ;CHECK: uadalp.2d %tmp1 = load <4 x i32>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1) %tmp4 = load <2 x i64>* %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 @@ -641,7 +641,7 @@ define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: addp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -650,7 +650,7 @@ define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: addp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -659,7 +659,7 @@ define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: addp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -668,7 +668,7 @@ define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: addp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -677,7 +677,7 @@ define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: addp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -686,7 +686,7 @@ define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: addp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32> %tmp1, <4 x 
i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -695,24 +695,24 @@ define <2 x i64> @addp_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: addp.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: faddp_2s: ;CHECK: faddp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -721,7 +721,7 @@ define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: faddp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -730,13 +730,13 @@ define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: faddp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x i64> @uaddl2_duprhs(<4 x i32> %lhs, i32 %rhs) { ; CHECK-LABEL: uaddl2_duprhs diff --git 
a/test/CodeGen/ARM64/vaddlv.ll b/test/CodeGen/AArch64/arm64-vaddlv.ll similarity index 55% rename from test/CodeGen/ARM64/vaddlv.ll rename to test/CodeGen/AArch64/arm64-vaddlv.ll index d4d4608ba07..2d6413812ec 100644 --- a/test/CodeGen/ARM64/vaddlv.ll +++ b/test/CodeGen/AArch64/arm64-vaddlv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone { ; CHECK: test_vaddlv_s32 @@ -6,7 +6,7 @@ define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone { ; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddlv.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %a1) nounwind + %vaddlv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a1) nounwind ret i64 %vaddlv.i } @@ -16,11 +16,11 @@ define i64 @test_vaddlv_u32(<2 x i32> %a1) nounwind readnone { ; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddlv.i = tail call i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32> %a1) nounwind + %vaddlv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a1) nounwind ret i64 %vaddlv.i } -declare i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind readnone +declare i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind readnone -declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind readnone +declare i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/vaddv.ll b/test/CodeGen/AArch64/arm64-vaddv.ll similarity index 63% rename from test/CodeGen/ARM64/vaddv.ll rename to test/CodeGen/AArch64/arm64-vaddv.ll index 154f9177937..2d92ce6ea57 100644 --- a/test/CodeGen/ARM64/vaddv.ll +++ b/test/CodeGen/AArch64/arm64-vaddv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s define signext i8 @test_vaddv_s8(<8 x i8> %a1) { ; CHECK-LABEL: test_vaddv_s8: @@ -6,7 +6,7 @@ define signext i8 @test_vaddv_s8(<8 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a1) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -17,7 +17,7 @@ define signext i16 @test_vaddv_s16(<4 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a1) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -29,7 +29,7 @@ define i32 @test_vaddv_s32(<2 x i32> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a1) ret i32 %vaddv.i } @@ -39,7 +39,7 @@ define i64 @test_vaddv_s64(<2 x i64> %a1) { ; CHECK-NEXT: fmov x0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i64 @llvm.arm64.neon.saddv.i64.v2i64(<2 x i64> %a1) + %vaddv.i = tail call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a1) ret i64 %vaddv.i } @@ -49,7 +49,7 @@ define zeroext i8 @test_vaddv_u8(<8 x i8> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8> %a1) + 
%vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a1) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -60,7 +60,7 @@ define i32 @test_vaddv_u8_masked(<8 x i8> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a1) %0 = and i32 %vaddv.i, 511 ; 0x1ff ret i32 %0 } @@ -71,7 +71,7 @@ define zeroext i16 @test_vaddv_u16(<4 x i16> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a1) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -82,7 +82,7 @@ define i32 @test_vaddv_u16_masked(<4 x i16> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a1) %0 = and i32 %vaddv.i, 3276799 ; 0x31ffff ret i32 %0 } @@ -94,7 +94,7 @@ define i32 @test_vaddv_u32(<2 x i32> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v2i32(<2 x i32> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a1) ret i32 %vaddv.i } @@ -103,7 +103,7 @@ define float @test_vaddv_f32(<2 x float> %a1) { ; CHECK: faddp.2s s0, v0 ; CHECK-NEXT: ret entry: - %vaddv.i = tail call float @llvm.arm64.neon.faddv.f32.v2f32(<2 x float> %a1) + %vaddv.i = tail call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a1) ret float %vaddv.i } @@ -113,7 +113,7 @@ define float @test_vaddv_v4f32(<4 x float> %a1) { ; CHECK: faddp.2s s0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call float @llvm.arm64.neon.faddv.f32.v4f32(<4 x float> %a1) + %vaddv.i = tail call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a1) ret float %vaddv.i } @@ -122,7 +122,7 @@ define double @test_vaddv_f64(<2 x double> %a1) { ; CHECK: faddp.2d d0, v0 ; CHECK-NEXT: ret entry: - %vaddv.i = tail call double @llvm.arm64.neon.faddv.f64.v2f64(<2 x double> %a1) + %vaddv.i = tail call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a1) ret double %vaddv.i } @@ -132,7 +132,7 @@ define i64 @test_vaddv_u64(<2 x i64> %a1) { ; CHECK-NEXT: fmov x0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64> %a1) + %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1) ret i64 %vaddv.i } @@ -143,7 +143,7 @@ define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) { ; CHECK-NOT: ins ; CHECK: ret entry: - %vaddv.i = tail call i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64> %a1) + %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1) %vec = insertelement <1 x i64> undef, i64 %vaddv.i, i32 0 ret <1 x i64> %vec } @@ -154,7 +154,7 @@ define signext i8 @test_vaddvq_s8(<16 x i8> %a1) { ; CHECK-NEXT: smov.b w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a1) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -165,7 +165,7 @@ define signext i16 @test_vaddvq_s16(<8 x i16> %a1) { ; CHECK-NEXT: smov.h w0, v[[REGNUM]][0] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 
x i16> %a1) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -176,7 +176,7 @@ define i32 @test_vaddvq_s32(<4 x i32> %a1) { ; CHECK-NEXT: fmov w0, [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a1) ret i32 %vaddv.i } @@ -186,7 +186,7 @@ define zeroext i8 @test_vaddvq_u8(<16 x i8> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v16i8(<16 x i8> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a1) %0 = trunc i32 %vaddv.i to i8 ret i8 %0 } @@ -197,7 +197,7 @@ define zeroext i16 @test_vaddvq_u16(<8 x i16> %a1) { ; CHECK-NEXT: fmov w0, s[[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i16(<8 x i16> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a1) %0 = trunc i32 %vaddv.i to i16 ret i16 %0 } @@ -208,38 +208,38 @@ define i32 @test_vaddvq_u32(<4 x i32> %a1) { ; CHECK-NEXT: fmov [[FMOVRES:w[0-9]+]], [[REGNUM]] ; CHECK-NEXT: ret entry: - %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i32(<4 x i32> %a1) + %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a1) ret i32 %vaddv.i } -declare i32 @llvm.arm64.neon.uaddv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.uaddv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.uaddv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>) -declare i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32>) +declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>) -declare i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16>) +declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>) -declare i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8>) +declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>) -declare i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64>) +declare i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64>) -declare i32 @llvm.arm64.neon.uaddv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32>) -declare i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8>) -declare i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32>) +declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>) -declare i64 @llvm.arm64.neon.saddv.i64.v2i64(<2 x i64>) +declare i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64>) -declare i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16>) +declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>) -declare i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8>) +declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>) -declare float @llvm.arm64.neon.faddv.f32.v2f32(<2 x float> %a1) -declare float @llvm.arm64.neon.faddv.f32.v4f32(<4 x float> %a1) -declare double @llvm.arm64.neon.faddv.f64.v2f64(<2 x double> %a1) +declare float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a1) +declare float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a1) +declare double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a1) diff --git a/test/CodeGen/ARM64/variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll similarity index 100% rename from test/CodeGen/ARM64/variadic-aapcs.ll rename to 
test/CodeGen/AArch64/arm64-variadic-aapcs.ll diff --git a/test/CodeGen/ARM64/vbitwise.ll b/test/CodeGen/AArch64/arm64-vbitwise.ll similarity index 86% rename from test/CodeGen/ARM64/vbitwise.ll rename to test/CodeGen/AArch64/arm64-vbitwise.ll index 7d8378de292..93de95e52e5 100644 --- a/test/CodeGen/ARM64/vbitwise.ll +++ b/test/CodeGen/AArch64/arm64-vbitwise.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: rbit_8b: ;CHECK: rbit.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8> %tmp1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp3 } @@ -12,12 +12,12 @@ define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: rbit_16b: ;CHECK: rbit.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8> %tmp1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sxtl8h: diff --git a/test/CodeGen/ARM64/vclz.ll b/test/CodeGen/AArch64/arm64-vclz.ll similarity index 98% rename from test/CodeGen/ARM64/vclz.ll rename to test/CodeGen/AArch64/arm64-vclz.ll index ddc09ed85fa..cf5670a0354 100644 --- a/test/CodeGen/ARM64/vclz.ll +++ b/test/CodeGen/AArch64/arm64-vclz.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp { ; CHECK-LABEL: test_vclz_u8: diff --git a/test/CodeGen/ARM64/vcmp.ll b/test/CodeGen/AArch64/arm64-vcmp.ll similarity index 76% rename from test/CodeGen/ARM64/vcmp.ll rename to test/CodeGen/AArch64/arm64-vcmp.ll index 56153f08f35..982ab09ee69 100644 --- a/test/CodeGen/ARM64/vcmp.ll +++ b/test/CodeGen/AArch64/arm64-vcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define void @fcmltz_4s(<4 x float> %a, <4 x i16>* %p) nounwind { @@ -18,7 +18,7 @@ define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: facge.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.facge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -27,7 +27,7 @@ define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: facge.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.facge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } @@ -36,20 +36,20 @@ define <2 x i64> @facge_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: facge.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.facge.v2i64.v2f64(<2 x double> 
%tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.facge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.facge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.facge.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: facgt_2s: ;CHECK: facgt.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.facgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -58,7 +58,7 @@ define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: facgt.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.facgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } @@ -67,47 +67,47 @@ define <2 x i64> @facgt_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: facgt.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.facgt.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.facgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.facgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.facgt.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone define i32 @facge_s(float %A, float %B) nounwind { ; CHECK-LABEL: facge_s: ; CHECK: facge {{s[0-9]+}}, s0, s1 - %mask = call i32 @llvm.arm64.neon.facge.i32.f32(float %A, float %B) + %mask = call i32 @llvm.aarch64.neon.facge.i32.f32(float %A, float %B) ret i32 %mask } define i64 @facge_d(double %A, double %B) nounwind { ; CHECK-LABEL: facge_d: ; CHECK: facge {{d[0-9]+}}, d0, d1 - %mask = call i64 @llvm.arm64.neon.facge.i64.f64(double %A, double %B) + %mask = call i64 @llvm.aarch64.neon.facge.i64.f64(double %A, double %B) ret i64 %mask } -declare i64 @llvm.arm64.neon.facge.i64.f64(double, double) -declare i32 @llvm.arm64.neon.facge.i32.f32(float, float) +declare i64 @llvm.aarch64.neon.facge.i64.f64(double, double) +declare i32 @llvm.aarch64.neon.facge.i32.f32(float, float) define i32 @facgt_s(float %A, float %B) nounwind { ; CHECK-LABEL: facgt_s: ; CHECK: facgt {{s[0-9]+}}, s0, s1 - %mask = call i32 @llvm.arm64.neon.facgt.i32.f32(float %A, float %B) + %mask = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %A, float %B) 
ret i32 %mask } define i64 @facgt_d(double %A, double %B) nounwind { ; CHECK-LABEL: facgt_d: ; CHECK: facgt {{d[0-9]+}}, d0, d1 - %mask = call i64 @llvm.arm64.neon.facgt.i64.f64(double %A, double %B) + %mask = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %A, double %B) ret i64 %mask } -declare i64 @llvm.arm64.neon.facgt.i64.f64(double, double) -declare i32 @llvm.arm64.neon.facgt.i32.f32(float, float) +declare i64 @llvm.aarch64.neon.facgt.i64.f64(double, double) +declare i32 @llvm.aarch64.neon.facgt.i32.f32(float, float) define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: cmtst_8b: diff --git a/test/CodeGen/AArch64/arm64-vcnt.ll b/test/CodeGen/AArch64/arm64-vcnt.ll new file mode 100644 index 00000000000..903501ec16a --- /dev/null +++ b/test/CodeGen/AArch64/arm64-vcnt.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind { +;CHECK-LABEL: cls_8b: +;CHECK: cls.8b + %tmp1 = load <8 x i8>* %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %tmp1) + ret <8 x i8> %tmp3 +} + +define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind { +;CHECK-LABEL: cls_16b: +;CHECK: cls.16b + %tmp1 = load <16 x i8>* %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %tmp1) + ret <16 x i8> %tmp3 +} + +define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind { +;CHECK-LABEL: cls_4h: +;CHECK: cls.4h + %tmp1 = load <4 x i16>* %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %tmp1) + ret <4 x i16> %tmp3 +} + +define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind { +;CHECK-LABEL: cls_8h: +;CHECK: cls.8h + %tmp1 = load <8 x i16>* %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %tmp1) + ret <8 x i16> %tmp3 +} + +define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind { +;CHECK-LABEL: cls_2s: +;CHECK: cls.2s + %tmp1 = load <2 x i32>* %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp3 +} + +define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind { +;CHECK-LABEL: cls_4s: +;CHECK: cls.4s + %tmp1 = load <4 x i32>* %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/vcombine.ll b/test/CodeGen/AArch64/arm64-vcombine.ll similarity index 90% rename from test/CodeGen/ARM64/vcombine.ll rename to test/CodeGen/AArch64/arm64-vcombine.ll index 16f591e378e..fa1299603af 100644 --- a/test/CodeGen/ARM64/vcombine.ll +++ b/test/CodeGen/AArch64/arm64-vcombine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ; LowerCONCAT_VECTORS() was reversing the order of two parts. 
; rdar://11558157 diff --git a/test/CodeGen/ARM64/vcvt.ll b/test/CodeGen/AArch64/arm64-vcvt.ll similarity index 67% rename from test/CodeGen/ARM64/vcvt.ll rename to test/CodeGen/AArch64/arm64-vcvt.ll index 19bb8cb8dc5..8c9e4e92710 100644 --- a/test/CodeGen/ARM64/vcvt.ll +++ b/test/CodeGen/AArch64/arm64-vcvt.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtas_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtas.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtas.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -23,20 +23,20 @@ define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtas.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtau_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtau.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -45,7 +45,7 @@ define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtau.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -54,20 +54,20 @@ define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtau.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtms_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtms.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 
x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -76,7 +76,7 @@ define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtms.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -85,20 +85,20 @@ define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtms.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtmu_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtmu.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -107,7 +107,7 @@ define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtmu.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -116,20 +116,20 @@ define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtmu.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtps_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtps.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -138,7 +138,7 @@ define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtps.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -147,20 +147,20 @@ define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtps.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 
x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtpu_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtpu.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -169,7 +169,7 @@ define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtpu.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -178,20 +178,20 @@ define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtpu.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtns_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtns.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -200,7 +200,7 @@ define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtns.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -209,20 +209,20 @@ define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtns.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> 
@llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtnu_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtnu.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %A) ret <2 x i32> %tmp3 } @@ -231,7 +231,7 @@ define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtnu.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float> %A) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %A) ret <4 x i32> %tmp3 } @@ -240,13 +240,13 @@ define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtnu.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double> %A) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %A) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzs_2s: @@ -401,7 +401,7 @@ define <2 x float> @frintn_2s(<2 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: frintn.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float> %A) + %tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A) ret <2 x float> %tmp3 } @@ -410,7 +410,7 @@ define <4 x float> @frintn_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: frintn.4s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float> %A) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A) ret <4 x float> %tmp3 } @@ -419,13 +419,13 @@ define <2 x double> @frintn_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: frintn.2d v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double> %A) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone define <2 x float> @frintp_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: frintp_2s: @@ -525,7 +525,7 @@ define <2 x float> @fcvtxn_2s(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtxn v0.2s, v0.2d ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A) ret <2 x float> %tmp3 } @@ -534,19 +534,19 @@ define <4 x float> @fcvtxn_4s(<2 x float> %ret, <2 x double> %A) nounwind { 
;CHECK-NOT: ld1 ;CHECK: fcvtxn2 v0.4s, v1.2d ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A) %res = shufflevector <2 x float> %ret, <2 x float> %tmp3, <4 x i32> ret <4 x float> %res } -declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone define <2 x i32> @fcvtzsc_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzsc_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtzs.2s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %A, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %A, i32 1) ret <2 x i32> %tmp3 } @@ -555,7 +555,7 @@ define <4 x i32> @fcvtzsc_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtzs.4s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %A, i32 1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %A, i32 1) ret <4 x i32> %tmp3 } @@ -564,20 +564,20 @@ define <2 x i64> @fcvtzsc_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtzs.2d v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %A, i32 1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %A, i32 1) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) nounwind readnone define <2 x i32> @fcvtzuc_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzuc_2s: ;CHECK-NOT: ld1 ;CHECK: fcvtzu.2s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %A, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %A, i32 1) ret <2 x i32> %tmp3 } @@ -586,7 +586,7 @@ define <4 x i32> @fcvtzuc_4s(<4 x float> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtzu.4s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %A, i32 1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %A, i32 1) ret <4 x i32> %tmp3 } @@ -595,20 +595,20 @@ define <2 x i64> @fcvtzuc_2d(<2 x double> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: fcvtzu.2d v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %A, i32 1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %A, i32 1) ret <2 x i64> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone +declare <4 x i32> 
@llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) nounwind readnone define <2 x float> @scvtf_2sc(<2 x i32> %A) nounwind { ;CHECK-LABEL: scvtf_2sc: ;CHECK-NOT: ld1 ;CHECK: scvtf.2s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %A, i32 1) + %tmp3 = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %A, i32 1) ret <2 x float> %tmp3 } @@ -617,7 +617,7 @@ define <4 x float> @scvtf_4sc(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: scvtf.4s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %A, i32 1) + %tmp3 = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %A, i32 1) ret <4 x float> %tmp3 } @@ -626,20 +626,20 @@ define <2 x double> @scvtf_2dc(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: scvtf.2d v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %A, i32 1) + %tmp3 = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %A, i32 1) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone -declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind { ;CHECK-LABEL: ucvtf_2sc: ;CHECK-NOT: ld1 ;CHECK: ucvtf.2s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1) + %tmp3 = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1) ret <2 x float> %tmp3 } @@ -648,7 +648,7 @@ define <4 x float> @ucvtf_4sc(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: ucvtf.4s v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %A, i32 1) + %tmp3 = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %A, i32 1) ret <4 x float> %tmp3 } @@ -657,7 +657,7 @@ define <2 x double> @ucvtf_2dc(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: ucvtf.2d v0, v0, #1 ;CHECK-NEXT: ret - %tmp3 = call <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %A, i32 1) + %tmp3 = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %A, i32 1) ret <2 x double> %tmp3 } @@ -681,6 +681,6 @@ define void @autogen_SD19225() { ret void } -declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone -declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/ARM64/vcvt_f.ll b/test/CodeGen/AArch64/arm64-vcvt_f.ll similarity index 73% rename 
from test/CodeGen/ARM64/vcvt_f.ll rename to test/CodeGen/AArch64/arm64-vcvt_f.ll index d67aa3b9d47..d24495844b4 100644 --- a/test/CodeGen/ARM64/vcvt_f.ll +++ b/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -O0 -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_f64_f32: @@ -38,7 +38,7 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp { ; CHECK-LABEL: test_vcvtx_f32_f64: - %vcvtx1.i = tail call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind + %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind ; CHECK: fcvtxn ret <2 x float> %vcvtx1.i ; CHECK: ret @@ -46,7 +46,7 @@ define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp { define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp { ; CHECK-LABEL: test_vcvtx_high_f32_f64: - %vcvtx2.i = tail call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind + %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> ; CHECK: fcvtxn2 ret <4 x float> %res @@ -54,13 +54,13 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou } -declare <2 x double> @llvm.arm64.neon.vcvthighfp2df(<4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfp2df(<2 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone -declare <2 x float> @llvm.arm64.neon.vcvtdf2fp(<2 x double>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone -declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone define i16 @to_half(float %in) { ; CHECK-LABEL: to_half: diff --git a/test/CodeGen/ARM64/vcvt_f32_su32.ll b/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll similarity index 75% rename from test/CodeGen/ARM64/vcvt_f32_su32.ll rename to test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll index 51e053d9745..1eb7b43d575 100644 --- a/test/CodeGen/ARM64/vcvt_f32_su32.ll +++ b/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x float> @ucvt(<2 x i32> %a) nounwind readnone ssp { ; CHECK-LABEL: ucvt: @@ -37,7 +37,7 @@ define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone ssp { ; CHECK-LABEL: cvtf16: ; CHECK: fcvtl v0.4s, v0.4h ; CHECK-NEXT: ret - %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %a) nounwind + %vcvt1.i = tail call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 
x i16> %a) nounwind ret <4 x float> %vcvt1.i } @@ -46,7 +46,7 @@ define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind readnone ssp { ; CHECK: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret %in = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %in) nounwind + %vcvt1.i = tail call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> %in) nounwind ret <4 x float> %vcvt1.i } @@ -56,7 +56,7 @@ define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind readnone ssp { ; CHECK-LABEL: cvtf16f32: ; CHECK: fcvtn v0.4h, v0.4s ; CHECK-NEXT: ret - %vcvt1.i = tail call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %a) nounwind + %vcvt1.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a) nounwind ret <4 x i16> %vcvt1.i } @@ -64,10 +64,10 @@ define <8 x i16> @cvtf16f32_high(<4 x i16> %low, <4 x float> %high_big) { ; CHECK-LABEL: cvtf16f32_high: ; CHECK: fcvtn2 v0.8h, v1.4s ; CHECK-NEXT: ret - %high = call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %high_big) + %high = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %high_big) %res = shufflevector <4 x i16> %low, <4 x i16> %high, <8 x i32> ret <8 x i16> %res } -declare <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) nounwind readnone diff --git a/test/CodeGen/AArch64/arm64-vcvt_n.ll b/test/CodeGen/AArch64/arm64-vcvt_n.ll new file mode 100644 index 00000000000..7ed5be6e8af --- /dev/null +++ b/test/CodeGen/AArch64/arm64-vcvt_n.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <2 x float> @cvtf32fxpu(<2 x i32> %a) nounwind readnone ssp { +; CHECK-LABEL: cvtf32fxpu: +; CHECK: ucvtf.2s v0, v0, #9 +; CHECK: ret + %vcvt_n1 = tail call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 9) + ret <2 x float> %vcvt_n1 +} + +define <2 x float> @cvtf32fxps(<2 x i32> %a) nounwind readnone ssp { +; CHECK-LABEL: cvtf32fxps: +; CHECK: scvtf.2s v0, v0, #12 +; CHECK: ret + %vcvt_n1 = tail call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 12) + ret <2 x float> %vcvt_n1 +} + +define <4 x float> @cvtqf32fxpu(<4 x i32> %a) nounwind readnone ssp { +; CHECK-LABEL: cvtqf32fxpu: +; CHECK: ucvtf.4s v0, v0, #18 +; CHECK: ret + %vcvt_n1 = tail call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 18) + ret <4 x float> %vcvt_n1 +} + +define <4 x float> @cvtqf32fxps(<4 x i32> %a) nounwind readnone ssp { +; CHECK-LABEL: cvtqf32fxps: +; CHECK: scvtf.4s v0, v0, #30 +; CHECK: ret + %vcvt_n1 = tail call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 30) + ret <4 x float> %vcvt_n1 +} +define <2 x double> @f1(<2 x i64> %a) nounwind readnone ssp { + %vcvt_n1 = tail call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 12) + ret <2 x double> %vcvt_n1 +} + +define <2 x double> @f2(<2 x i64> %a) nounwind readnone ssp { + %vcvt_n1 = tail call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 9) + ret <2 x double> %vcvt_n1 +} + +declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <2 x float> 
@llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/ARM64/vcvt_su32_f32.ll b/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll similarity index 91% rename from test/CodeGen/ARM64/vcvt_su32_f32.ll rename to test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll index 8c82fa095c8..985a5f76243 100644 --- a/test/CodeGen/ARM64/vcvt_su32_f32.ll +++ b/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <2 x i32> @c1(<2 x float> %a) nounwind readnone ssp { ; CHECK: c1 diff --git a/test/CodeGen/ARM64/vcvtxd_f32_f64.ll b/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll similarity index 54% rename from test/CodeGen/ARM64/vcvtxd_f32_f64.ll rename to test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll index bbe8f0b3864..b29c22cbfda 100644 --- a/test/CodeGen/ARM64/vcvtxd_f32_f64.ll +++ b/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll @@ -4,8 +4,8 @@ define float @fcvtxn(double %a) { ; CHECK-LABEL: fcvtxn: ; CHECK: fcvtxn s0, d0 ; CHECK-NEXT: ret - %vcvtxd.i = tail call float @llvm.arm64.sisd.fcvtxn(double %a) nounwind + %vcvtxd.i = tail call float @llvm.aarch64.sisd.fcvtxn(double %a) nounwind ret float %vcvtxd.i } -declare float @llvm.arm64.sisd.fcvtxn(double) nounwind readnone +declare float @llvm.aarch64.sisd.fcvtxn(double) nounwind readnone diff --git a/test/CodeGen/ARM64/vecCmpBr.ll b/test/CodeGen/AArch64/arm64-vecCmpBr.ll similarity index 87% rename from test/CodeGen/ARM64/vecCmpBr.ll rename to test/CodeGen/AArch64/arm64-vecCmpBr.ll index 2af8775cea6..c7321e4b7d0 100644 --- a/test/CodeGen/ARM64/vecCmpBr.ll +++ b/test/CodeGen/AArch64/arm64-vecCmpBr.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s ; ModuleID = 'arm64_vecCmpBr.c' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios3.0.0" @@ -13,7 +13,7 @@ define i32 @anyZero64(<4 x i16> %a) #0 { ; CHECK-NEXT: b _bar entry: %0 = bitcast <4 x i16> %a to <8 x i8> - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %0) #3 + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %0) #3 %1 = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %if.then, label %return @@ -39,7 +39,7 @@ define i32 @anyZero128(<8 x i16> %a) #0 { entry: %0 = bitcast <8 x i16> %a to <16 x i8> - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %0) #3 + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %0) #3 %1 = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %if.then, label %return @@ -63,7 +63,7 @@ define i32 @anyNonZero64(<4 x i16> %a) #0 { entry: %0 = bitcast <4 x i16> %a to <8 x i8> - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3 + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3 %1 = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %return, label %if.then @@ -86,7 
+86,7 @@ define i32 @anyNonZero128(<8 x i16> %a) #0 { ; CHECK-NEXT: movz w0, #0 entry: %0 = bitcast <8 x i16> %a to <16 x i8> - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3 + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3 %1 = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %return, label %if.then @@ -109,7 +109,7 @@ define i32 @allZero64(<4 x i16> %a) #0 { ; CHECK-NEXT: b _bar entry: %0 = bitcast <4 x i16> %a to <8 x i8> - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3 + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3 %1 = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %if.then, label %return @@ -132,7 +132,7 @@ define i32 @allZero128(<8 x i16> %a) #0 { ; CHECK-NEXT: b _bar entry: %0 = bitcast <8 x i16> %a to <16 x i8> - %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3 + %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3 %1 = trunc i32 %vmaxv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %if.then, label %return @@ -155,7 +155,7 @@ define i32 @allNonZero64(<4 x i16> %a) #0 { ; CHECK-NEXT: movz w0, #0 entry: %0 = bitcast <4 x i16> %a to <8 x i8> - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %0) #3 + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %0) #3 %1 = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %return, label %if.then @@ -178,7 +178,7 @@ define i32 @allNonZero128(<8 x i16> %a) #0 { ; CHECK-NEXT: movz w0, #0 entry: %0 = bitcast <8 x i16> %a to <16 x i8> - %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %0) #3 + %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %0) #3 %1 = trunc i32 %vminv.i to i8 %tobool = icmp eq i8 %1, 0 br i1 %tobool, label %return, label %if.then @@ -192,13 +192,13 @@ return: ; preds = %entry, %if.then ret i32 %retval.0 } -declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) #2 +declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) #2 -declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) #2 +declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) #2 -declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) #2 +declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) #2 -declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) #2 +declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) #2 attributes #0 = { nounwind ssp "target-cpu"="cyclone" } attributes #1 = { "target-cpu"="cyclone" } diff --git a/test/CodeGen/ARM64/vecFold.ll b/test/CodeGen/AArch64/arm64-vecFold.ll similarity index 74% rename from test/CodeGen/ARM64/vecFold.ll rename to test/CodeGen/AArch64/arm64-vecFold.ll index 6888932f2ce..aeacfccab3c 100644 --- a/test/CodeGen/ARM64/vecFold.ll +++ b/test/CodeGen/AArch64/arm64-vecFold.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s| FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple -o - %s| FileCheck %s define <16 x i8> @foov16i8(<8 x i16> %a0, <8 x i16> %b0) nounwind readnone ssp { ; CHECK-LABEL: foov16i8: @@ -50,8 +50,8 @@ define <4 x i32> @foov4i32(<2 x i64> %a0, <2 x i64> %b0) nounwind readnone ssp { define <8 x i16> @bar(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp { ; CHECK-LABEL: bar: - %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind - %vaddhn2.i10 = tail call <4 x i16> 
@llvm.arm64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind + %vaddhn2.i10 = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind ; CHECK: addhn.4h v0, v0, v1 ; CHECK-NEXT: addhn2.8h v0, v2, v3 ; CHECK-NEXT: ret @@ -64,7 +64,7 @@ define <8 x i16> @bar(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1 define <8 x i16> @baz(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp { ; CHECK-LABEL: baz: - %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind %vshrn_high_shift = ashr <4 x i32> %b0, %vshrn_high = trunc <4 x i32> %vshrn_high_shift to <4 x i16> ; CHECK: addhn.4h v0, v0, v1 @@ -83,8 +83,8 @@ entry: ; CHECK: raddhn.4h v0, v0, v1 ; CHECK-NEXT: raddhn2.8h v0, v2, v3 ; CHECK-NEXT: ret - %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind - %vraddhn2.i10 = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind + %vraddhn2.i10 = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind %0 = bitcast <4 x i16> %vraddhn2.i to <1 x i64> %1 = bitcast <4 x i16> %vraddhn2.i10 to <1 x i64> %shuffle.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> @@ -97,8 +97,8 @@ define <8 x i16> @vrshrn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> ; CHECK: rshrn.8b v0, v0, #5 ; CHECK-NEXT: rshrn2.16b v0, v2, #6 ; CHECK-NEXT: ret - %vrshrn_n1 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %a0, i32 5) - %vrshrn_n4 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %b0, i32 6) + %vrshrn_n1 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %a0, i32 5) + %vrshrn_n4 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b0, i32 6) %1 = bitcast <8 x i8> %vrshrn_n1 to <1 x i64> %2 = bitcast <8 x i8> %vrshrn_n4 to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -111,8 +111,8 @@ define <8 x i16> @vrsubhn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> ; CHECK: rsubhn.8b v0, v0, v1 ; CHECK: rsubhn2.16b v0, v2, v3 ; CHECK-NEXT: ret - %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a0, <8 x i16> %a1) nounwind - %vrsubhn2.i10 = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %b0, <8 x i16> %b1) nounwind + %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a0, <8 x i16> %a1) nounwind + %vrsubhn2.i10 = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %b0, <8 x i16> %b1) nounwind %1 = bitcast <8 x i8> %vrsubhn2.i to <1 x i64> %2 = bitcast <8 x i8> %vrsubhn2.i10 to <1 x i64> %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> @@ -122,8 +122,8 @@ define <8 x i16> @vrsubhn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> define <8 x i16> @noOpt1(<2 x i32> %a0, <2 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp { ; CHECK-LABEL: noOpt1: - %vqsub2.i = tail call <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32> %a0, <2 x i32> %a1) nounwind - %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind + %vqsub2.i = tail call <2 x i32> 
@llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a0, <2 x i32> %a1) nounwind + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind ; CHECK: sqsub.2s v0, v0, v1 ; CHECK-NEXT: addhn2.8h v0, v2, v3 %1 = bitcast <2 x i32> %vqsub2.i to <1 x i64> @@ -133,13 +133,13 @@ define <8 x i16> @noOpt1(<2 x i32> %a0, <2 x i32> %a1, <4 x i32> %b0, <4 x i32> ret <8 x i16> %3 } -declare <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone diff --git a/test/CodeGen/ARM64/vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll similarity index 81% rename from test/CodeGen/ARM64/vector-ext.ll rename to test/CodeGen/AArch64/arm64-vector-ext.ll index 9cc0555d8c4..650ff1e14f0 100644 --- a/test/CodeGen/ARM64/vector-ext.ll +++ b/test/CodeGen/AArch64/arm64-vector-ext.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ;CHECK: @func30 ;CHECK: ushll.4s v0, v0, #0 diff --git a/test/CodeGen/ARM64/vector-imm.ll b/test/CodeGen/AArch64/arm64-vector-imm.ll similarity index 98% rename from test/CodeGen/ARM64/vector-imm.ll rename to test/CodeGen/AArch64/arm64-vector-imm.ll index a84f804c8cd..9fb088b9a49 100644 --- a/test/CodeGen/ARM64/vector-imm.ll +++ b/test/CodeGen/AArch64/arm64-vector-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_orrimm: diff --git a/test/CodeGen/ARM64/vector-insertion.ll b/test/CodeGen/AArch64/arm64-vector-insertion.ll similarity index 91% rename from test/CodeGen/ARM64/vector-insertion.ll rename to test/CodeGen/AArch64/arm64-vector-insertion.ll index 0926bcfde9a..8fbff71f9fc 100644 --- a/test/CodeGen/ARM64/vector-insertion.ll +++ b/test/CodeGen/AArch64/arm64-vector-insertion.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -mcpu=generic -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -mcpu=generic -aarch64-neon-syntax=apple < %s | FileCheck %s define void @test0f(float* nocapture %x, float %a) #0 { entry: diff --git a/test/CodeGen/ARM64/vector-ldst.ll 
b/test/CodeGen/AArch64/arm64-vector-ldst.ll similarity index 99% rename from test/CodeGen/ARM64/vector-ldst.ll rename to test/CodeGen/AArch64/arm64-vector-ldst.ll index 154160ee502..c00191577d1 100644 --- a/test/CodeGen/ARM64/vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-vector-ldst.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s ; rdar://9428579 diff --git a/test/CodeGen/ARM64/vext.ll b/test/CodeGen/AArch64/arm64-vext.ll similarity index 99% rename from test/CodeGen/ARM64/vext.ll rename to test/CodeGen/AArch64/arm64-vext.ll index c82043940c8..2240dfd5a1a 100644 --- a/test/CodeGen/ARM64/vext.ll +++ b/test/CodeGen/AArch64/arm64-vext.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s define void @test_vext_s8() nounwind ssp { ; CHECK-LABEL: test_vext_s8: diff --git a/test/CodeGen/ARM64/vext_reverse.ll b/test/CodeGen/AArch64/arm64-vext_reverse.ll similarity index 100% rename from test/CodeGen/ARM64/vext_reverse.ll rename to test/CodeGen/AArch64/arm64-vext_reverse.ll diff --git a/test/CodeGen/ARM64/vfloatintrinsics.ll b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll similarity index 99% rename from test/CodeGen/ARM64/vfloatintrinsics.ll rename to test/CodeGen/AArch64/arm64-vfloatintrinsics.ll index a8c882bf696..255a18216de 100644 --- a/test/CodeGen/ARM64/vfloatintrinsics.ll +++ b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s ;;; Float vectors diff --git a/test/CodeGen/ARM64/vhadd.ll b/test/CodeGen/AArch64/arm64-vhadd.ll similarity index 51% rename from test/CodeGen/ARM64/vhadd.ll rename to test/CodeGen/AArch64/arm64-vhadd.ll index aed76810e13..6178bf9809d 100644 --- a/test/CodeGen/ARM64/vhadd.ll +++ b/test/CodeGen/AArch64/arm64-vhadd.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: shadd8b: ;CHECK: shadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: shadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -23,7 +23,7 @@ define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: shadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: shadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) 
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -41,7 +41,7 @@ define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: shadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -50,7 +50,7 @@ define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: shadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -59,7 +59,7 @@ define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uhadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -68,7 +68,7 @@ define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uhadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -77,7 +77,7 @@ define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uhadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -86,7 +86,7 @@ define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uhadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -95,7 +95,7 @@ define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uhadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -104,32 +104,32 @@ define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uhadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>) nounwind 
readnone -declare <4 x i16> @llvm.arm64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.shadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: srhadd8b: ;CHECK: srhadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -138,7 +138,7 @@ define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: srhadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -147,7 +147,7 @@ define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: srhadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -156,7 +156,7 @@ define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: srhadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -165,7 +165,7 @@ define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: srhadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -174,7 +174,7 @@ define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: srhadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.srhadd.v4i32(<4 x i32> %tmp1, 
<4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -183,7 +183,7 @@ define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: urhadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -192,7 +192,7 @@ define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: urhadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -201,7 +201,7 @@ define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: urhadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -210,7 +210,7 @@ define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: urhadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -219,7 +219,7 @@ define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: urhadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -228,22 +228,22 @@ define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: urhadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> 
@llvm.arm64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/vhsub.ll b/test/CodeGen/AArch64/arm64-vhsub.ll similarity index 50% rename from test/CodeGen/ARM64/vhsub.ll rename to test/CodeGen/AArch64/arm64-vhsub.ll index 85df4d4eb73..13bfda3899e 100644 --- a/test/CodeGen/ARM64/vhsub.ll +++ b/test/CodeGen/AArch64/arm64-vhsub.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @shsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: shsub8b: ;CHECK: shsub.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.shsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <16 x i8> @shsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: shsub.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.shsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -23,7 +23,7 @@ define <4 x i16> @shsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: shsub.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.shsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i16> @shsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: shsub.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.shsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -41,7 +41,7 @@ define <2 x i32> @shsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: shsub.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.shsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -50,7 +50,7 @@ define <4 x i32> @shsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: shsub.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.shsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -59,7 +59,7 @@ define <8 x i8> @uhsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 
;CHECK: uhsub.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uhsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -68,7 +68,7 @@ define <16 x i8> @uhsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uhsub.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uhsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -77,7 +77,7 @@ define <4 x i16> @uhsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uhsub.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uhsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -86,7 +86,7 @@ define <8 x i16> @uhsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uhsub.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uhsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -95,7 +95,7 @@ define <2 x i32> @uhsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uhsub.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uhsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -104,22 +104,22 @@ define <4 x i32> @uhsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uhsub.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uhsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.shsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.shsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.shsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uhsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uhsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uhsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.shsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.shsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.shsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 
x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uhsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uhsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uhsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/virtual_base.ll b/test/CodeGen/AArch64/arm64-virtual_base.ll similarity index 100% rename from test/CodeGen/ARM64/virtual_base.ll rename to test/CodeGen/AArch64/arm64-virtual_base.ll diff --git a/test/CodeGen/ARM64/vmax.ll b/test/CodeGen/AArch64/arm64-vmax.ll similarity index 52% rename from test/CodeGen/ARM64/vmax.ll rename to test/CodeGen/AArch64/arm64-vmax.ll index b2426f35057..3f2c134dec6 100644 --- a/test/CodeGen/ARM64/vmax.ll +++ b/test/CodeGen/AArch64/arm64-vmax.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: smax_8b: ;CHECK: smax.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: smax.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -23,7 +23,7 @@ define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: smax.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: smax.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -41,7 +41,7 @@ define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: smax.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -50,23 +50,23 @@ define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: smax.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind 
readnone -declare <4 x i16> @llvm.arm64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: umax_8b: ;CHECK: umax.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -75,7 +75,7 @@ define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: umax.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -84,7 +84,7 @@ define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: umax.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -93,7 +93,7 @@ define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: umax.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -102,7 +102,7 @@ define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: umax.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -111,23 +111,23 @@ define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: umax.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> 
@llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: smin_8b: ;CHECK: smin.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -136,7 +136,7 @@ define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: smin.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -145,7 +145,7 @@ define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: smin.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -154,7 +154,7 @@ define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: smin.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -163,7 +163,7 @@ define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: smin.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -172,23 +172,23 @@ define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: smin.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> 
@llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: umin_8b: ;CHECK: umin.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -197,7 +197,7 @@ define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: umin.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -206,7 +206,7 @@ define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: umin.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -215,7 +215,7 @@ define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: umin.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -224,7 +224,7 @@ define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: umin.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -233,25 +233,25 @@ define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: umin.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: 
smaxp_8b: ;CHECK: smaxp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -260,7 +260,7 @@ define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: smaxp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -269,7 +269,7 @@ define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: smaxp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -278,7 +278,7 @@ define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: smaxp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -287,7 +287,7 @@ define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: smaxp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -296,23 +296,23 @@ define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: smaxp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: umaxp_8b: ;CHECK: umaxp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -321,7 +321,7 @@ define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* 
%B) nounwind { ;CHECK: umaxp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -330,7 +330,7 @@ define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: umaxp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -339,7 +339,7 @@ define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: umaxp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -348,7 +348,7 @@ define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: umaxp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -357,25 +357,25 @@ define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: umaxp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sminp_8b: ;CHECK: sminp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -384,7 +384,7 @@ define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sminp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> 
@llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -393,7 +393,7 @@ define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sminp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -402,7 +402,7 @@ define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sminp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -411,7 +411,7 @@ define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sminp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -420,23 +420,23 @@ define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sminp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: uminp_8b: ;CHECK: uminp.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -445,7 +445,7 @@ define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uminp.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -454,7 +454,7 @@ define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uminp.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = 
call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -463,7 +463,7 @@ define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uminp.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -472,7 +472,7 @@ define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uminp.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -481,23 +481,23 @@ define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uminp.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmax_2s: ;CHECK: fmax.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -506,7 +506,7 @@ define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmax.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -515,20 +515,20 @@ define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmax.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmax.v4f32(<4 x float>, <4 x 
float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmaxp_2s: ;CHECK: fmaxp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -537,7 +537,7 @@ define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmaxp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -546,20 +546,20 @@ define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmaxp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmin_2s: ;CHECK: fmin.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -568,7 +568,7 @@ define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmin.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -577,20 +577,20 @@ define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmin.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmin.v2f64(<2 x 
double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fminp_2s: ;CHECK: fminp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -599,7 +599,7 @@ define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fminp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -608,20 +608,20 @@ define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fminp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fminnmp_2s: ;CHECK: fminnmp.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> @llvm.arm64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -630,7 +630,7 @@ define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fminnmp.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -639,20 +639,20 @@ define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fminnmp.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind 
readnone
+declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
 
 define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fmaxnmp_2s:
 ;CHECK: fmaxnmp.2s
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 ret <2 x float> %tmp3
 }
 
@@ -661,7 +661,7 @@ define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: fmaxnmp.4s
 %tmp1 = load <4 x float>* %A
 %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 ret <4 x float> %tmp3
 }
 
@@ -670,10 +670,10 @@ define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK: fmaxnmp.2d
 %tmp1 = load <2 x double>* %A
 %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 ret <2 x double> %tmp3
 }
 
-declare <2 x float> @llvm.arm64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vminmaxnm.ll b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
new file mode 100644
index 00000000000..b5aca45cd47
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <2 x float> @f1(<2 x float> %a, <2 x float> %b) nounwind readnone ssp {
+; CHECK: fmaxnm.2s v0, v0, v1
+; CHECK: ret
+ %vmaxnm2.i = tail call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind
+ ret <2 x float> %vmaxnm2.i
+}
+
+define <4 x float> @f2(<4 x float> %a, <4 x float> %b) nounwind readnone ssp {
+; CHECK: fmaxnm.4s v0, v0, v1
+; CHECK: ret
+ %vmaxnm2.i = tail call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind
+ ret <4 x float> %vmaxnm2.i
+}
+
+define <2 x double> @f3(<2 x double> %a, <2 x double> %b) nounwind readnone ssp {
+; CHECK: fmaxnm.2d v0, v0, v1
+; CHECK: ret
+ %vmaxnm2.i = tail call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind
+ ret <2 x double> %vmaxnm2.i
+}
+
+define <2 x float> @f4(<2 x float> %a, <2 x float> %b) nounwind readnone ssp {
+; CHECK: fminnm.2s v0, v0, v1
+; CHECK: ret
+ %vminnm2.i = tail call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind
+ ret <2 x float> %vminnm2.i
+}
+
+define <4 x float> @f5(<4 x float> %a, <4 x float> %b) nounwind readnone ssp {
+; CHECK: fminnm.4s v0, v0, v1
+; CHECK: ret
+ %vminnm2.i = tail call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind
+ ret <4 x float> %vminnm2.i
+}
+
+define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp {
+; CHECK: fminnm.2d v0, v0, v1
+; CHECK: ret
+ %vminnm2.i = tail call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind
+ ret <2 x double> %vminnm2.i
+}
+
+declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+
+define double @test_fmaxnmv(<2 x double> %in) {
+; CHECK-LABEL: test_fmaxnmv:
+; CHECK: fmaxnmp.2d d0, v0
+ %max = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
+ ret double %max
+}
+
+define double @test_fminnmv(<2 x double> %in) {
+; CHECK-LABEL: test_fminnmv:
+; CHECK: fminnmp.2d d0, v0
+ %min = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %in)
+ ret double %min
+}
+
+declare double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double>)
+declare double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double>)
diff --git a/test/CodeGen/ARM64/vmovn.ll b/test/CodeGen/AArch64/arm64-vmovn.ll
similarity index 74%
rename from test/CodeGen/ARM64/vmovn.ll
rename to test/CodeGen/AArch64/arm64-vmovn.ll
index 675633b6cfa..67e2816a7f5 100644
--- a/test/CodeGen/ARM64/vmovn.ll
+++ b/test/CodeGen/AArch64/arm64-vmovn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @xtn8b(<8 x i16> %A) nounwind {
 ;CHECK-LABEL: xtn8b:
@@ -62,7 +62,7 @@ define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind {
 ;CHECK-NOT: ld1
 ;CHECK: sqxtn.8b v0, v0
 ;CHECK-NEXT: ret
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %A)
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %A)
 ret <8 x i8> %tmp3
 }
 
@@ -71,7 +71,7 @@ define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind {
 ;CHECK-NOT: ld1
 ;CHECK: sqxtn.4h v0, v0
 ;CHECK-NEXT: ret
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> %A)
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %A)
 ret <4 x i16> %tmp3
 }
 
@@ -80,7 +80,7 @@ define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind {
 ;CHECK-NOT: ld1
 ;CHECK: sqxtn.2s v0, v0
 ;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %A)
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %A)
 ret <2 x i32> %tmp3
 }
 
@@ -89,7 +89,7 @@ define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
 ;CHECK-NOT: ld1
 ;CHECK: sqxtn2.16b v0, v1
 ;CHECK-NEXT: ret
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %A)
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %A)
 %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32>
 ret <16 x i8> %res
 }
@@ -99,7 +99,7 @@ define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
 ;CHECK-NOT: ld1
 ;CHECK: sqxtn2.8h v0, v1
 ;CHECK-NEXT: ret
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32>
%A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %A) %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %res } @@ -109,21 +109,21 @@ define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtn2.4s v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %A) %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %res } -declare <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64>) nounwind readnone define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind { ;CHECK-LABEL: uqxtn8b: ;CHECK-NOT: ld1 ;CHECK: uqxtn.8b v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %A) ret <8 x i8> %tmp3 } @@ -132,7 +132,7 @@ define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn.4h v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %A) ret <4 x i16> %tmp3 } @@ -141,7 +141,7 @@ define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %A) ret <2 x i32> %tmp3 } @@ -150,7 +150,7 @@ define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn2.16b v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %A) %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %res } @@ -160,7 +160,7 @@ define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn2.8h v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %A) %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %res } @@ -170,21 +170,21 @@ define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: uqxtn2.4s v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %A) %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %res } -declare <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64>) nounwind readnone define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind { ;CHECK-LABEL: sqxtun8b: ;CHECK-NOT: ld1 ;CHECK: sqxtun.8b v0, v0 
;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %A) ret <8 x i8> %tmp3 } @@ -193,7 +193,7 @@ define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun.4h v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %A) ret <4 x i16> %tmp3 } @@ -202,7 +202,7 @@ define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun.2s v0, v0 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %A) ret <2 x i32> %tmp3 } @@ -211,7 +211,7 @@ define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun2.16b v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %A) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %A) %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %res } @@ -221,7 +221,7 @@ define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun2.8h v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %A) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %A) %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %res } @@ -231,12 +231,12 @@ define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind { ;CHECK-NOT: ld1 ;CHECK: sqxtun2.4s v0, v1 ;CHECK-NEXT: ret - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %A) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %A) %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %res } -declare <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM64/vmul.ll b/test/CodeGen/AArch64/arm64-vmul.ll similarity index 80% rename from test/CodeGen/ARM64/vmul.ll rename to test/CodeGen/AArch64/arm64-vmul.ll index b6bd16ac0b4..6fa60fe346a 100644 --- a/test/CodeGen/ARM64/vmul.ll +++ b/test/CodeGen/AArch64/arm64-vmul.ll @@ -1,4 +1,4 @@ -; RUN: llc -asm-verbose=false < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { @@ -6,7 +6,7 @@ define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: smull.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -15,7 +15,7 @@ define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: smull.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x 
i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -24,20 +24,20 @@ define <2 x i64> @smull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: smull.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: umull8h: ;CHECK: umull.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -46,7 +46,7 @@ define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: umull.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -55,20 +55,20 @@ define <2 x i64> @umull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: umull.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: sqdmull4s: ;CHECK: sqdmull.4s %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -77,7 +77,7 @@ define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqdmull.2d %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } @@ -88,7 +88,7 @@ define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { %load2 = load <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp3 = call <4 x i32> 
@llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -99,31 +99,31 @@ define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { %load2 = load <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } -declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: pmull8h: ;CHECK: pmull.8h %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } -declare <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: sqdmulh_4h: ;CHECK: sqdmulh.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -132,7 +132,7 @@ define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqdmulh.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -141,7 +141,7 @@ define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqdmulh.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -150,7 +150,7 @@ define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqdmulh.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -159,22 +159,22 @@ define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind { ;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}} %tmp1 = load i32* %A %tmp2 = load i32* %B - %tmp3 = call i32 @llvm.arm64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2) + %tmp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2) ret i32 %tmp3 } -declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x 
i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare i32 @llvm.arm64.neon.sqdmulh.i32(i32, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare i32 @llvm.aarch64.neon.sqdmulh.i32(i32, i32) nounwind readnone define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: sqrdmulh_4h: ;CHECK: sqrdmulh.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -183,7 +183,7 @@ define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqrdmulh.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -192,7 +192,7 @@ define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqrdmulh.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -201,7 +201,7 @@ define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqrdmulh.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -210,22 +210,22 @@ define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind { ;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}} %tmp1 = load i32* %A %tmp2 = load i32* %B - %tmp3 = call i32 @llvm.arm64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2) + %tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2) ret i32 %tmp3 } -declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare i32 @llvm.arm64.neon.sqrdmulh.i32(i32, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) nounwind readnone define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: fmulx_2s: ;CHECK: fmulx.2s %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x float> 
@llvm.arm64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -234,7 +234,7 @@ define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: fmulx.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -243,13 +243,13 @@ define <2 x double> @fmulx_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: fmulx.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: smlal4s: @@ -257,7 +257,7 @@ define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } @@ -268,7 +268,7 @@ define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } @@ -279,7 +279,7 @@ define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp5 = sub <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } @@ -290,15 +290,15 @@ define <2 x i64> @smlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp5 = sub <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) -declare <2 x i64> 
@llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: sqdmlal4s: @@ -306,8 +306,8 @@ define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwin %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -317,8 +317,8 @@ define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwin %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -330,8 +330,8 @@ define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounw %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -343,8 +343,8 @@ define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounw %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -354,8 +354,8 @@ define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwin %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -365,8 +365,8 @@ define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwin %tmp1 = load <2 x i32>* 
%A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -378,8 +378,8 @@ define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounw %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -391,8 +391,8 @@ define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounw %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -402,7 +402,7 @@ define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } @@ -413,7 +413,7 @@ define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } @@ -424,7 +424,7 @@ define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C - %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp5 = sub <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } @@ -435,7 +435,7 @@ define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C - %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp5 = sub <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } @@ -717,7 +717,7 @@ define <2 x float> @fmulx_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind { %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 
= shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> - %tmp4 = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3) + %tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3) ret <2 x float> %tmp4 } @@ -728,7 +728,7 @@ define <4 x float> @fmulx_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> - %tmp4 = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3) + %tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3) ret <4 x float> %tmp4 } @@ -739,7 +739,7 @@ define <2 x double> @fmulx_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> - %tmp4 = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3) + %tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3) ret <2 x double> %tmp4 } @@ -750,7 +750,7 @@ define <4 x i16> @sqdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i16> %tmp4 } @@ -761,7 +761,7 @@ define <8 x i16> @sqdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> - %tmp4 = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3) + %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3) ret <8 x i16> %tmp4 } @@ -772,7 +772,7 @@ define <2 x i32> @sqdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i32> %tmp4 } @@ -783,7 +783,7 @@ define <4 x i32> @sqdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3) ret <4 x i32> %tmp4 } @@ -792,7 +792,7 @@ define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind { ;CHECK-NOT: dup ;CHECK: sqdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1] %tmp1 = extractelement <4 x i32> %B, i32 1 - %tmp2 = call i32 @llvm.arm64.neon.sqdmulh.i32(i32 %A, i32 %tmp1) + %tmp2 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %A, i32 %tmp1) ret i32 %tmp2 } @@ -803,7 +803,7 @@ define <4 x i16> @sqrdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i16> 
@llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i16> %tmp4 } @@ -814,7 +814,7 @@ define <8 x i16> @sqrdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> - %tmp4 = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3) + %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3) ret <8 x i16> %tmp4 } @@ -825,7 +825,7 @@ define <2 x i32> @sqrdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i32> %tmp4 } @@ -836,7 +836,7 @@ define <4 x i32> @sqrdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3) ret <4 x i32> %tmp4 } @@ -845,7 +845,7 @@ define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind { ;CHECK-NOT: dup ;CHECK: sqrdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1] %tmp1 = extractelement <4 x i32> %B, i32 1 - %tmp2 = call i32 @llvm.arm64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1) + %tmp2 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1) ret i32 %tmp2 } @@ -856,7 +856,7 @@ define <4 x i32> @sqdmull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i32> %tmp4 } @@ -867,7 +867,7 @@ define <2 x i64> @sqdmull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i64> %tmp4 } @@ -879,7 +879,7 @@ define <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { %load2 = load <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp4 } @@ -891,7 +891,7 @@ define <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { %load2 = load <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp4 } @@ -902,7 +902,7 @@ define <4 x i32> @umull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind 
{ %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i32> %tmp4 } @@ -913,7 +913,7 @@ define <2 x i64> @umull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i64> %tmp4 } @@ -924,7 +924,7 @@ define <4 x i32> @smull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) + %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) ret <4 x i32> %tmp4 } @@ -935,7 +935,7 @@ define <2 x i64> @smull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) + %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) ret <2 x i64> %tmp4 } @@ -947,7 +947,7 @@ define <4 x i32> @smlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) %tmp6 = add <4 x i32> %tmp3, %tmp5 ret <4 x i32> %tmp6 } @@ -960,7 +960,7 @@ define <2 x i64> @smlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) %tmp6 = add <2 x i64> %tmp3, %tmp5 ret <2 x i64> %tmp6 } @@ -973,8 +973,8 @@ define <4 x i32> @sqdmlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) n %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) - %tmp6 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) ret <4 x i32> %tmp6 } @@ -986,8 +986,8 @@ define <2 x i64> @sqdmlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) n %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) - %tmp6 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x 
i32> %tmp4) + %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) ret <2 x i64> %tmp6 } @@ -1000,8 +1000,8 @@ define <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp6 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) ret <4 x i32> %tmp6 } @@ -1014,8 +1014,8 @@ define <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp6 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) ret <2 x i64> %tmp6 } @@ -1024,45 +1024,45 @@ define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { ;CHECK: sqdmlal.4s %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> - %prod.vec = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) + %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) %prod = extractelement <4 x i32> %prod.vec, i32 0 - %res = call i32 @llvm.arm64.neon.sqadd.i32(i32 %A, i32 %prod) + %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod) ret i32 %res } -declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) +declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { ;CHECK-LABEL: sqdmlsl_lane_1s: ;CHECK: sqdmlsl.4s %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> - %prod.vec = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) + %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) %prod = extractelement <4 x i32> %prod.vec, i32 0 - %res = call i32 @llvm.arm64.neon.sqsub.i32(i32 %A, i32 %prod) + %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod) ret i32 %res } -declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) +declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { ;CHECK-LABEL: sqdmlal_lane_1d: ;CHECK: sqdmlal.s %rhs = extractelement <2 x i32> %C, i32 1 - %prod = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) - %res = call i64 @llvm.arm64.neon.sqadd.i64(i64 %A, i64 %prod) + %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) + %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod) ret i64 %res } -declare i64 @llvm.arm64.neon.sqdmulls.scalar(i32, i32) -declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) +declare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32) +declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x 
i32> %C) nounwind { ;CHECK-LABEL: sqdmlsl_lane_1d: ;CHECK: sqdmlsl.s %rhs = extractelement <2 x i32> %C, i32 1 - %prod = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) - %res = call i64 @llvm.arm64.neon.sqsub.i64(i64 %A, i64 %prod) + %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) + %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod) ret i64 %res } -declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) +declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) define <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { @@ -1073,7 +1073,7 @@ define <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) %tmp6 = add <4 x i32> %tmp3, %tmp5 ret <4 x i32> %tmp6 } @@ -1086,7 +1086,7 @@ define <2 x i64> @umlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) %tmp6 = add <2 x i64> %tmp3, %tmp5 ret <2 x i64> %tmp6 } @@ -1100,7 +1100,7 @@ define <4 x i32> @smlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) %tmp6 = sub <4 x i32> %tmp3, %tmp5 ret <4 x i32> %tmp6 } @@ -1113,7 +1113,7 @@ define <2 x i64> @smlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) %tmp6 = sub <2 x i64> %tmp3, %tmp5 ret <2 x i64> %tmp6 } @@ -1126,8 +1126,8 @@ define <4 x i32> @sqdmlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) n %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) - %tmp6 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) ret <4 x i32> %tmp6 } @@ -1139,8 +1139,8 @@ define <2 x i64> @sqdmlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) n %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) - %tmp6 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp6 = call <2 x i64> 
@llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) ret <2 x i64> %tmp6 } @@ -1153,8 +1153,8 @@ define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) %tmp3 = load <4 x i32>* %C %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) - %tmp6 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) ret <4 x i32> %tmp6 } @@ -1167,8 +1167,8 @@ define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) %tmp3 = load <2 x i64>* %C %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) - %tmp6 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) ret <2 x i64> %tmp6 } @@ -1180,7 +1180,7 @@ define <4 x i32> @umlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou %tmp2 = load <4 x i16>* %B %tmp3 = load <4 x i32>* %C %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> - %tmp5 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) + %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) %tmp6 = sub <4 x i32> %tmp3, %tmp5 ret <4 x i32> %tmp6 } @@ -1193,7 +1193,7 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou %tmp2 = load <2 x i32>* %B %tmp3 = load <2 x i64>* %C %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> - %tmp5 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) + %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) %tmp6 = sub <2 x i64> %tmp3, %tmp5 ret <2 x i64> %tmp6 } @@ -1202,7 +1202,7 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou define float @fmulxs(float %a, float %b) nounwind { ; CHECK-LABEL: fmulxs: ; CHECKNEXT: fmulx s0, s0, s1 - %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind + %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind ; CHECKNEXT: ret ret float %fmulx.i } @@ -1210,7 +1210,7 @@ define float @fmulxs(float %a, float %b) nounwind { define double @fmulxd(double %a, double %b) nounwind { ; CHECK-LABEL: fmulxd: ; CHECKNEXT: fmulx d0, d0, d1 - %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind + %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind ; CHECKNEXT: ret ret double %fmulx.i } @@ -1219,7 +1219,7 @@ define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind { ; CHECK-LABEL: fmulxs_lane: ; CHECKNEXT: fmulx.s s0, s0, v1[3] %b = extractelement <4 x float> %vec, i32 3 - %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind + %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind ; CHECKNEXT: ret ret float %fmulx.i } @@ -1228,13 +1228,13 @@ define double 
@fmulxd_lane(double %a, <2 x double> %vec) nounwind { ; CHECK-LABEL: fmulxd_lane: ; CHECKNEXT: fmulx d0, d0, v1[1] %b = extractelement <2 x double> %vec, i32 1 - %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind + %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind ; CHECKNEXT: ret ret double %fmulx.i } -declare double @llvm.arm64.neon.fmulx.f64(double, double) nounwind readnone -declare float @llvm.arm64.neon.fmulx.f32(float, float) nounwind readnone +declare double @llvm.aarch64.neon.fmulx.f64(double, double) nounwind readnone +declare float @llvm.aarch64.neon.fmulx.f32(float, float) nounwind readnone define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind { @@ -1243,7 +1243,7 @@ define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECK-NEXT: ret %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> - %3 = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2 + %3 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2 ret <8 x i16> %3 } @@ -1256,7 +1256,7 @@ define <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind { %tmp2 = bitcast <16 x i8> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind ret <8 x i16> %vmull.i.i } @@ -1269,7 +1269,7 @@ define <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind { %tmp2 = bitcast <8 x i16> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind ret <4 x i32> %vmull2.i.i } @@ -1282,7 +1282,7 @@ define <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind { %tmp2 = bitcast <4 x i32> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind ret <2 x i64> %vmull2.i.i } @@ -1295,7 +1295,7 @@ define <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind { %tmp2 = bitcast <16 x i8> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8> - %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind ret <8 x i16> %vmull.i.i } @@ -1308,7 +1308,7 @@ define <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind { %tmp2 = bitcast <8 x i16> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i = tail call <4 x 
i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind ret <4 x i32> %vmull2.i.i } @@ -1321,7 +1321,7 @@ define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind { %tmp2 = bitcast <4 x i32> %b to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind ret <2 x i64> %vmull2.i.i } @@ -1334,7 +1334,7 @@ entry: %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind ret <4 x i32> %vmull2.i } @@ -1347,7 +1347,7 @@ entry: %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind ret <2 x i64> %vmull2.i } @@ -1360,7 +1360,7 @@ entry: %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind ret <4 x i32> %vmull2.i } @@ -1373,7 +1373,7 @@ entry: %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind ret <2 x i64> %vmull2.i } @@ -1388,7 +1388,7 @@ define <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { %tmp2 = bitcast <16 x i8> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind %add.i = add <8 x i16> %vmull.i.i.i, %a ret <8 x i16> %add.i } @@ -1404,7 +1404,7 @@ define <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { %tmp2 = bitcast <8 x i16> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind %add.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i } @@ -1420,7 +1420,7 @@ define <2 x i64> 
@bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { %tmp2 = bitcast <4 x i32> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind %add.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i } @@ -1436,7 +1436,7 @@ define <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { %tmp2 = bitcast <16 x i8> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind + %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind %add.i = add <8 x i16> %vmull.i.i.i, %a ret <8 x i16> %add.i } @@ -1452,7 +1452,7 @@ define <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { %tmp2 = bitcast <8 x i16> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind %add.i = add <4 x i32> %vmull2.i.i.i, %a ret <4 x i32> %add.i } @@ -1468,7 +1468,7 @@ define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { %tmp2 = bitcast <4 x i32> %c to <2 x i64> %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind %add.i = add <2 x i64> %vmull2.i.i.i, %a ret <2 x i64> %add.i } @@ -1484,7 +1484,7 @@ define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind %add = add <4 x i32> %vmull2.i.i, %a ret <4 x i32> %add } @@ -1500,7 +1500,7 @@ define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind %add = add <2 x i64> %vmull2.i.i, %a ret <2 x i64> %add } @@ -1517,7 +1517,7 @@ define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 
x i16> - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind %add = add <4 x i32> %vmull2.i.i, %a ret <4 x i32> %add } @@ -1533,7 +1533,7 @@ define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64> %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind %add = add <2 x i64> %vmull2.i.i, %a ret <2 x i64> %add } @@ -1631,7 +1631,7 @@ entry: ; CHECK: smull.4s v0, v0, v1[6] ; CHECK-NEXT: ret %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 ret <4 x i32> %vmull2.i } @@ -1642,7 +1642,7 @@ entry: ; CHECK: smull.2d v0, v0, v1[2] ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 ret <2 x i64> %vmull2.i } define <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp { @@ -1652,7 +1652,7 @@ entry: ; CHECK: umull.4s v0, v0, v1[6] ; CHECK-NEXT: ret %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> - %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 ret <4 x i32> %vmull2.i } @@ -1663,7 +1663,7 @@ entry: ; CHECK: umull.2d v0, v0, v1[2] ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> - %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 + %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 ret <2 x i64> %vmull2.i } @@ -1681,7 +1681,7 @@ entry: %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3 - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind ret <4 x i32> %vmull2.i.i } @@ -1696,7 +1696,7 @@ entry: %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1 - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind ret <2 x i64> %vmull2.i.i } @@ -1714,7 +1714,7 @@ entry: %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 %vecinit3.i = insertelement <4 x 
i16> %vecinit2.i, i16 %conv, i32 3 - %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind + %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind ret <4 x i32> %vmull2.i.i } @@ -1729,7 +1729,7 @@ entry: %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1 - %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind + %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind ret <2 x i64> %vmull2.i.i } @@ -1787,7 +1787,7 @@ define <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind ret <2 x i64> %res } @@ -1799,8 +1799,8 @@ define <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind - %sum = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) + %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) ret <2 x i64> %sum } @@ -1813,7 +1813,7 @@ define <2 x i64> @mull_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind + %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind ret <2 x i64> %res } @@ -1826,7 +1826,7 @@ define <8 x i16> @pmull_from_extract_dup(<16 x i8> %lhs, i8 %rhs) { %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> - %res = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind + %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind ret <8 x i16> %res } @@ -1838,7 +1838,7 @@ define <8 x i16> @pmull_from_extract_duplane(<16 x i8> %lhs, <8 x i8> %rhs) { %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> - %res = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind + %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind ret <8 x i16> %res } @@ -1850,7 +1850,7 @@ define <2 x i64> @sqdmull_from_extract_duplane(<4 x i32> %lhs, <4 x i32> %rhs) { %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %res = tail call <2 x i64> 
@llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind ret <2 x i64> %res } @@ -1862,8 +1862,8 @@ define <2 x i64> @sqdmlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind - %sum = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) + %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) ret <2 x i64> %sum } @@ -1875,7 +1875,7 @@ define <2 x i64> @umlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, < %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> - %res = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind + %res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind %sum = add <2 x i64> %accum, %res ret <2 x i64> %sum } @@ -1997,23 +1997,23 @@ define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind { ;CHECK-LABEL: sqdmlal_d: ;CHECK: sqdmlal - %tmp4 = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %A, i32 %B) - %tmp5 = call i64 @llvm.arm64.neon.sqadd.i64(i64 %C, i64 %tmp4) + %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B) + %tmp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %tmp4) ret i64 %tmp5 } define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { ;CHECK-LABEL: sqdmlsl_d: ;CHECK: sqdmlsl - %tmp4 = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %A, i32 %B) - %tmp5 = call i64 @llvm.arm64.neon.sqsub.i64(i64 %C, i64 %tmp4) + %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B) + %tmp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %tmp4) ret i64 %tmp5 } define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { ; CHECK-LABEL: test_pmull_64: ; CHECK: pmull.1q - %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l, i64 %r) + %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ret <16 x i8> %val } @@ -2022,11 +2022,11 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { ; CHECK: pmull2.1q %l_hi = extractelement <2 x i64> %l, i32 1 %r_hi = extractelement <2 x i64> %r, i32 1 - %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l_hi, i64 %r_hi) + %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi) ret <16 x i8> %val } -declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64) +declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind { ; CHECK-LABEL: test_mul_v1i64: diff --git a/test/CodeGen/ARM64/volatile.ll b/test/CodeGen/AArch64/arm64-volatile.ll similarity index 100% rename from test/CodeGen/ARM64/volatile.ll rename to test/CodeGen/AArch64/arm64-volatile.ll diff --git a/test/CodeGen/ARM64/vpopcnt.ll b/test/CodeGen/AArch64/arm64-vpopcnt.ll similarity index 100% rename from test/CodeGen/ARM64/vpopcnt.ll rename to test/CodeGen/AArch64/arm64-vpopcnt.ll diff --git a/test/CodeGen/ARM64/vqadd.ll b/test/CodeGen/AArch64/arm64-vqadd.ll similarity index 50% rename from test/CodeGen/ARM64/vqadd.ll rename to 
test/CodeGen/AArch64/arm64-vqadd.ll index 0b7f7e53105..20f7e2c7a89 100644 --- a/test/CodeGen/ARM64/vqadd.ll +++ b/test/CodeGen/AArch64/arm64-vqadd.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sqadd8b: ;CHECK: sqadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sqadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uqadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -41,7 +41,7 @@ define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uqadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -50,7 +50,7 @@ define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uqadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -59,7 +59,7 @@ define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sqadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -68,7 +68,7 @@ define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -77,7 +77,7 @@ define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -86,7 +86,7 @@ define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 
x i64>* %B) nounwind { ;CHECK: sqadd.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -95,7 +95,7 @@ define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uqadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -104,7 +104,7 @@ define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uqadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -113,7 +113,7 @@ define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uqadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -122,36 +122,36 @@ define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: uqadd.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, <16 x 
i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: usqadd8b: ;CHECK: usqadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -160,7 +160,7 @@ define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: usqadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -169,7 +169,7 @@ define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: usqadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -178,7 +178,7 @@ define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: usqadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -187,7 +187,7 @@ define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: usqadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -196,7 +196,7 @@ define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: usqadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -205,42 +205,42 @@ define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: usqadd.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } define i64 
@usqadd_d(i64 %l, i64 %r) nounwind { ; CHECK-LABEL: usqadd_d: ; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}} - %sum = call i64 @llvm.arm64.neon.usqadd.i64(i64 %l, i64 %r) + %sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r) ret i64 %sum } define i32 @usqadd_s(i32 %l, i32 %r) nounwind { ; CHECK-LABEL: usqadd_s: ; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}} - %sum = call i32 @llvm.arm64.neon.usqadd.i32(i32 %l, i32 %r) + %sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r) ret i32 %sum } -declare <8 x i8> @llvm.arm64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare i64 @llvm.arm64.neon.usqadd.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.usqadd.i32(i32, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: suqadd8b: ;CHECK: suqadd.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -249,7 +249,7 @@ define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: suqadd.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -258,7 +258,7 @@ define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: suqadd.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -267,7 +267,7 @@ define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: suqadd.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call 
<16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -276,7 +276,7 @@ define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: suqadd.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -285,7 +285,7 @@ define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: suqadd.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -294,39 +294,39 @@ define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: suqadd.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind { ; CHECK-LABEL: suqadd_1d: ; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} - %sum = call <1 x i64> @llvm.arm64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r) + %sum = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r) ret <1 x i64> %sum } define i64 @suqadd_d(i64 %l, i64 %r) nounwind { ; CHECK-LABEL: suqadd_d: ; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} - %sum = call i64 @llvm.arm64.neon.suqadd.i64(i64 %l, i64 %r) + %sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r) ret i64 %sum } define i32 @suqadd_s(i32 %l, i32 %r) nounwind { ; CHECK-LABEL: suqadd_s: ; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}} - %sum = call i32 @llvm.arm64.neon.suqadd.i32(i32 %l, i32 %r) + %sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r) ret i32 %sum } -declare <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare i64 @llvm.arm64.neon.suqadd.i64(i64, i64) nounwind readnone -declare i32 @llvm.arm64.neon.suqadd.i32(i32, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 
+declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/AArch64/arm64-vqsub.ll b/test/CodeGen/AArch64/arm64-vqsub.ll new file mode 100644 index 00000000000..dde3ac3478e --- /dev/null +++ b/test/CodeGen/AArch64/arm64-vqsub.ll @@ -0,0 +1,147 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: sqsub8b: +;CHECK: sqsub.8b + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: sqsub4h: +;CHECK: sqsub.4h + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: sqsub2s: +;CHECK: sqsub.2s + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: uqsub8b: +;CHECK: uqsub.8b + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: uqsub4h: +;CHECK: uqsub.4h + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: uqsub2s: +;CHECK: uqsub.2s + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: sqsub16b: +;CHECK: sqsub.16b + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: sqsub8h: +;CHECK: sqsub.8h + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: sqsub4s: +;CHECK: sqsub.4s + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: sqsub2d: +;CHECK: sqsub.2d + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: uqsub16b: +;CHECK: uqsub.16b + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x 
i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: uqsub8h: +;CHECK: uqsub.8h + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: uqsub4s: +;CHECK: uqsub.4s + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: uqsub2d: +;CHECK: uqsub.2d + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM64/vselect.ll b/test/CodeGen/AArch64/arm64-vselect.ll similarity index 89% rename from test/CodeGen/ARM64/vselect.ll rename to test/CodeGen/AArch64/arm64-vselect.ll index aa8e81eb709..9988512f530 100644 --- a/test/CodeGen/ARM64/vselect.ll +++ b/test/CodeGen/AArch64/arm64-vselect.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s ;CHECK: @func63 ;CHECK: cmeq.4h v0, v0, v1 diff --git a/test/CodeGen/ARM64/vsetcc_fp.ll b/test/CodeGen/AArch64/arm64-vsetcc_fp.ll similarity index 80% rename from test/CodeGen/ARM64/vsetcc_fp.ll rename to test/CodeGen/AArch64/arm64-vsetcc_fp.ll index c93aad5c4ee..f4f4714dde4 100644 --- a/test/CodeGen/ARM64/vsetcc_fp.ll +++ b/test/CodeGen/AArch64/arm64-vsetcc_fp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s define <2 x i32> @fcmp_one(<2 x float> %x, <2 x float> %y) nounwind optsize readnone { ; CHECK-LABEL: fcmp_one: ; CHECK-NEXT: fcmgt.2s [[REG:v[0-9]+]], v0, v1 diff --git 
a/test/CodeGen/ARM64/vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll similarity index 68% rename from test/CodeGen/ARM64/vshift.ll rename to test/CodeGen/AArch64/arm64-vshift.ll index 486c6cc390b..82ae486f8c4 100644 --- a/test/CodeGen/ARM64/vshift.ll +++ b/test/CodeGen/AArch64/arm64-vshift.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -enable-misched=false | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sqshl8b: ;CHECK: sqshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sqshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -32,7 +32,7 @@ define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uqshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -41,7 +41,7 @@ define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uqshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -50,7 +50,7 @@ define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uqshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -59,7 +59,7 @@ define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sqshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -68,7 +68,7 @@ define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -77,7 +77,7 @@ define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32> %tmp1, 
<4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -86,7 +86,7 @@ define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: sqshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -95,7 +95,7 @@ define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uqshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -104,7 +104,7 @@ define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uqshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -113,7 +113,7 @@ define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uqshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -122,36 +122,36 @@ define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: uqshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 
x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: srshl8b: ;CHECK: srshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -160,7 +160,7 @@ define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: srshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -169,7 +169,7 @@ define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: srshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -178,7 +178,7 @@ define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: urshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -187,7 +187,7 @@ define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: urshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -196,7 +196,7 @@ define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: urshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -205,7 +205,7 @@ define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: srshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 
x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -214,7 +214,7 @@ define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: srshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -223,7 +223,7 @@ define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: srshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -232,7 +232,7 @@ define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: srshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -241,7 +241,7 @@ define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: urshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -250,7 +250,7 @@ define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: urshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -259,7 +259,7 @@ define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: urshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -268,36 +268,36 @@ define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: urshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> 
@llvm.arm64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: sqrshl8b: ;CHECK: sqrshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -306,7 +306,7 @@ define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sqrshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -315,7 +315,7 @@ define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sqrshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -324,7 +324,7 @@ define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: uqrshl.8b %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -333,7 +333,7 @@ define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: uqrshl.4h %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> 
@llvm.arm64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -342,7 +342,7 @@ define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: uqrshl.2s %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -351,7 +351,7 @@ define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sqrshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -360,7 +360,7 @@ define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sqrshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -369,7 +369,7 @@ define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sqrshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -378,7 +378,7 @@ define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: sqrshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -387,7 +387,7 @@ define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: uqrshl.16b %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -396,7 +396,7 @@ define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: uqrshl.8h %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -405,7 +405,7 @@ define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: uqrshl.4s %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -414,35 +414,35 @@ define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: uqrshl.2d %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> 
@llvm.arm64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: urshr8b: ;CHECK: urshr.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -450,7 +450,7 @@ define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: urshr4h: ;CHECK: urshr.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -458,7 +458,7 @@ define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: urshr2s: ;CHECK: urshr.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> 
@llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -466,7 +466,7 @@ define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: urshr16b: ;CHECK: urshr.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -474,7 +474,7 @@ define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: urshr8h: ;CHECK: urshr.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -482,7 +482,7 @@ define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: urshr4s: ;CHECK: urshr.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -490,7 +490,7 @@ define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: urshr2d: ;CHECK: urshr.2d %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } @@ -498,7 +498,7 @@ define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: srshr8b: ;CHECK: srshr.8b %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -506,7 +506,7 @@ define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: srshr4h: ;CHECK: srshr.4h %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -514,7 +514,7 @@ define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: srshr2s: ;CHECK: srshr.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -522,7 +522,7 @@ define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: srshr16b: ;CHECK: srshr.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -530,7 +530,7 @@ define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: srshr8h: ;CHECK: srshr.8h %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -538,7 +538,7 @@ define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: srshr4s: ;CHECK: srshr.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -546,7 +546,7 @@ define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: srshr2d: ;CHECK: 
srshr.2d %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } @@ -554,7 +554,7 @@ define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sqshlu8b: ;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -562,7 +562,7 @@ define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: sqshlu4h: ;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -570,7 +570,7 @@ define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: sqshlu2s: ;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -578,7 +578,7 @@ define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sqshlu16b: ;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -586,7 +586,7 @@ define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqshlu8h: ;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -594,7 +594,7 @@ define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqshlu4s: ;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -602,25 +602,25 @@ define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqshlu2d: ;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x 
i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: rshrn8b: ;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -628,7 +628,7 @@ define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: rshrn4h: ;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -636,7 +636,7 @@ define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: rshrn2s: ;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -645,7 +645,7 @@ define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind { ;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -655,7 +655,7 @@ define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -665,14 +665,14 @@ define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: shrn8b: @@ -734,14 +734,14 @@ define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ret <4 x i32> %tmp4 } -declare <8 x i8> 
@llvm.arm64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqshrn1s(i64 %A) nounwind { ; CHECK-LABEL: sqshrn1s: ; CHECK: sqshrn {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.sqshrn.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1) ret i32 %tmp } @@ -749,7 +749,7 @@ define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqshrn8b: ;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -757,7 +757,7 @@ define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqshrn4h: ;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -765,7 +765,7 @@ define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqshrn2s: ;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -775,7 +775,7 @@ define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -785,7 +785,7 @@ define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -795,20 +795,20 @@ define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.sqshrn.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x 
i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqshrun1s(i64 %A) nounwind { ; CHECK-LABEL: sqshrun1s: ; CHECK: sqshrun {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.sqshrun.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1) ret i32 %tmp } @@ -816,7 +816,7 @@ define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqshrun8b: ;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -824,7 +824,7 @@ define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqshrun4h: ;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -832,7 +832,7 @@ define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqshrun2s: ;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -841,7 +841,7 @@ define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -851,7 +851,7 @@ define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -861,20 +861,20 @@ define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.sqshrun.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqrshrn1s(i64 %A) nounwind { ; CHECK-LABEL: sqrshrn1s: ; CHECK: sqrshrn {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.sqrshrn.i32(i64 %A, i32 1) + %tmp = call i32 
@llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1) ret i32 %tmp } @@ -882,7 +882,7 @@ define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqrshrn8b: ;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -890,7 +890,7 @@ define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqrshrn4h: ;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -898,7 +898,7 @@ define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqrshrn2s: ;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -907,7 +907,7 @@ define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -917,7 +917,7 @@ define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -927,20 +927,20 @@ define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.sqrshrn.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqrshrun1s(i64 %A) nounwind { ; CHECK-LABEL: sqrshrun1s: ; CHECK: sqrshrun {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.sqrshrun.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1) ret i32 %tmp } @@ -948,7 +948,7 @@ define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqrshrun8b: ;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) + 
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -956,7 +956,7 @@ define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqrshrun4h: ;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -964,7 +964,7 @@ define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqrshrun2s: ;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -973,7 +973,7 @@ define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -983,7 +983,7 @@ define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -993,20 +993,20 @@ define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.sqrshrun.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone define i32 @uqrshrn1s(i64 %A) nounwind { ; CHECK-LABEL: uqrshrn1s: ; CHECK: uqrshrn {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.uqrshrn.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1) ret i32 %tmp } @@ -1014,7 +1014,7 @@ define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: uqrshrn8b: ;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -1022,7 +1022,7 @@ define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: uqrshrn4h: ;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x 
i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -1030,7 +1030,7 @@ define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: uqrshrn2s: ;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -1039,7 +1039,7 @@ define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -1049,7 +1049,7 @@ define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -1059,20 +1059,20 @@ define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.uqrshrn.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @uqshrn1s(i64 %A) nounwind { ; CHECK-LABEL: uqshrn1s: ; CHECK: uqshrn {{s[0-9]+}}, d0, #1 - %tmp = call i32 @llvm.arm64.neon.uqshrn.i32(i64 %A, i32 1) + %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1) ret i32 %tmp } @@ -1080,7 +1080,7 @@ define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind { ;CHECK-LABEL: uqshrn8b: ;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } @@ -1088,7 +1088,7 @@ define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind { ;CHECK-LABEL: uqshrn4h: ;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } @@ -1096,7 +1096,7 @@ define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind { ;CHECK-LABEL: uqshrn2s: ;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x 
i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } @@ -1105,7 +1105,7 @@ define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1 %out = load <8 x i8>* %ret %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } @@ -1115,7 +1115,7 @@ define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1 %out = load <4 x i16>* %ret %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } @@ -1125,15 +1125,15 @@ define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1 %out = load <2 x i32>* %ret %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } -declare i32 @llvm.arm64.neon.uqshrn.i32(i64, i32) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone +declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind { ;CHECK-LABEL: ushll8h: @@ -1253,7 +1253,7 @@ define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: sqshli8b: ;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -1261,7 +1261,7 @@ define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: sqshli4h: ;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -1269,7 +1269,7 @@ define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: sqshli2s: ;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -1277,7 +1277,7 @@ define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sqshli16b: ;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> 
@llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -1285,7 +1285,7 @@ define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sqshli8h: ;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -1293,7 +1293,7 @@ define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sqshli4s: ;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -1301,7 +1301,7 @@ define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: sqshli2d: ;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } @@ -1309,7 +1309,7 @@ define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind { ;CHECK-LABEL: uqshli8b: ;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } @@ -1317,7 +1317,7 @@ define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: uqshli4h: ;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } @@ -1325,7 +1325,7 @@ define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: uqshli2s: ;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } @@ -1333,7 +1333,7 @@ define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind { ;CHECK-LABEL: uqshli16b: ;CHECK: uqshl.16b %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } @@ -1341,7 +1341,7 @@ define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind { ;CHECK-LABEL: uqshli8h: ;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } @@ -1349,7 +1349,7 @@ define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: uqshli4s: ;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } @@ -1357,7 +1357,7 @@ define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind { ;CHECK-LABEL: uqshli2d: ;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x 
i64> ) ret <2 x i64> %tmp3 } @@ -1365,7 +1365,7 @@ define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: ursra8b: ;CHECK: ursra.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>* %B %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 @@ -1375,7 +1375,7 @@ define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: ursra4h: ;CHECK: ursra.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>* %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -1385,7 +1385,7 @@ define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: ursra2s: ;CHECK: ursra.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>* %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -1395,7 +1395,7 @@ define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: ursra16b: ;CHECK: ursra.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>* %B %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 @@ -1405,7 +1405,7 @@ define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: ursra8h: ;CHECK: ursra.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>* %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -1415,7 +1415,7 @@ define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: ursra4s: ;CHECK: ursra.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>* %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -1425,7 +1425,7 @@ define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: ursra2d: ;CHECK: ursra.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>* %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 @@ -1435,7 +1435,7 @@ define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: srsra8b: ;CHECK: srsra.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>* %B %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 @@ -1445,7 +1445,7 @@ define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: srsra4h: ;CHECK: srsra.4h v0, 
{{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>* %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 @@ -1455,7 +1455,7 @@ define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: srsra2s: ;CHECK: srsra.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>* %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 @@ -1465,7 +1465,7 @@ define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: srsra16b: ;CHECK: srsra.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>* %B %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 @@ -1475,7 +1475,7 @@ define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: srsra8h: ;CHECK: srsra.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>* %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 @@ -1485,7 +1485,7 @@ define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: srsra4s: ;CHECK: srsra.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>* %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 @@ -1495,7 +1495,7 @@ define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: srsra2d: ;CHECK: srsra.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A - %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>* %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 @@ -1831,7 +1831,7 @@ define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: sli.8b v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1) ret <8 x i8> %tmp3 } @@ -1840,7 +1840,7 @@ define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: sli.4h v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1) ret <4 x i16> %tmp3 } @@ -1849,7 +1849,7 @@ define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: sli.2s v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1) 
ret <2 x i32> %tmp3 } @@ -1858,7 +1858,7 @@ define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: sli d0, {{d[0-9]+}}, #1 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1) + %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1) ret <1 x i64> %tmp3 } @@ -1867,7 +1867,7 @@ define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: sli.16b v0, {{v[0-9]+}}, #1 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1) + %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1) ret <16 x i8> %tmp3 } @@ -1876,7 +1876,7 @@ define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: sli.8h v0, {{v[0-9]+}}, #1 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1) + %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1) ret <8 x i16> %tmp3 } @@ -1885,7 +1885,7 @@ define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: sli.4s v0, {{v[0-9]+}}, #1 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1) ret <4 x i32> %tmp3 } @@ -1894,19 +1894,19 @@ define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: sli.2d v0, {{v[0-9]+}}, #1 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1) + %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: ashr_v1i64: diff --git 
a/test/CodeGen/ARM64/vshr.ll b/test/CodeGen/AArch64/arm64-vshr.ll
similarity index 95%
rename from test/CodeGen/ARM64/vshr.ll
rename to test/CodeGen/AArch64/arm64-vshr.ll
index 1da8f60acb6..21eb579f252 100644
--- a/test/CodeGen/ARM64/vshr.ll
+++ b/test/CodeGen/AArch64/arm64-vshr.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
define <8 x i16> @testShiftRightArith_v8i16(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: testShiftRightArith_v8i16:
diff --git a/test/CodeGen/ARM64/vshuffle.ll b/test/CodeGen/AArch64/arm64-vshuffle.ll
similarity index 100%
rename from test/CodeGen/ARM64/vshuffle.ll
rename to test/CodeGen/AArch64/arm64-vshuffle.ll
diff --git a/test/CodeGen/ARM64/vsqrt.ll b/test/CodeGen/AArch64/arm64-vsqrt.ll
similarity index 51%
rename from test/CodeGen/ARM64/vsqrt.ll
rename to test/CodeGen/AArch64/arm64-vsqrt.ll
index 094d7042a4d..02b7c7ec5d8 100644
--- a/test/CodeGen/ARM64/vsqrt.ll
+++ b/test/CodeGen/AArch64/arm64-vsqrt.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: frecps_2s:
;CHECK: frecps.2s
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -14,7 +14,7 @@ define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: frecps.4s
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -23,13 +23,13 @@ define <2 x double> @frecps_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK: frecps.2d
%tmp1 = load <2 x double>* %A
%tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
-declare <2 x float> @llvm.arm64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
@@ -37,7 +37,7 @@ define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: frsqrts.2s
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -46,7 +46,7 @@ define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* %B)
nounwind { ;CHECK: frsqrts.4s %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x float> @llvm.arm64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -55,19 +55,19 @@ define <2 x double> @frsqrts_2d(<2 x double>* %A, <2 x double>* %B) nounwind { ;CHECK: frsqrts.2d %tmp1 = load <2 x double>* %A %tmp2 = load <2 x double>* %B - %tmp3 = call <2 x double> @llvm.arm64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2) ret <2 x double> %tmp3 } -declare <2 x float> @llvm.arm64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind { ;CHECK-LABEL: frecpe_2s: ;CHECK: frecpe.2s %tmp1 = load <2 x float>* %A - %tmp3 = call <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float> %tmp1) + %tmp3 = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %tmp1) ret <2 x float> %tmp3 } @@ -75,7 +75,7 @@ define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind { ;CHECK-LABEL: frecpe_4s: ;CHECK: frecpe.4s %tmp1 = load <4 x float>* %A - %tmp3 = call <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float> %tmp1) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %tmp1) ret <4 x float> %tmp3 } @@ -83,7 +83,7 @@ define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind { ;CHECK-LABEL: frecpe_2d: ;CHECK: frecpe.2d %tmp1 = load <2 x double>* %A - %tmp3 = call <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double> %tmp1) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %tmp1) ret <2 x double> %tmp3 } @@ -91,7 +91,7 @@ define float @frecpe_s(float* %A) nounwind { ;CHECK-LABEL: frecpe_s: ;CHECK: frecpe s0, {{s[0-9]+}} %tmp1 = load float* %A - %tmp3 = call float @llvm.arm64.neon.frecpe.f32(float %tmp1) + %tmp3 = call float @llvm.aarch64.neon.frecpe.f32(float %tmp1) ret float %tmp3 } @@ -99,21 +99,21 @@ define double @frecpe_d(double* %A) nounwind { ;CHECK-LABEL: frecpe_d: ;CHECK: frecpe d0, {{d[0-9]+}} %tmp1 = load double* %A - %tmp3 = call double @llvm.arm64.neon.frecpe.f64(double %tmp1) + %tmp3 = call double @llvm.aarch64.neon.frecpe.f64(double %tmp1) ret double %tmp3 } -declare <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double>) nounwind readnone -declare float @llvm.arm64.neon.frecpe.f32(float) nounwind readnone -declare double @llvm.arm64.neon.frecpe.f64(double) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double>) nounwind readnone +declare float @llvm.aarch64.neon.frecpe.f32(float) nounwind readnone 
+declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone define float @frecpx_s(float* %A) nounwind { ;CHECK-LABEL: frecpx_s: ;CHECK: frecpx s0, {{s[0-9]+}} %tmp1 = load float* %A - %tmp3 = call float @llvm.arm64.neon.frecpx.f32(float %tmp1) + %tmp3 = call float @llvm.aarch64.neon.frecpx.f32(float %tmp1) ret float %tmp3 } @@ -121,18 +121,18 @@ define double @frecpx_d(double* %A) nounwind { ;CHECK-LABEL: frecpx_d: ;CHECK: frecpx d0, {{d[0-9]+}} %tmp1 = load double* %A - %tmp3 = call double @llvm.arm64.neon.frecpx.f64(double %tmp1) + %tmp3 = call double @llvm.aarch64.neon.frecpx.f64(double %tmp1) ret double %tmp3 } -declare float @llvm.arm64.neon.frecpx.f32(float) nounwind readnone -declare double @llvm.arm64.neon.frecpx.f64(double) nounwind readnone +declare float @llvm.aarch64.neon.frecpx.f32(float) nounwind readnone +declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind { ;CHECK-LABEL: frsqrte_2s: ;CHECK: frsqrte.2s %tmp1 = load <2 x float>* %A - %tmp3 = call <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float> %tmp1) + %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %tmp1) ret <2 x float> %tmp3 } @@ -140,7 +140,7 @@ define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind { ;CHECK-LABEL: frsqrte_4s: ;CHECK: frsqrte.4s %tmp1 = load <4 x float>* %A - %tmp3 = call <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float> %tmp1) + %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %tmp1) ret <4 x float> %tmp3 } @@ -148,7 +148,7 @@ define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind { ;CHECK-LABEL: frsqrte_2d: ;CHECK: frsqrte.2d %tmp1 = load <2 x double>* %A - %tmp3 = call <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double> %tmp1) + %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %tmp1) ret <2 x double> %tmp3 } @@ -156,7 +156,7 @@ define float @frsqrte_s(float* %A) nounwind { ;CHECK-LABEL: frsqrte_s: ;CHECK: frsqrte s0, {{s[0-9]+}} %tmp1 = load float* %A - %tmp3 = call float @llvm.arm64.neon.frsqrte.f32(float %tmp1) + %tmp3 = call float @llvm.aarch64.neon.frsqrte.f32(float %tmp1) ret float %tmp3 } @@ -164,21 +164,21 @@ define double @frsqrte_d(double* %A) nounwind { ;CHECK-LABEL: frsqrte_d: ;CHECK: frsqrte d0, {{d[0-9]+}} %tmp1 = load double* %A - %tmp3 = call double @llvm.arm64.neon.frsqrte.f64(double %tmp1) + %tmp3 = call double @llvm.aarch64.neon.frsqrte.f64(double %tmp1) ret double %tmp3 } -declare <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone -declare float @llvm.arm64.neon.frsqrte.f32(float) nounwind readnone -declare double @llvm.arm64.neon.frsqrte.f64(double) nounwind readnone +declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone +declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone +declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: urecpe_2s: ;CHECK: urecpe.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> 
%tmp1) ret <2 x i32> %tmp3 } @@ -186,18 +186,18 @@ define <4 x i32> @urecpe_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: urecpe_4s: ;CHECK: urecpe.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: ursqrte_2s: ;CHECK: ursqrte.2s %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.ursqrte.v2i32(<2 x i32> %tmp1) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp3 } @@ -205,18 +205,18 @@ define <4 x i32> @ursqrte_4s(<4 x i32>* %A) nounwind { ;CHECK-LABEL: ursqrte_4s: ;CHECK: ursqrte.4s %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.ursqrte.v4i32(<4 x i32> %tmp1) + %tmp3 = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp3 } -declare <2 x i32> @llvm.arm64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone define float @f1(float %a, float %b) nounwind readnone optsize ssp { ; CHECK-LABEL: f1: ; CHECK: frsqrts s0, s0, s1 ; CHECK-NEXT: ret - %vrsqrtss.i = tail call float @llvm.arm64.neon.frsqrts.f32(float %a, float %b) nounwind + %vrsqrtss.i = tail call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) nounwind ret float %vrsqrtss.i } @@ -224,9 +224,9 @@ define double @f2(double %a, double %b) nounwind readnone optsize ssp { ; CHECK-LABEL: f2: ; CHECK: frsqrts d0, d0, d1 ; CHECK-NEXT: ret - %vrsqrtsd.i = tail call double @llvm.arm64.neon.frsqrts.f64(double %a, double %b) nounwind + %vrsqrtsd.i = tail call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) nounwind ret double %vrsqrtsd.i } -declare double @llvm.arm64.neon.frsqrts.f64(double, double) nounwind readnone -declare float @llvm.arm64.neon.frsqrts.f32(float, float) nounwind readnone +declare double @llvm.aarch64.neon.frsqrts.f64(double, double) nounwind readnone +declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone diff --git a/test/CodeGen/ARM64/vsra.ll b/test/CodeGen/AArch64/arm64-vsra.ll similarity index 98% rename from test/CodeGen/ARM64/vsra.ll rename to test/CodeGen/AArch64/arm64-vsra.ll index 3611eb3cba6..5e9cef3e7e2 100644 --- a/test/CodeGen/ARM64/vsra.ll +++ b/test/CodeGen/AArch64/arm64-vsra.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsras8: diff --git a/test/CodeGen/ARM64/vsub.ll b/test/CodeGen/AArch64/arm64-vsub.ll similarity index 85% rename from test/CodeGen/ARM64/vsub.ll rename to test/CodeGen/AArch64/arm64-vsub.ll index 5c7e84f46ef..c2c8755c066 100644 --- a/test/CodeGen/ARM64/vsub.ll +++ b/test/CodeGen/AArch64/arm64-vsub.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s 
-march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: subhn8b: ;CHECK: subhn.8b %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @subhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: subhn.4h %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @subhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: subhn.2s %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -31,8 +31,8 @@ define <16 x i8> @subhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind { ;CHECK-LABEL: subhn2_16b: ;CHECK: subhn.8b ;CHECK-NEXT: subhn2.16b - %vsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind - %vsubhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vsubhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind %res = shufflevector <8 x i8> %vsubhn2.i, <8 x i8> %vsubhn_high2.i, <16 x i32> ret <16 x i8> %res } @@ -41,8 +41,8 @@ define <8 x i16> @subhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind { ;CHECK-LABEL: subhn2_8h: ;CHECK: subhn.4h ;CHECK-NEXT: subhn2.8h - %vsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind - %vsubhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vsubhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind %res = shufflevector <4 x i16> %vsubhn2.i, <4 x i16> %vsubhn_high3.i, <8 x i32> ret <8 x i16> %res } @@ -51,22 +51,22 @@ define <4 x i32> @subhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind { ;CHECK-LABEL: subhn2_4s: ;CHECK: subhn.2s ;CHECK-NEXT: subhn2.4s - %vsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind - %vsubhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vsubhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind %res = shufflevector <2 x i32> %vsubhn2.i, <2 x i32> %vsubhn_high3.i, <4 x i32> ret <4 x i32> %res } -declare <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32>, <4 x i32>) 
nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: rsubhn8b: ;CHECK: rsubhn.8b %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -75,7 +75,7 @@ define <4 x i16> @rsubhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: rsubhn.4h %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -84,7 +84,7 @@ define <2 x i32> @rsubhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: rsubhn.2s %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -92,8 +92,8 @@ define <16 x i8> @rsubhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind { ;CHECK-LABEL: rsubhn2_16b: ;CHECK: rsubhn.8b ;CHECK-NEXT: rsubhn2.16b - %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind - %vrsubhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vrsubhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind %res = shufflevector <8 x i8> %vrsubhn2.i, <8 x i8> %vrsubhn_high2.i, <16 x i32> ret <16 x i8> %res } @@ -102,8 +102,8 @@ define <8 x i16> @rsubhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind { ;CHECK-LABEL: rsubhn2_8h: ;CHECK: rsubhn.4h ;CHECK-NEXT: rsubhn2.8h - %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind - %vrsubhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vrsubhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind %res = shufflevector <4 x i16> %vrsubhn2.i, <4 x i16> %vrsubhn_high3.i, <8 x i32> ret <8 x i16> %res } @@ -112,15 +112,15 @@ define <4 x i32> @rsubhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind { ;CHECK-LABEL: rsubhn2_4s: ;CHECK: rsubhn.2s ;CHECK-NEXT: rsubhn2.4s - %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind - %vrsubhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vrsubhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind %res = shufflevector <2 x i32> %vrsubhn2.i, <2 x i32> %vrsubhn_high3.i, <4 x i32> ret <4 x i32> %res } -declare <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 
x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: ssubl8h: diff --git a/test/CodeGen/ARM64/weak-reference.ll b/test/CodeGen/AArch64/arm64-weak-reference.ll similarity index 100% rename from test/CodeGen/ARM64/weak-reference.ll rename to test/CodeGen/AArch64/arm64-weak-reference.ll diff --git a/test/CodeGen/ARM64/xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll similarity index 100% rename from test/CodeGen/ARM64/xaluo.ll rename to test/CodeGen/AArch64/arm64-xaluo.ll diff --git a/test/CodeGen/ARM64/zero-cycle-regmov.ll b/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll similarity index 100% rename from test/CodeGen/ARM64/zero-cycle-regmov.ll rename to test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll diff --git a/test/CodeGen/ARM64/zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll similarity index 100% rename from test/CodeGen/ARM64/zero-cycle-zeroing.ll rename to test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll diff --git a/test/CodeGen/ARM64/zext.ll b/test/CodeGen/AArch64/arm64-zext.ll similarity index 100% rename from test/CodeGen/ARM64/zext.ll rename to test/CodeGen/AArch64/arm64-zext.ll diff --git a/test/CodeGen/ARM64/zextload-unscaled.ll b/test/CodeGen/AArch64/arm64-zextload-unscaled.ll similarity index 100% rename from test/CodeGen/ARM64/zextload-unscaled.ll rename to test/CodeGen/AArch64/arm64-zextload-unscaled.ll diff --git a/test/CodeGen/ARM64/zip.ll b/test/CodeGen/AArch64/arm64-zip.ll similarity index 98% rename from test/CodeGen/ARM64/zip.ll rename to test/CodeGen/AArch64/arm64-zip.ll index d06a9f899dd..304b2809943 100644 --- a/test/CodeGen/ARM64/zip.ll +++ b/test/CodeGen/AArch64/arm64-zip.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vzipi8: diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll index 162430b9b76..da095a0a42c 100644 --- a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll +++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s define i32 @foo(i32* %var, i1 %cond) { ; CHECK-LABEL: foo: diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index 58ea735c809..58b5d1d078c 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created @@ -501,9 +501,9 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into 
the ; function there. -; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le +; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 @@ -525,9 +525,9 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le +; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -550,8 +550,8 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -574,8 +574,8 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 -; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -598,9 +598,9 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -623,9 +623,9 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -648,8 +648,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -672,8 +672,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 -; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -696,8 +696,8 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls +; CHECK-NEXT: cmp w[[OLD]], w0, uxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -720,8 +720,8 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls +; CHECK-NEXT: cmp w[[OLD]], w0, uxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -744,8 +744,8 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -768,8 +768,8 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 -; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -792,8 +792,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: cmp w[[OLD]], w0, uxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -816,8 +816,8 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: cmp w[[OLD]], w0, uxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -840,8 +840,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 -; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] @@ -864,8 +864,8 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 -; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll index 2c69bee0d1b..62d41bcead6 100644 --- a/test/CodeGen/AArch64/basic-pic.ll +++ b/test/CodeGen/AArch64/basic-pic.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s @var = global i32 0 diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll index 8959e1b6959..da0ed8af312 100644 --- a/test/CodeGen/AArch64/bitfield-insert-0.ll +++ b/test/CodeGen/AArch64/bitfield-insert-0.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s ; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one ; point, in the edge case where lsb = 0. Just make sure. diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll index 8b0b4dafe6c..2369a55aa92 100644 --- a/test/CodeGen/AArch64/bitfield-insert.ll +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK ; First, a simple example from Clang. The registers could plausibly be ; different, but probably won't be. 
@@ -64,7 +64,7 @@ define void @test_whole32_from64(i64* %existing, i64* %new) { ; CHECK-LABEL: test_whole32_from64: -; CHECK-ARM64: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16 +; CHECK: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16 ; CHECK: ret @@ -83,7 +83,7 @@ define void @test_whole32_from64(i64* %existing, i64* %new) { define void @test_32bit_masked(i32 *%existing, i32 *%new) { ; CHECK-LABEL: test_32bit_masked: -; CHECK-ARM64: and +; CHECK: and ; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4 %oldval = load volatile i32* %existing @@ -101,7 +101,7 @@ define void @test_32bit_masked(i32 *%existing, i32 *%new) { define void @test_64bit_masked(i64 *%existing, i64 *%new) { ; CHECK-LABEL: test_64bit_masked: -; CHECK-ARM64: and +; CHECK: and ; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8 %oldval = load volatile i64* %existing @@ -121,7 +121,7 @@ define void @test_64bit_masked(i64 *%existing, i64 *%new) { define void @test_32bit_complexmask(i32 *%existing, i32 *%new) { ; CHECK-LABEL: test_32bit_complexmask: -; CHECK-ARM64: and +; CHECK: and ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4 %oldval = load volatile i32* %existing diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll index 71ffe30c928..0e1265372bd 100644 --- a/test/CodeGen/AArch64/bitfield.ll +++ b/test/CodeGen/AArch64/bitfield.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK @var32 = global i32 0 @var64 = global i64 0 @@ -23,7 +23,7 @@ define void @test_extendb(i8 %var) { %uxt64 = zext i8 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff ret void } @@ -47,7 +47,7 @@ define void @test_extendh(i16 %var) { %uxt64 = zext i16 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff ret void } @@ -60,7 +60,7 @@ define void @test_extendw(i32 %var) { %uxt64 = zext i32 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-ARM64: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 +; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 ret void } diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll index 0cbdd3988b7..1eec4cc7f4e 100644 --- a/test/CodeGen/AArch64/blockaddress.ll +++ b/test/CodeGen/AArch64/blockaddress.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -code-model=large -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s @addr = global i8* null diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll index 5d92ef67d0e..881aeaa15dd 100644 --- a/test/CodeGen/AArch64/bool-loads.ll +++ b/test/CodeGen/AArch64/bool-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s @var = global i1 0 diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll index 137173bc4f3..591f48303e2 100644 --- a/test/CodeGen/AArch64/breg.ll +++ 
b/test/CodeGen/AArch64/breg.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s @stored_label = global i8* null diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll index 9b04a8f979b..046e6ceac07 100644 --- a/test/CodeGen/AArch64/callee-save.ll +++ b/test/CodeGen/AArch64/callee-save.ll @@ -1,19 +1,14 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s @var = global float 0.0 define void @foo() { ; CHECK-LABEL: foo: -; CHECK: stp d14, d15, [sp -; CHECK: stp d12, d13, [sp -; CHECK: stp d10, d11, [sp -; CHECK: stp d8, d9, [sp - -; CHECK-ARM64: stp d15, d14, [sp -; CHECK-ARM64: stp d13, d12, [sp -; CHECK-ARM64: stp d11, d10, [sp -; CHECK-ARM64: stp d9, d8, [sp +; CHECK: stp d15, d14, [sp +; CHECK: stp d13, d12, [sp +; CHECK: stp d11, d10, [sp +; CHECK: stp d9, d8, [sp ; Create lots of live variables to exhaust the supply of ; caller-saved registers @@ -83,14 +78,9 @@ define void @foo() { store volatile float %val31, float* @var store volatile float %val32, float* @var -; CHECK: ldp d8, d9, [sp -; CHECK: ldp d10, d11, [sp -; CHECK: ldp d12, d13, [sp -; CHECK: ldp d14, d15, [sp - -; CHECK-ARM64: ldp d9, d8, [sp -; CHECK-ARM64: ldp d11, d10, [sp -; CHECK-ARM64: ldp d13, d12, [sp -; CHECK-ARM64: ldp d15, d14, [sp +; CHECK: ldp d9, d8, [sp +; CHECK: ldp d11, d10, [sp +; CHECK: ldp d13, d12, [sp +; CHECK: ldp d15, d14, [sp ret void } diff --git a/test/CodeGen/AArch64/code-model-large-abs.ll b/test/CodeGen/AArch64/code-model-large-abs.ll index 0408e6f4898..ca92500855b 100644 --- a/test/CodeGen/AArch64/code-model-large-abs.ll +++ b/test/CodeGen/AArch64/code-model-large-abs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -code-model=large -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -o - %s | FileCheck %s @var8 = global i8 0 @var16 = global i16 0 diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll index accbadd4d4e..a1a87cf51a1 100644 --- a/test/CodeGen/AArch64/compare-branch.ll +++ b/test/CodeGen/AArch64/compare-branch.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s @var32 = global i32 0 @var64 = global i64 0 diff --git a/test/CodeGen/AArch64/complex-copy-noneon.ll b/test/CodeGen/AArch64/complex-copy-noneon.ll index f65b1161282..4ae547856ec 100644 --- a/test/CodeGen/AArch64/complex-copy-noneon.ll +++ b/test/CodeGen/AArch64/complex-copy-noneon.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s ; The DAG combiner decided to use a vector load/store for this struct copy ; previously. 
This probably shouldn't happen without NEON, but the most diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll index 96e11b12a17..5f81cba66cb 100644 --- a/test/CodeGen/AArch64/cond-sel.ll +++ b/test/CodeGen/AArch64/cond-sel.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var32 = global i32 0 @var64 = global i64 0 @@ -45,7 +45,7 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r ; CHECK-NOFP-NOT: fcmp %val2 = select i1 %tst2, i64 9, i64 15 store i64 %val2, i64* @var64 -; CHECK-ARM64: orr w[[CONST15:[0-9]+]], wzr, #0xf +; CHECK: orr w[[CONST15:[0-9]+]], wzr, #0xf ; CHECK: movz {{[wx]}}[[CONST9:[0-9]+]], #{{9|0x9}} ; CHECK: csel [[MAYBETRUE:x[0-9]+]], x[[CONST9]], x[[CONST15]], eq ; CHECK: csel {{x[0-9]+}}, x[[CONST9]], [[MAYBETRUE]], vs diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll index 832a01046b0..1b519284846 100644 --- a/test/CodeGen/AArch64/directcond.ll +++ b/test/CodeGen/AArch64/directcond.ll @@ -1,10 +1,10 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { ; CHECK-LABEL: test_select_i32: %val = select i1 %bit, i32 %a, i32 %b -; CHECK-ARM64: tst w0, #0x1 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: csel w0, w1, w2, ne ret i32 %val @@ -13,7 +13,7 @@ define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { ; CHECK-LABEL: test_select_i64: %val = select i1 %bit, i64 %a, i64 %b -; CHECK-ARM64: tst w0, #0x1 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: csel x0, x1, x2, ne ret i64 %val @@ -22,7 +22,7 @@ define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { define float @test_select_float(i1 %bit, float %a, float %b) { ; CHECK-LABEL: test_select_float: %val = select i1 %bit, float %a, float %b -; CHECK-ARM64: tst w0, #0x1 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: fcsel s0, s0, s1, ne ; CHECK-NOFP-NOT: fcsel ret float %val @@ -31,7 +31,7 @@ define float @test_select_float(i1 %bit, float %a, float %b) { define double @test_select_double(i1 %bit, double %a, double %b) { ; CHECK-LABEL: test_select_double: %val = select i1 %bit, double %a, double %b -; CHECK-ARM64: tst w0, #0x1 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NOFP-NOT: fcsel diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll index b09ce3668dc..662b4158854 100644 --- a/test/CodeGen/AArch64/dp1.ll +++ b/test/CodeGen/AArch64/dp1.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s 
-mtriple=aarch64-linux-gnu | FileCheck %s @var32 = global i32 0 @var64 = global i64 0 diff --git a/test/CodeGen/AArch64/eliminate-trunc.ll b/test/CodeGen/AArch64/eliminate-trunc.ll index 02a085acf03..ea86a084cb4 100644 --- a/test/CodeGen/AArch64/eliminate-trunc.ll +++ b/test/CodeGen/AArch64/eliminate-trunc.ll @@ -1,11 +1,11 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s ; Check trunc i64 operation is translated as a subregister access ; eliminating an i32 induction varible. -; CHECK-ARM64-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1 -; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK-ARM64-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1 +; CHECK-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}} define void @test1_signed([8 x i8]* nocapture %a, i8* nocapture readonly %box, i8 %limit) minsize { entry: %conv = zext i8 %limit to i32 diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll index 8f418455ffa..ce5c0f68661 100644 --- a/test/CodeGen/AArch64/extern-weak.ll +++ b/test/CodeGen/AArch64/extern-weak.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK-ARM64 -; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s declare extern_weak i32 @var() @@ -9,8 +9,8 @@ define i32()* @foo() { ret i32()* @var -; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:var -; CHECK-ARM64: ldr x0, [x[[ADDRHI]], :got_lo12:var] +; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:var +; CHECK: ldr x0, [x[[ADDRHI]], :got_lo12:var] ; In the large model, the usual relocations are absolute and can ; materialise 0. 
@@ -27,9 +27,9 @@ define i32* @bar() { %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5 -; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:arr_var -; CHECK-ARM64: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var] -; CHECK-ARM64: add x0, [[BASE]], #20 +; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:arr_var +; CHECK: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var] +; CHECK: add x0, [[BASE]], #20 ret i32* %addr @@ -46,8 +46,8 @@ define i32* @bar() { define i32* @wibble() { ret i32* @defined_weak_var -; CHECK-ARM64: adrp [[BASE:x[0-9]+]], defined_weak_var -; CHECK-ARM64: add x0, [[BASE]], :lo12:defined_weak_var +; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var +; CHECK: add x0, [[BASE]], :lo12:defined_weak_var ; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var ; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll index 09a6ae3ccd2..a392619a768 100644 --- a/test/CodeGen/AArch64/fastcc-reserved.ll +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s ; This test is designed to be run in the situation where the ; call-frame is not reserved (hence disable-fp-elim), but where @@ -12,30 +12,22 @@ define fastcc void @foo(i32 %in) { %addr = alloca i8, i32 %in ; Normal frame setup stuff: -; CHECK: sub sp, sp, -; CHECK: stp x29, x30 -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64: mov x29, sp +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK: mov x29, sp ; Reserve space for call-frame: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64: sub sp, sp, #16 call fastcc void @will_pop([8 x i32] undef, i32 42) ; CHECK: bl will_pop -; CHECK-ARM64: bl will_pop ; Since @will_pop is fastcc with tailcallopt, it will put the stack ; back where it needs to be, we shouldn't duplicate that ; CHECK-NOT: sub sp, sp, #16 ; CHECK-NOT: add sp, sp, -; CHECK-ARM64-NOT: sub sp, sp, #16 -; CHECK-ARM64-NOT: add sp, sp, -; CHECK: ldp x29, x30 -; CHECK: add sp, sp, -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64: ldp x29, x30, [sp], #16 +; CHECK: mov sp, x29 +; CHECK: ldp x29, x30, [sp], #16 ret void } @@ -46,28 +38,21 @@ define void @foo1(i32 %in) { %addr = alloca i8, i32 %in ; Normal frame setup again -; CHECK: sub sp, sp, -; CHECK: stp x29, x30 -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64: mov x29, sp +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK: mov x29, sp ; Reserve space for call-frame ; CHECK: sub sp, sp, #16 -; CHECK-ARM64: sub sp, sp, #16 call void @wont_pop([8 x i32] undef, i32 42) ; CHECK: bl wont_pop -; CHECK-ARM64: bl wont_pop ; This time we *do* need to unreserve the call-frame ; CHECK: add sp, sp, #16 -; CHECK-ARM64: add sp, sp, #16 ; Check for epilogue (primarily to make sure sp spotted above wasn't ; part of it). 
-; CHECK: ldp x29, x30 -; CHECK: add sp, sp, -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64: ldp x29, x30, [sp], #16 +; CHECK: mov sp, x29 +; CHECK: ldp x29, x30, [sp], #16 ret void } diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll index b641de0ee29..9917fcd044f 100644 --- a/test/CodeGen/AArch64/fastcc.ll +++ b/test/CodeGen/AArch64/fastcc.ll @@ -1,226 +1,144 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-ARM64-TAIL -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-ARM64 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; Without tailcallopt fastcc still means the caller cleans up the ; stack, so try to make sure this is respected. define fastcc void @func_stack0() { ; CHECK-LABEL: func_stack0: -; CHECK: sub sp, sp, #48 - -; CHECK-ARM64-LABEL: func_stack0: -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64-NEXT: mov x29, sp -; CHECK-ARM64-NEXT: sub sp, sp, #32 +; CHECK: mov x29, sp +; CHECK-NEXT: sub sp, sp, #32 ; CHECK-TAIL-LABEL: func_stack0: -; CHECK-TAIL: sub sp, sp, #48 - -; CHECK-ARM64-TAIL-LABEL: func_stack0: -; CHECK-ARM64-TAIL: stp x29, x30, [sp, #-16]! -; CHECK-ARM64-TAIL-NEXT: mov x29, sp -; CHECK-ARM64-TAIL-NEXT: sub sp, sp, #32 +; CHECK-TAIL: stp x29, x30, [sp, #-16]! +; CHECK-TAIL-NEXT: mov x29, sp +; CHECK-TAIL-NEXT: sub sp, sp, #32 call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack8 -; CHECK-ARM64-NOT: sub sp, sp, - ; CHECK-TAIL: bl func_stack8 ; CHECK-TAIL: sub sp, sp, #16 -; CHECK-ARM64-TAIL: bl func_stack8 -; CHECK-ARM64-TAIL: sub sp, sp, #16 - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack32 -; CHECK-ARM64-NOT: sub sp, sp, ; CHECK-TAIL: bl func_stack32 ; CHECK-TAIL: sub sp, sp, #32 -; CHECK-ARM64-TAIL: bl func_stack32 -; CHECK-ARM64-TAIL: sub sp, sp, #32 - call fastcc void @func_stack0() ; CHECK: bl func_stack0 ; CHECK-NOT: sub sp, sp -; CHECK-ARM64: bl func_stack0 -; CHECK-ARM64-NOT: sub sp, sp ; CHECK-TAIL: bl func_stack0 ; CHECK-TAIL-NOT: sub sp, sp -; CHECK-ARM64-TAIL: bl func_stack0 -; CHECK-ARM64-TAIL-NOT: sub sp, sp - ret void -; CHECK: add sp, sp, #48 +; CHECK: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-NEXT: ret -; CHECK-TAIL: add sp, sp, #48 +; CHECK-TAIL: mov sp, x29 +; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16 ; CHECK-TAIL-NEXT: ret - -; CHECK-ARM64-TAIL: mov sp, x29 -; CHECK-ARM64-TAIL-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-TAIL-NEXT: ret } define fastcc void @func_stack8([8 x i32], i32 %stacked) { ; CHECK-LABEL: func_stack8: -; CHECK: sub sp, sp, #48 +; CHECK: stp x29, x30, [sp, #-16]! +; CHECK: mov x29, sp +; CHECK: sub sp, sp, #32 -; CHECK-ARM64-LABEL: func_stack8: -; CHECK-ARM64: stp x29, x30, [sp, #-16]! -; CHECK-ARM64: mov x29, sp -; CHECK-ARM64: sub sp, sp, #32 ; CHECK-TAIL-LABEL: func_stack8: -; CHECK-TAIL: sub sp, sp, #48 - -; CHECK-ARM64-TAIL-LABEL: func_stack8: -; CHECK-ARM64-TAIL: stp x29, x30, [sp, #-16]! -; CHECK-ARM64-TAIL: mov x29, sp -; CHECK-ARM64-TAIL: sub sp, sp, #32 +; CHECK-TAIL: stp x29, x30, [sp, #-16]! 
+; CHECK-TAIL: mov x29, sp +; CHECK-TAIL: sub sp, sp, #32 call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack8 -; CHECK-ARM64-NOT: sub sp, sp, ; CHECK-TAIL: bl func_stack8 ; CHECK-TAIL: sub sp, sp, #16 -; CHECK-ARM64-TAIL: bl func_stack8 -; CHECK-ARM64-TAIL: sub sp, sp, #16 - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack32 -; CHECK-ARM64-NOT: sub sp, sp, ; CHECK-TAIL: bl func_stack32 ; CHECK-TAIL: sub sp, sp, #32 -; CHECK-ARM64-TAIL: bl func_stack32 -; CHECK-ARM64-TAIL: sub sp, sp, #32 - call fastcc void @func_stack0() ; CHECK: bl func_stack0 ; CHECK-NOT: sub sp, sp -; CHECK-ARM64: bl func_stack0 -; CHECK-ARM64-NOT: sub sp, sp - ; CHECK-TAIL: bl func_stack0 ; CHECK-TAIL-NOT: sub sp, sp -; CHECK-ARM64-TAIL: bl func_stack0 -; CHECK-ARM64-TAIL-NOT: sub sp, sp - ret void -; CHECK: add sp, sp, #48 +; CHECK: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-NEXT: ret -; CHECK-TAIL: add sp, sp, #64 +; CHECK-TAIL: mov sp, x29 +; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16 ; CHECK-TAIL-NEXT: ret - -; CHECK-ARM64-TAIL: mov sp, x29 -; CHECK-ARM64-TAIL-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-TAIL-NEXT: ret } define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { ; CHECK-LABEL: func_stack32: -; CHECK: sub sp, sp, #48 - -; CHECK-ARM64-LABEL: func_stack32: -; CHECK-ARM64: mov x29, sp +; CHECK: mov x29, sp ; CHECK-TAIL-LABEL: func_stack32: -; CHECK-TAIL: sub sp, sp, #48 - -; CHECK-ARM64-TAIL-LABEL: func_stack32: -; CHECK-ARM64-TAIL: mov x29, sp +; CHECK-TAIL: mov x29, sp call fastcc void @func_stack8([8 x i32] undef, i32 42) ; CHECK: bl func_stack8 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack8 -; CHECK-ARM64-NOT: sub sp, sp, - ; CHECK-TAIL: bl func_stack8 ; CHECK-TAIL: sub sp, sp, #16 -; CHECK-ARM64-TAIL: bl func_stack8 -; CHECK-ARM64-TAIL: sub sp, sp, #16 - call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) ; CHECK: bl func_stack32 ; CHECK-NOT: sub sp, sp, -; CHECK-ARM64: bl func_stack32 -; CHECK-ARM64-NOT: sub sp, sp, ; CHECK-TAIL: bl func_stack32 ; CHECK-TAIL: sub sp, sp, #32 -; CHECK-ARM64-TAIL: bl func_stack32 -; CHECK-ARM64-TAIL: sub sp, sp, #32 - call fastcc void @func_stack0() ; CHECK: bl func_stack0 ; CHECK-NOT: sub sp, sp -; CHECK-ARM64: bl func_stack0 -; CHECK-ARM64-NOT: sub sp, sp ; CHECK-TAIL: bl func_stack0 ; CHECK-TAIL-NOT: sub sp, sp -; CHECK-ARM64-TAIL: bl func_stack0 -; CHECK-ARM64-TAIL-NOT: sub sp, sp - ret void -; CHECK: add sp, sp, #48 +; CHECK: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret -; CHECK-ARM64: mov sp, x29 -; CHECK-ARM64-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-NEXT: ret - -; CHECK-TAIL: add sp, sp, #80 +; CHECK-TAIL: mov sp, x29 +; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16 ; CHECK-TAIL-NEXT: ret - -; CHECK-ARM64-TAIL: mov sp, x29 -; CHECK-ARM64-TAIL-NEXT: ldp x29, x30, [sp], #16 -; CHECK-ARM64-TAIL-NEXT: ret } diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll index c54e3e62941..3c74508bb12 100644 --- a/test/CodeGen/AArch64/fcmp.ll +++ b/test/CodeGen/AArch64/fcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s declare void @bar(i32) diff --git 
a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll index 40800d00e50..ccb3616b70b 100644 --- a/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/test/CodeGen/AArch64/fcvt-fixed.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 -O0 ; (The O0 test is to make sure FastISel still constrains its operands properly diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll index 667c05d1653..c9b0b9ff7d8 100644 --- a/test/CodeGen/AArch64/flags-multiuse.ll +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; LLVM should be able to cope with multiple uses of the same flag-setting ; instruction at different points of a routine. Either by rematerializing the diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll index 8e98b784bb9..262271784ec 100644 --- a/test/CodeGen/AArch64/floatdp_2source.ll +++ b/test/CodeGen/AArch64/floatdp_2source.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu -mcpu=cyclone | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mcpu=cyclone | FileCheck %s @varfloat = global float 0.0 @vardouble = global double 0.0 diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll index 07cbb4919e6..b4f4d77cd0b 100644 --- a/test/CodeGen/AArch64/fp-cond-sel.ll +++ b/test/CodeGen/AArch64/fp-cond-sel.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK @varfloat = global float 0.0 @vardouble = global double 0.0 @@ -12,7 +12,7 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst1 = icmp ugt i32 %lhs32, %rhs32 %val1 = select i1 %tst1, float 0.0, float 1.0 store float %val1, float* @varfloat -; CHECK-ARM64: movi v[[FLT0:[0-9]+]].2d, #0 +; CHECK: movi v[[FLT0:[0-9]+]].2d, #0 ; CHECK: fmov s[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll index 53113b59127..10f88fdbbe9 100644 --- a/test/CodeGen/AArch64/fp-dp3.ll +++ b/test/CodeGen/AArch64/fp-dp3.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -fp-contract=fast | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s -check-prefix=CHECK-NOFAST declare float @llvm.fma.f32(float, float, float) diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll index 4b19deb976c..892b19c5cf3 100644 --- a/test/CodeGen/AArch64/fp128-folding.ll +++ b/test/CodeGen/AArch64/fp128-folding.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s declare void @bar(i8*, i8*, i32*) ; SelectionDAG used to try 
to fold some fp128 operations using the ppc128 type, diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll index e279d5b0096..e59520c4dc9 100644 --- a/test/CodeGen/AArch64/fpimm.ll +++ b/test/CodeGen/AArch64/fpimm.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s @varf32 = global float 0.0 @varf64 = global double 0.0 diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 129ab25c877..abb732ccf43 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -1,8 +1,5 @@ - -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM64-BE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } @@ -63,7 +60,7 @@ define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %st %val0 = load volatile i32* %addr0 ; Some weird move means x0 is used for one access -; CHECK-ARM64: ldr [[REG32:w[0-9]+]], [sp, #28] +; CHECK: ldr [[REG32:w[0-9]+]], [sp, #28] store i32 %val0, i32* @var32 ; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32] @@ -149,7 +146,6 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var %retval = load volatile i32* %stacked ret i32 %retval ; CHECK-LE: ldr w0, [sp, #16] -; CHECK-BE-AARCH64: ldr w0, [sp, #20] } define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, @@ -159,8 +155,8 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, store float %var8, float* @varfloat ; Beware as above: the offset would be different on big-endian ; machines if the first ldr were changed to use s-registers. 
-; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] -; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] +; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] +; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] ret void } @@ -185,11 +181,10 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ; Nothing local on stack in current codegen, so first stack is 16 away ; CHECK-LE: add x[[REG:[0-9]+]], sp, #16 ; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8] -; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24] ; Important point is that we address sp+24 for second dword -; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ret void } @@ -209,6 +204,5 @@ define i16 @stacked_i16(i32 %val0, i32 %val1, i32 %val2, i32 %val3, i32 %val4, i32 %val5, i32 %val6, i32 %val7, i16 %stack1) { ; CHECK-LABEL: stacked_i16 -; CHECK-ARM64-BE: ldrh ret i16 %stack1 } diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 8cb5f97e888..422c5765ec4 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -1,8 +1,7 @@ - -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK-NONEON %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-BE %s %myStruct = type { i64 , i8, i32 } @@ -90,13 +89,13 @@ define void @check_stack_args() { ; that varstruct is passed on the stack. Rather dependent on how a ; memcpy gets created, but the following works for now. 
-; CHECK-ARM64-DAG: str {{q[0-9]+}}, [sp] -; CHECK-ARM64-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 -; CHECK-ARM64: mov v0.16b, v[[FINAL_DOUBLE]].16b +; CHECK-DAG: str {{q[0-9]+}}, [sp] +; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 +; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b -; CHECK-ARM64-NONEON-DAG: str {{q[0-9]+}}, [sp] -; CHECK-ARM64-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 -; CHECK-ARM64-NONEON: fmov d0, d[[FINAL_DOUBLE]] +; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp] +; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 +; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]] ; CHECK: bl struct_on_stack ; CHECK-NOFP-NOT: fmov @@ -105,11 +104,11 @@ define void @check_stack_args() { float -2.0, float -8.0, float 16.0, float 1.0, float 64.0) -; CHECK-ARM64: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 -; CHECK-ARM64: str [[SIXTY_FOUR]], [sp] +; CHECK: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 +; CHECK: str [[SIXTY_FOUR]], [sp] -; CHECK-ARM64-NONEON: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 -; CHECK-ARM64-NONEON: str [[SIXTY_FOUR]], [sp] +; CHECK-NONEON: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 +; CHECK-NONEON: str [[SIXTY_FOUR]], [sp] ; CHECK: bl stacked_fpu ret void @@ -131,8 +130,11 @@ define void @check_i128_align() { i32 42, i128 %val) ; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128] ; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] -; CHECK-ARM64: stp [[I128LO]], [[I128HI]], [sp, #16] -; CHECK-ARM64-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] +; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16] + +; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128] +; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] +; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] ; CHECK: bl check_i128_stackalign call void @check_i128_regalign(i32 0, i128 42) diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll index 2bf4a2cbce4..451b9d6741e 100644 --- a/test/CodeGen/AArch64/global-alignment.ll +++ b/test/CodeGen/AArch64/global-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s @var32 = global [3 x i32] zeroinitializer @var64 = global [3 x i64] zeroinitializer diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll index c23edaf4360..7a02b104e77 100644 --- a/test/CodeGen/AArch64/got-abuse.ll +++ b/test/CodeGen/AArch64/got-abuse.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s ; LLVM gives well-defined semantics to this horrible construct (though C says ; it's undefined). Regardless, we shouldn't crash. 
The important feature here is diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll index 8320f3ab044..9f7dd998bc2 100644 --- a/test/CodeGen/AArch64/illegal-float-ops.ll +++ b/test/CodeGen/AArch64/illegal-float-ops.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s @varfloat = global float 0.0 @vardouble = global double 0.0 diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll index d3ed363821c..f47b490baeb 100644 --- a/test/CodeGen/AArch64/init-array.ll +++ b/test/CodeGen/AArch64/init-array.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s -; RUN: llc -mtriple=arm64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s define internal void @_GLOBAL__I_a() section ".text.startup" { ret void diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll index 7ca9ade9cc6..9d833d936c0 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s +; RUN: not llc -mtriple=aarch64-none-linux-gnu -o - %s define void @foo() { ; Out of range immediate for I. diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll index 6bc633814c7..17260130199 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s +; RUN: not llc -mtriple=aarch64-none-linux-gnu -o - %s define void @foo() { ; 32-bit bitpattern ending in 1101 can't be produced. 
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll index a0fcafa4510..1dfb789ac8e 100644 --- a/test/CodeGen/AArch64/jump-table.ll +++ b/test/CodeGen/AArch64/jump-table.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s -; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable diff --git a/test/CodeGen/AArch64/large-consts.ll b/test/CodeGen/AArch64/large-consts.ll index b5f6c32eef4..6bf85e829f6 100644 --- a/test/CodeGen/AArch64/large-consts.ll +++ b/test/CodeGen/AArch64/large-consts.ll @@ -1,14 +1,14 @@ -; RUN: llc -mtriple=arm64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s ; Make sure the shift amount is encoded into the instructions by LLVM because ; it's not the linker's job to put it there. define double @foo() { -; CHECK-ARM64: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] -; CHECK-ARM64: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2] -; CHECK-ARM64: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b101AAAAA,0xf2] -; CHECK-ARM64: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b100AAAAA,0xf2] +; CHECK: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] +; CHECK: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2] +; CHECK: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b101AAAAA,0xf2] +; CHECK: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b100AAAAA,0xf2] ret double 3.14159 } diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll index b13634ca706..e2fa08bcce6 100644 --- a/test/CodeGen/AArch64/ldst-regoffset.ll +++ b/test/CodeGen/AArch64/ldst-regoffset.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll index d738cfdaa26..1de8443d9ed 100644 --- a/test/CodeGen/AArch64/ldst-unscaledimm.ll +++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP 
%s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll index d6475f90429..e171d22b6c7 100644 --- a/test/CodeGen/AArch64/ldst-unsignedimm.ll +++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 diff --git a/test/CodeGen/ARM64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg similarity index 89% rename from test/CodeGen/ARM64/lit.local.cfg rename to test/CodeGen/AArch64/lit.local.cfg index 3468e27f07f..77493d88b2a 100644 --- a/test/CodeGen/ARM64/lit.local.cfg +++ b/test/CodeGen/AArch64/lit.local.cfg @@ -3,7 +3,7 @@ import re config.suffixes = ['.ll'] targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True # For now we don't test arm64-win32. diff --git a/test/CodeGen/AArch64/literal_pools_float.ll b/test/CodeGen/AArch64/literal_pools_float.ll index 6f9f3fc3772..e53b8b62c6f 100644 --- a/test/CodeGen/AArch64/literal_pools_float.ll +++ b/test/CodeGen/AArch64/literal_pools_float.ll @@ -1,7 +1,7 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s @varfloat = global float 0.0 @vardouble = global double 0.0 diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll index 4518fa21023..2f5b9f2adb4 100644 --- a/test/CodeGen/AArch64/local_vars.ll +++ b/test/CodeGen/AArch64/local_vars.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-ARM64 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix 
CHECK-WITHFP-ARM64 %s ; Make sure a reasonably sane prologue and epilogue are ; generated. This test is not robust in the face of an frame-handling diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll index 608d44fc9d7..b249d72e0f9 100644 --- a/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @var1_32 = global i32 0 @var2_32 = global i32 0 diff --git a/test/CodeGen/AArch64/mature-mc-support.ll b/test/CodeGen/AArch64/mature-mc-support.ll index 2948da9f200..276c54d2cc4 100644 --- a/test/CodeGen/AArch64/mature-mc-support.ll +++ b/test/CodeGen/AArch64/mature-mc-support.ll @@ -1,14 +1,10 @@ ; Test that inline assembly is parsed by the MC layer when MC support is mature ; (even when the output is assembly). -; RUN: FileCheck %s < %t1 - -; RUN: FileCheck %s < %t2 - -; RUN: not llc -mtriple=arm64-pc-linux < %s > /dev/null 2> %t3 +; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t3 ; RUN: FileCheck %s < %t3 -; RUN: not llc -mtriple=arm64-pc-linux -filetype=obj < %s > /dev/null 2> %t4 +; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t4 ; RUN: FileCheck %s < %t4 module asm " .this_directive_is_very_unlikely_to_exist" diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll index 6fe000974d5..93c18127175 100644 --- a/test/CodeGen/AArch64/movw-consts.ll +++ b/test/CodeGen/AArch64/movw-consts.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK define i64 @test0() { ; CHECK-LABEL: test0: @@ -9,43 +9,43 @@ define i64 @test0() { define i64 @test1() { ; CHECK-LABEL: test1: -; CHECK-ARM64: orr w0, wzr, #0x1 +; CHECK: orr w0, wzr, #0x1 ret i64 1 } define i64 @test2() { ; CHECK-LABEL: test2: -; CHECK-ARM64: orr w0, wzr, #0xffff +; CHECK: orr w0, wzr, #0xffff ret i64 65535 } define i64 @test3() { ; CHECK-LABEL: test3: -; CHECK-ARM64: orr w0, wzr, #0x10000 +; CHECK: orr w0, wzr, #0x10000 ret i64 65536 } define i64 @test4() { ; CHECK-LABEL: test4: -; CHECK-ARM64: orr w0, wzr, #0xffff0000 +; CHECK: orr w0, wzr, #0xffff0000 ret i64 4294901760 } define i64 @test5() { ; CHECK-LABEL: test5: -; CHECK-ARM64: orr x0, xzr, #0x100000000 +; CHECK: orr x0, xzr, #0x100000000 ret i64 4294967296 } define i64 @test6() { ; CHECK-LABEL: test6: -; CHECK-ARM64: orr x0, xzr, #0xffff00000000 +; CHECK: orr x0, xzr, #0xffff00000000 ret i64 281470681743360 } define i64 @test7() { ; CHECK-LABEL: test7: -; CHECK-ARM64: orr x0, xzr, #0x1000000000000 +; CHECK: orr x0, xzr, #0x1000000000000 ret i64 281474976710656 } @@ -75,35 +75,35 @@ define i64 @test10() { define void @test11() { ; CHECK-LABEL: test11: -; CHECK-ARM64: str wzr +; CHECK: str wzr store i32 0, i32* @var32 ret void } define void @test12() { ; CHECK-LABEL: test12: -; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x1 +; CHECK: orr {{w[0-9]+}}, wzr, #0x1 store i32 1, i32* @var32 ret void } define void @test13() { ; CHECK-LABEL: test13: -; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff +; CHECK: orr {{w[0-9]+}}, wzr, #0xffff store i32 65535, i32* @var32 ret void } define void @test14() { ; CHECK-LABEL: test14: -; CHECK-ARM64: orr {{w[0-9]+}}, wzr, 
#0x10000 +; CHECK: orr {{w[0-9]+}}, wzr, #0x10000 store i32 65536, i32* @var32 ret void } define void @test15() { ; CHECK-LABEL: test15: -; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff0000 +; CHECK: orr {{w[0-9]+}}, wzr, #0xffff0000 store i32 4294901760, i32* @var32 ret void } @@ -119,6 +119,6 @@ define i64 @test17() { ; CHECK-LABEL: test17: ; Mustn't MOVN w0 here. -; CHECK-ARM64: orr x0, xzr, #0xfffffffffffffffd +; CHECK: orr x0, xzr, #0xfffffffffffffffd ret i64 -3 } diff --git a/test/CodeGen/AArch64/movw-shift-encoding.ll b/test/CodeGen/AArch64/movw-shift-encoding.ll index 2fe9dd4516e..178fccce333 100644 --- a/test/CodeGen/AArch64/movw-shift-encoding.ll +++ b/test/CodeGen/AArch64/movw-shift-encoding.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-ARM64 +; RUN: llc -mtriple=aarch64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s @var = global i32 0 @@ -8,8 +8,8 @@ define i32* @get_var() { ret i32* @var -; CHECK-ARM64: movz x0, #:abs_g3:var // encoding: [0bAAA00000,A,0b111AAAAA,0xd2] -; CHECK-ARM64: movk x0, #:abs_g2_nc:var // encoding: [0bAAA00000,A,0b110AAAAA,0xf2] -; CHECK-ARM64: movk x0, #:abs_g1_nc:var // encoding: [0bAAA00000,A,0b101AAAAA,0xf2] -; CHECK-ARM64: movk x0, #:abs_g0_nc:var // encoding: [0bAAA00000,A,0b100AAAAA,0xf2] +; CHECK: movz x0, #:abs_g3:var // encoding: [0bAAA00000,A,0b111AAAAA,0xd2] +; CHECK: movk x0, #:abs_g2_nc:var // encoding: [0bAAA00000,A,0b110AAAAA,0xf2] +; CHECK: movk x0, #:abs_g1_nc:var // encoding: [0bAAA00000,A,0b101AAAAA,0xf2] +; CHECK: movk x0, #:abs_g0_nc:var // encoding: [0bAAA00000,A,0b100AAAAA,0xf2] } diff --git a/test/CodeGen/AArch64/neon-bitcast.ll b/test/CodeGen/AArch64/neon-bitcast.ll index b70cda3175a..61099d48fdd 100644 --- a/test/CodeGen/AArch64/neon-bitcast.ll +++ b/test/CodeGen/AArch64/neon-bitcast.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s ; From <8 x i8> diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll index dfaf1f25179..6497856c7d3 100644 --- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: and8xi8: diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll index b99057ebf2b..6d89dfbacf4 100644 --- a/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: cmeq8xi8: diff --git a/test/CodeGen/AArch64/neon-diagnostics.ll b/test/CodeGen/AArch64/neon-diagnostics.ll index e28df29f3e8..099b6856cec 100644 --- a/test/CodeGen/AArch64/neon-diagnostics.ll +++ b/test/CodeGen/AArch64/neon-diagnostics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s 
-verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK: test_vfma_lane_f32: diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll index 96b4084a257..f270b54abb4 100644 --- a/test/CodeGen/AArch64/neon-extract.ll +++ b/test/CodeGen/AArch64/neon-extract.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_vext_s8: diff --git a/test/CodeGen/AArch64/neon-fma.ll b/test/CodeGen/AArch64/neon-fma.ll index 6df494dedae..af70302ca93 100644 --- a/test/CodeGen/AArch64/neon-fma.ll +++ b/test/CodeGen/AArch64/neon-fma.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s diff --git a/test/CodeGen/AArch64/neon-fpround_f128.ll b/test/CodeGen/AArch64/neon-fpround_f128.ll index e48dbbaec92..a93f3f2723c 100644 --- a/test/CodeGen/AArch64/neon-fpround_f128.ll +++ b/test/CodeGen/AArch64/neon-fpround_f128.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) { ; CHECK-LABEL: test_fpround_v1f128: diff --git a/test/CodeGen/AArch64/neon-idiv.ll b/test/CodeGen/AArch64/neon-idiv.ll index 11e1af7e143..de402c4780b 100644 --- a/test/CodeGen/AArch64/neon-idiv.ll +++ b/test/CodeGen/AArch64/neon-idiv.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu < %s -mattr=+neon | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s define <4 x i32> @test1(<4 x i32> %a) { %rem = srem <4 x i32> %a, diff --git a/test/CodeGen/AArch64/neon-mla-mls.ll b/test/CodeGen/AArch64/neon-mla-mls.ll index e7bff748ad3..71bb0e70abf 100644 --- a/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/test/CodeGen/AArch64/neon-mla-mls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll index b7baf25f807..40649aeb1b8 100644 --- a/test/CodeGen/AArch64/neon-mov.ll +++ b/test/CodeGen/AArch64/neon-mov.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK define <8 x i8> @movi8b() { ; CHECK-LABEL: movi8b: @@ -14,75 +14,75 @@ define <16 x i8> @movi16b() { define <2 x i32> @movi2s_lsl0() { ; CHECK-LABEL: movi2s_lsl0: -; CHECK-ARM64: movi {{d[0-9]+}}, #0x0000ff000000ff +; CHECK: movi {{d[0-9]+}}, #0x0000ff000000ff ret <2 x i32> 
< i32 255, i32 255 > } define <2 x i32> @movi2s_lsl8() { ; CHECK-LABEL: movi2s_lsl8: -; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ff000000ff00 +; CHECK: movi {{d[0-9]+}}, #0x00ff000000ff00 ret <2 x i32> < i32 65280, i32 65280 > } define <2 x i32> @movi2s_lsl16() { ; CHECK-LABEL: movi2s_lsl16: -; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff0000 +; CHECK: movi {{d[0-9]+}}, #0xff000000ff0000 ret <2 x i32> < i32 16711680, i32 16711680 > } define <2 x i32> @movi2s_lsl24() { ; CHECK-LABEL: movi2s_lsl24: -; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff000000 +; CHECK: movi {{d[0-9]+}}, #0xff000000ff000000 ret <2 x i32> < i32 4278190080, i32 4278190080 > } define <4 x i32> @movi4s_lsl0() { ; CHECK-LABEL: movi4s_lsl0: -; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x0000ff000000ff +; CHECK: movi {{v[0-9]+}}.2d, #0x0000ff000000ff ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 > } define <4 x i32> @movi4s_lsl8() { ; CHECK-LABEL: movi4s_lsl8: -; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x00ff000000ff00 +; CHECK: movi {{v[0-9]+}}.2d, #0x00ff000000ff00 ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 > } define <4 x i32> @movi4s_lsl16() { ; CHECK-LABEL: movi4s_lsl16: -; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff0000 +; CHECK: movi {{v[0-9]+}}.2d, #0xff000000ff0000 ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 > } define <4 x i32> @movi4s_lsl24() { ; CHECK-LABEL: movi4s_lsl24: -; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff000000 +; CHECK: movi {{v[0-9]+}}.2d, #0xff000000ff000000 ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 > } define <4 x i16> @movi4h_lsl0() { ; CHECK-LABEL: movi4h_lsl0: -; CHECK-ARM64: movi {{d[0-9]+}}, #0xff00ff00ff00ff +; CHECK: movi {{d[0-9]+}}, #0xff00ff00ff00ff ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 > } define <4 x i16> @movi4h_lsl8() { ; CHECK-LABEL: movi4h_lsl8: -; CHECK-ARM64: movi d0, #0xff00ff00ff00ff00 +; CHECK: movi d0, #0xff00ff00ff00ff00 ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 > } define <8 x i16> @movi8h_lsl0() { ; CHECK-LABEL: movi8h_lsl0: -; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff +; CHECK: movi v0.2d, #0xff00ff00ff00ff ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > } define <8 x i16> @movi8h_lsl8() { ; CHECK-LABEL: movi8h_lsl8: -; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff00 +; CHECK: movi v0.2d, #0xff00ff00ff00ff00 ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > } @@ -164,26 +164,26 @@ define <8 x i16> @mvni8h_lsl8() { define <2 x i32> @movi2s_msl8(<2 x i32> %a) { ; CHECK-LABEL: movi2s_msl8: -; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ffff0000ffff +; CHECK: movi {{d[0-9]+}}, #0x00ffff0000ffff ret <2 x i32> < i32 65535, i32 65535 > } define <2 x i32> @movi2s_msl16() { ; CHECK-LABEL: movi2s_msl16: -; CHECK-ARM64: movi d0, #0xffffff00ffffff +; CHECK: movi d0, #0xffffff00ffffff ret <2 x i32> < i32 16777215, i32 16777215 > } define <4 x i32> @movi4s_msl8() { ; CHECK-LABEL: movi4s_msl8: -; CHECK-ARM64: movi v0.2d, #0x00ffff0000ffff +; CHECK: movi v0.2d, #0x00ffff0000ffff ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 > } define <4 x i32> @movi4s_msl16() { ; CHECK-LABEL: movi4s_msl16: -; CHECK-ARM64: movi v0.2d, #0xffffff00ffffff +; CHECK: movi v0.2d, #0xffffff00ffffff ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 > } diff --git a/test/CodeGen/AArch64/neon-or-combine.ll b/test/CodeGen/AArch64/neon-or-combine.ll index d98c12802a0..260f6935dde 100644 --- 
a/test/CodeGen/AArch64/neon-or-combine.ll +++ b/test/CodeGen/AArch64/neon-or-combine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; Check that the DAGCombiner does not crash with an assertion failure ; when performing a target specific combine to simplify a 'or' dag node diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll index d45dde649e4..4f8571db748 100644 --- a/test/CodeGen/AArch64/neon-perm.ll +++ b/test/CodeGen/AArch64/neon-perm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK %struct.int8x8x2_t = type { [2 x <8 x i8>] } %struct.int16x4x2_t = type { [2 x <4 x i16>] } @@ -53,7 +53,7 @@ entry: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp1_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -69,7 +69,7 @@ entry: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp1q_s64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -109,7 +109,7 @@ entry: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp1_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -125,7 +125,7 @@ entry: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp1q_u64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -133,7 +133,7 @@ entry: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp1_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -149,7 +149,7 @@ entry: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vuzp1q_f64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -221,7 +221,7 @@ entry: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp2_s32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -237,7 +237,7 @@ entry: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 
test_vuzp2q_s64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -277,7 +277,7 @@ entry: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp2_u32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -293,7 +293,7 @@ entry: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp2q_u64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -301,7 +301,7 @@ entry: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp2_f32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -317,7 +317,7 @@ entry: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vuzp2q_f64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -389,7 +389,7 @@ entry: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip1_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -405,7 +405,7 @@ entry: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip1q_s64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -445,7 +445,7 @@ entry: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip1_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -461,7 +461,7 @@ entry: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip1q_u64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -469,7 +469,7 @@ entry: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip1_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -485,7 +485,7 @@ entry: define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: 
test_vzip1q_f64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -557,7 +557,7 @@ entry: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip2_s32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -573,7 +573,7 @@ entry: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip2q_s64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -613,7 +613,7 @@ entry: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip2_u32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -629,7 +629,7 @@ entry: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip2q_u64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -637,7 +637,7 @@ entry: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip2_f32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -653,7 +653,7 @@ entry: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vzip2q_f64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -725,7 +725,7 @@ entry: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn1_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -741,7 +741,7 @@ entry: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn1q_s64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -781,7 +781,7 @@ entry: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn1_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -797,7 +797,7 @@ entry: define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn1q_u64: -; 
CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -805,7 +805,7 @@ entry: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn1_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -821,7 +821,7 @@ entry: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vtrn1q_f64: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -893,7 +893,7 @@ entry: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn2_s32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -909,7 +909,7 @@ entry: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn2q_s64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -949,7 +949,7 @@ entry: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn2_u32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i @@ -965,7 +965,7 @@ entry: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn2q_u64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -973,7 +973,7 @@ entry: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn2_f32: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i @@ -989,7 +989,7 @@ entry: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vtrn2q_f64: -; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i @@ -2494,8 +2494,8 @@ entry: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vuzp1.i = 
shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2530,8 +2530,8 @@ entry: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2542,8 +2542,8 @@ entry: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -2710,8 +2710,8 @@ entry: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2746,8 +2746,8 @@ entry: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2758,8 +2758,8 @@ entry: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -2926,8 +2926,8 @@ entry: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn_s32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2962,8 +2962,8 @@ entry: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn_u32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; 
CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2974,8 +2974,8 @@ entry: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn_f32: -; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll index 6cfdc5be131..32f59626b38 100644 --- a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll +++ b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s declare float @llvm.fma.f32(float, float, float) declare double @llvm.fma.f64(double, double, double) diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll index ab7ea661b40..a01df3275a9 100644 --- a/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK define float @test_dup_sv2S(<2 x float> %v) { ; CHECK-LABEL: test_dup_sv2S - ; CHECK-ARM64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] + ; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 ret float %tmp1 } @@ -37,14 +37,14 @@ define double @test_dup_dvD(<1 x double> %v) { define double @test_dup_dv2D(<2 x double> %v) { ; CHECK-LABEL: test_dup_dv2D - ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] + ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 ret double %tmp1 } define double @test_dup_dv2D_0(<2 x double> %v) { ; CHECK-LABEL: test_dup_dv2D_0 - ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] + ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK: ret %tmp1 = extractelement <2 x double> %v, i32 1 ret double %tmp1 @@ -88,7 +88,7 @@ define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) { define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { ; CHECK-LABEL: test_vector_dup_dv2D - ; CHECK-ARM64: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8 + ; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8 %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> ret <1 x i64> %shuffle.i } diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll index 1d9c92c999d..d10d551805a 100644 --- a/test/CodeGen/AArch64/neon-shift-left-long.ll +++ b/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) { ; CHECK: 
test_sshll_v8i8: diff --git a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll index f15cd24e5d4..1df3719c886 100644 --- a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll +++ b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; A vector TruncStore can not be selected. ; Test a trunc IR and a vector store IR can be selected correctly. diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll index d2697910e6f..e8c762504fc 100644 --- a/test/CodeGen/AArch64/pic-eh-stubs.ll +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; Make sure exception-handling PIC code can be linked correctly. An alternative diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll index 313cdb1bf0c..25b5e0c5f77 100644 --- a/test/CodeGen/AArch64/regress-f128csel-flags.ll +++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; We used to not mark NZCV as being used in the continuation basic-block ; when lowering a 128-bit "select" to branches. This meant a subsequent use diff --git a/test/CodeGen/AArch64/regress-fp128-livein.ll b/test/CodeGen/AArch64/regress-fp128-livein.ll index 141c0d862f6..5e6ab0a9675 100644 --- a/test/CodeGen/AArch64/regress-fp128-livein.ll +++ b/test/CodeGen/AArch64/regress-fp128-livein.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s ; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB, ; causing a crash during live range calc. 
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll index 55c3bcdcdd4..477d99625ee 100644 --- a/test/CodeGen/AArch64/regress-tblgen-chains.ll +++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix CHECK-ARM64 +; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s ; When generating DAG selection tables, TableGen used to only flag an ; instruction as needing a chain on its own account if it had a built-in pattern @@ -12,7 +12,7 @@ declare void @bar(i8*) define i64 @test_chains() { -; CHECK-ARM64-LABEL: test_chains: +; CHECK-LABEL: test_chains: %locvar = alloca i8 @@ -25,13 +25,13 @@ define i64 @test_chains() { %inc.4 = trunc i64 %inc.3 to i8 store i8 %inc.4, i8* %locvar -; CHECK-ARM64: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]] -; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK-ARM64: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]] -; CHECK-ARM64: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]] +; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]] +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1 +; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]] +; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]] %ret.1 = load i8* %locvar %ret.2 = zext i8 %ret.1 to i64 ret i64 %ret.2 -; CHECK-ARM64: ret +; CHECK: ret } diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll index cc42b0c9df4..c3167e4f4bd 100644 --- a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll +++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s @var = global i32 0 declare void @bar() diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll index f06c8ecd28d..ec8615910cf 100644 --- a/test/CodeGen/AArch64/setcc-takes-i32.ll +++ b/test/CodeGen/AArch64/setcc-takes-i32.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s ; Most important point here is that the promotion of the i1 works ; correctly. 
Previously LLVM thought that i64 was the appropriate SetCC output, diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll index 85245718afc..34e3bb410e8 100644 --- a/test/CodeGen/AArch64/sibling-call.ll +++ b/test/CodeGen/AArch64/sibling-call.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -arm64-load-store-opt=0 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-load-store-opt=0 | FileCheck %s declare void @callee_stack0() declare void @callee_stack8([8 x i32], i64) diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll index 5ba1d8d0a83..c3a172dfb42 100644 --- a/test/CodeGen/AArch64/sincos-expansion.ll +++ b/test/CodeGen/AArch64/sincos-expansion.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s define float @test_sincos_f32(float %f) { %sin = call float @sinf(float %f) readnone diff --git a/test/CodeGen/AArch64/sincospow-vector-expansion.ll b/test/CodeGen/AArch64/sincospow-vector-expansion.ll index 38c8bb2d5e3..22f33a83394 100644 --- a/test/CodeGen/AArch64/sincospow-vector-expansion.ll +++ b/test/CodeGen/AArch64/sincospow-vector-expansion.ll @@ -1,4 +1,4 @@ -; RUN: llc -o - %s -verify-machineinstrs -mtriple=arm64-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc -o - %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @test_cos_v2f64(<2 x double> %v1) { diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll index b3841fac68a..8aab8421526 100644 --- a/test/CodeGen/AArch64/tail-call.ll +++ b/test/CodeGen/AArch64/tail-call.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck --check-prefix=CHECK-ARM64 %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s declare fastcc void @callee_stack0() declare fastcc void @callee_stack8([8 x i32], i64) @@ -8,91 +8,59 @@ define fastcc void @caller_to0_from0() nounwind { ; CHECK-LABEL: caller_to0_from0: ; CHECK-NEXT: // BB -; CHECK-ARM64-LABEL: caller_to0_from0: -; CHECK-ARM64-NEXT: // BB - tail call fastcc void @callee_stack0() ret void ; CHECK-NEXT: b callee_stack0 - -; CHECK-ARM64-NEXT: b callee_stack0 } define fastcc void @caller_to0_from8([8 x i32], i64) { ; CHECK-LABEL: caller_to0_from8: -; CHECK-ARM64-LABEL: caller_to0_from8: - tail call fastcc void @callee_stack0() ret void ; CHECK: add sp, sp, #16 ; CHECK-NEXT: b callee_stack0 - -; CHECK-ARM64: add sp, sp, #16 -; CHECK-ARM64-NEXT: b callee_stack0 } define fastcc void @caller_to8_from0() { ; CHECK-LABEL: caller_to8_from0: ; CHECK: sub sp, sp, #32 -; CHECK-ARM64-LABEL: caller_to8_from0: -; CHECK-ARM64: sub sp, sp, #32 - ; Key point is that the "42" should go #16 below incoming stack ; pointer (we didn't have arg space to reuse). tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) ret void -; CHECK: str {{x[0-9]+}}, [sp, #16] -; CHECK-NEXT: add sp, sp, #16 +; CHECK: str {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack8 - -; CHECK-ARM64: str {{x[0-9]+}}, [sp, #16]! 
-; CHECK-ARM64-NEXT: b callee_stack8 } define fastcc void @caller_to8_from8([8 x i32], i64 %a) { ; CHECK-LABEL: caller_to8_from8: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64-LABEL: caller_to8_from8: -; CHECK-ARM64: sub sp, sp, #16 - ; Key point is that the "%a" should go where at SP on entry. tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) ret void -; CHECK: str {{x[0-9]+}}, [sp, #16] -; CHECK-NEXT: add sp, sp, #16 +; CHECK: str {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack8 - -; CHECK-ARM64: str {{x[0-9]+}}, [sp, #16]! -; CHECK-ARM64-NEXT: b callee_stack8 } define fastcc void @caller_to16_from8([8 x i32], i64 %a) { ; CHECK-LABEL: caller_to16_from8: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64-LABEL: caller_to16_from8: -; CHECK-ARM64: sub sp, sp, #16 - ; Important point is that the call reuses the "dead" argument space ; above %a on the stack. If it tries to go below incoming-SP then the ; callee will not deallocate the space, even in fastcc. tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) -; CHECK: str {{x[0-9]+}}, [sp, #24] -; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: b callee_stack16 - -; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-ARM64-NEXT: add sp, sp, #16 -; CHECK-ARM64-NEXT: b callee_stack16 ret void } @@ -101,19 +69,12 @@ define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: caller_to8_from24: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64-LABEL: caller_to8_from24: -; CHECK-ARM64: sub sp, sp, #16 - ; Key point is that the "%a" should go where at #16 above SP on entry. tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) ret void -; CHECK: str {{x[0-9]+}}, [sp, #32] -; CHECK-NEXT: add sp, sp, #32 +; CHECK: str {{x[0-9]+}}, [sp, #32]! ; CHECK-NEXT: b callee_stack8 - -; CHECK-ARM64: str {{x[0-9]+}}, [sp, #32]! -; CHECK-ARM64-NEXT: b callee_stack8 } @@ -121,24 +82,13 @@ define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { ; CHECK-LABEL: caller_to16_from16: ; CHECK: sub sp, sp, #16 -; CHECK-ARM64-LABEL: caller_to16_from16: -; CHECK-ARM64: sub sp, sp, #16 - ; Here we want to make sure that both loads happen before the stores: ; otherwise either %a or %b will be wrongly clobbered. 
tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) ret void -; CHECK: ldr x0, -; CHECK: ldr x1, -; CHECK: str x1, -; CHECK: str x0, - +; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: b callee_stack16 - -; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-ARM64-NEXT: add sp, sp, #16 -; CHECK-ARM64-NEXT: b callee_stack16 } diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll index 44072c67d90..bc112ab8db9 100644 --- a/test/CodeGen/AArch64/zero-reg.ll +++ b/test/CodeGen/AArch64/zero-reg.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s @var32 = global i32 0 @var64 = global i64 0 diff --git a/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S b/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S deleted file mode 100644 index 250732d6e84..00000000000 --- a/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o /dev/null %s - - .text - .globl _foo - .cfi_startproc -_foo: - stp x29, x30, [sp, #-16]! - .cfi_adjust_cfa_offset 16 - - ldp x29, x30, [sp], #16 - .cfi_adjust_cfa_offset -16 - .cfi_restore x29 - .cfi_restore x30 - - ret - - .cfi_endproc diff --git a/test/CodeGen/ARM64/fminv.ll b/test/CodeGen/ARM64/fminv.ll deleted file mode 100644 index ca706d897ca..00000000000 --- a/test/CodeGen/ARM64/fminv.ll +++ /dev/null @@ -1,101 +0,0 @@ -; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s - -define float @test_fminv_v2f32(<2 x float> %in) { -; CHECK: test_fminv_v2f32: -; CHECK: fminp s0, v0.2s - %min = call float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float> %in) - ret float %min -} - -define float @test_fminv_v4f32(<4 x float> %in) { -; CHECK: test_fminv_v4f32: -; CHECK: fminv s0, v0.4s - %min = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %in) - ret float %min -} - -define double @test_fminv_v2f64(<2 x double> %in) { -; CHECK: test_fminv_v2f64: -; CHECK: fminp d0, v0.2d - %min = call double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double> %in) - ret double %min -} - -declare float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float>) -declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>) -declare double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double>) - -define float @test_fmaxv_v2f32(<2 x float> %in) { -; CHECK: test_fmaxv_v2f32: -; CHECK: fmaxp s0, v0.2s - %max = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %in) - ret float %max -} - -define float @test_fmaxv_v4f32(<4 x float> %in) { -; CHECK: test_fmaxv_v4f32: -; CHECK: fmaxv s0, v0.4s - %max = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %in) - ret float %max -} - -define double @test_fmaxv_v2f64(<2 x double> %in) { -; CHECK: test_fmaxv_v2f64: -; CHECK: fmaxp d0, v0.2d - %max = call double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double> %in) - ret double %max -} - -declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>) -declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>) -declare double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double>) - -define float @test_fminnmv_v2f32(<2 x float> %in) { -; CHECK: test_fminnmv_v2f32: -; CHECK: fminnmp s0, v0.2s - %minnm = call float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float> %in) - ret float %minnm -} - -define float @test_fminnmv_v4f32(<4 x 
float> %in) { -; CHECK: test_fminnmv_v4f32: -; CHECK: fminnmv s0, v0.4s - %minnm = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %in) - ret float %minnm -} - -define double @test_fminnmv_v2f64(<2 x double> %in) { -; CHECK: test_fminnmv_v2f64: -; CHECK: fminnmp d0, v0.2d - %minnm = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in) - ret double %minnm -} - -declare float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float>) -declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>) -declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>) - -define float @test_fmaxnmv_v2f32(<2 x float> %in) { -; CHECK: test_fmaxnmv_v2f32: -; CHECK: fmaxnmp s0, v0.2s - %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float> %in) - ret float %maxnm -} - -define float @test_fmaxnmv_v4f32(<4 x float> %in) { -; CHECK: test_fmaxnmv_v4f32: -; CHECK: fmaxnmv s0, v0.4s - %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %in) - ret float %maxnm -} - -define double @test_fmaxnmv_v2f64(<2 x double> %in) { -; CHECK: test_fmaxnmv_v2f64: -; CHECK: fmaxnmp d0, v0.2d - %maxnm = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in) - ret double %maxnm -} - -declare float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float>) -declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>) -declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>) diff --git a/test/CodeGen/ARM64/simd-scalar-to-vector.ll b/test/CodeGen/ARM64/simd-scalar-to-vector.ll deleted file mode 100644 index 10e8d6c0ea4..00000000000 --- a/test/CodeGen/ARM64/simd-scalar-to-vector.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -mcpu=cyclone | FileCheck %s -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST - -define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp { -; CHECK: uaddlv.16b h0, v0 -; CHECK: rshrn.8b v0, v0, #4 -; CHECK: dup.16b v0, v0[0] -; CHECK: ret - -; CHECK-FAST: uaddlv.16b -; CHECK-FAST: rshrn.8b -; CHECK-FAST: dup.16b - %tmp = tail call i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8> %a) nounwind - %tmp1 = trunc i32 %tmp to i16 - %tmp2 = insertelement <8 x i16> undef, i16 %tmp1, i32 0 - %tmp3 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp2, i32 4) - %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> zeroinitializer - ret <16 x i8> %tmp4 -} - -declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone -declare i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM64/tbl.ll b/test/CodeGen/ARM64/tbl.ll deleted file mode 100644 index e1edd21d8a2..00000000000 --- a/test/CodeGen/ARM64/tbl.ll +++ /dev/null @@ -1,132 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { -; CHECK: tbl1_8b -; CHECK: tbl.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { -; CHECK: tbl1_16b -; CHECK: tbl.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { -; CHECK: tbl2_8b -; CHECK: tbl.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, 
<16 x i8> %C) { -; CHECK: tbl2_16b -; CHECK: tbl.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK: tbl3_8b -; CHECK: tbl.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK: tbl3_16b -; CHECK: tbl.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK: tbl4_8b -; CHECK: tbl.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK: tbl4_16b -; CHECK: tbl.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) - ret <16 x i8> %tmp3 -} - -declare <8 x i8> @llvm.arm64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { -; CHECK: tbx1_8b -; CHECK: tbx.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { -; CHECK: tbx1_16b -; CHECK: tbx.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK: tbx2_8b -; CHECK: tbx.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK: tbx2_16b -; CHECK: tbx.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK: tbx3_8b -; CHECK: tbx.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK: tbx3_16b -; CHECK: tbx.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, 
<16 x i8> %D, <16 x i8> %E) - ret <16 x i8> %tmp3 -} - -define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { -; CHECK: tbx4_8b -; CHECK: tbx.8b - %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { -; CHECK: tbx4_16b -; CHECK: tbx.16b - %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) - ret <16 x i8> %tmp3 -} - -declare <8 x i8> @llvm.arm64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i8> @llvm.arm64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone - diff --git a/test/CodeGen/ARM64/vcnt.ll b/test/CodeGen/ARM64/vcnt.ll deleted file mode 100644 index e00658a4bda..00000000000 --- a/test/CodeGen/ARM64/vcnt.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind { -;CHECK-LABEL: cls_8b: -;CHECK: cls.8b - %tmp1 = load <8 x i8>* %A - %tmp3 = call <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8> %tmp1) - ret <8 x i8> %tmp3 -} - -define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind { -;CHECK-LABEL: cls_16b: -;CHECK: cls.16b - %tmp1 = load <16 x i8>* %A - %tmp3 = call <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8> %tmp1) - ret <16 x i8> %tmp3 -} - -define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind { -;CHECK-LABEL: cls_4h: -;CHECK: cls.4h - %tmp1 = load <4 x i16>* %A - %tmp3 = call <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16> %tmp1) - ret <4 x i16> %tmp3 -} - -define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind { -;CHECK-LABEL: cls_8h: -;CHECK: cls.8h - %tmp1 = load <8 x i16>* %A - %tmp3 = call <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16> %tmp1) - ret <8 x i16> %tmp3 -} - -define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind { -;CHECK-LABEL: cls_2s: -;CHECK: cls.2s - %tmp1 = load <2 x i32>* %A - %tmp3 = call <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32> %tmp1) - ret <2 x i32> %tmp3 -} - -define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind { -;CHECK-LABEL: cls_4s: -;CHECK: cls.4s - %tmp1 = load <4 x i32>* %A - %tmp3 = call <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32> %tmp1) - ret <4 x i32> %tmp3 -} - -declare <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x 
i32>) nounwind readnone diff --git a/test/CodeGen/ARM64/vcvt_n.ll b/test/CodeGen/ARM64/vcvt_n.ll deleted file mode 100644 index 46de557b070..00000000000 --- a/test/CodeGen/ARM64/vcvt_n.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <2 x float> @cvtf32fxpu(<2 x i32> %a) nounwind readnone ssp { -; CHECK-LABEL: cvtf32fxpu: -; CHECK: ucvtf.2s v0, v0, #9 -; CHECK: ret - %vcvt_n1 = tail call <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 9) - ret <2 x float> %vcvt_n1 -} - -define <2 x float> @cvtf32fxps(<2 x i32> %a) nounwind readnone ssp { -; CHECK-LABEL: cvtf32fxps: -; CHECK: scvtf.2s v0, v0, #12 -; CHECK: ret - %vcvt_n1 = tail call <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 12) - ret <2 x float> %vcvt_n1 -} - -define <4 x float> @cvtqf32fxpu(<4 x i32> %a) nounwind readnone ssp { -; CHECK-LABEL: cvtqf32fxpu: -; CHECK: ucvtf.4s v0, v0, #18 -; CHECK: ret - %vcvt_n1 = tail call <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 18) - ret <4 x float> %vcvt_n1 -} - -define <4 x float> @cvtqf32fxps(<4 x i32> %a) nounwind readnone ssp { -; CHECK-LABEL: cvtqf32fxps: -; CHECK: scvtf.4s v0, v0, #30 -; CHECK: ret - %vcvt_n1 = tail call <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 30) - ret <4 x float> %vcvt_n1 -} -define <2 x double> @f1(<2 x i64> %a) nounwind readnone ssp { - %vcvt_n1 = tail call <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 12) - ret <2 x double> %vcvt_n1 -} - -define <2 x double> @f2(<2 x i64> %a) nounwind readnone ssp { - %vcvt_n1 = tail call <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 9) - ret <2 x double> %vcvt_n1 -} - -declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone -declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone -declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone -declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone -declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone diff --git a/test/CodeGen/ARM64/vminmaxnm.ll b/test/CodeGen/ARM64/vminmaxnm.ll deleted file mode 100644 index 628640759a5..00000000000 --- a/test/CodeGen/ARM64/vminmaxnm.ll +++ /dev/null @@ -1,68 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <2 x float> @f1(<2 x float> %a, <2 x float> %b) nounwind readnone ssp { -; CHECK: fmaxnm.2s v0, v0, v1 -; CHECK: ret - %vmaxnm2.i = tail call <2 x float> @llvm.arm64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind - ret <2 x float> %vmaxnm2.i -} - -define <4 x float> @f2(<4 x float> %a, <4 x float> %b) nounwind readnone ssp { -; CHECK: fmaxnm.4s v0, v0, v1 -; CHECK: ret - %vmaxnm2.i = tail call <4 x float> @llvm.arm64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind - ret <4 x float> %vmaxnm2.i -} - -define <2 x double> @f3(<2 x double> %a, <2 x double> %b) nounwind readnone ssp { -; CHECK: fmaxnm.2d v0, v0, v1 -; CHECK: ret - %vmaxnm2.i = tail call <2 x double> @llvm.arm64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind - ret <2 x double> %vmaxnm2.i -} - -define <2 x float> @f4(<2 x float> %a, <2 x float> %b) nounwind readnone ssp { -; CHECK: fminnm.2s v0, v0, v1 -; CHECK: ret - 
%vminnm2.i = tail call <2 x float> @llvm.arm64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind - ret <2 x float> %vminnm2.i -} - -define <4 x float> @f5(<4 x float> %a, <4 x float> %b) nounwind readnone ssp { -; CHECK: fminnm.4s v0, v0, v1 -; CHECK: ret - %vminnm2.i = tail call <4 x float> @llvm.arm64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind - ret <4 x float> %vminnm2.i -} - -define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp { -; CHECK: fminnm.2d v0, v0, v1 -; CHECK: ret - %vminnm2.i = tail call <2 x double> @llvm.arm64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind - ret <2 x double> %vminnm2.i -} - -declare <2 x double> @llvm.arm64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x float> @llvm.arm64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone -declare <2 x double> @llvm.arm64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone -declare <4 x float> @llvm.arm64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone -declare <2 x float> @llvm.arm64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone - - -define double @test_fmaxnmv(<2 x double> %in) { -; CHECK-LABEL: test_fmaxnmv: -; CHECK: fmaxnmp.2d d0, v0 - %max = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in) - ret double %max -} - -define double @test_fminnmv(<2 x double> %in) { -; CHECK-LABEL: test_fminnmv: -; CHECK: fminnmp.2d d0, v0 - %min = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in) - ret double %min -} - -declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>) -declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>) diff --git a/test/CodeGen/ARM64/vqsub.ll b/test/CodeGen/ARM64/vqsub.ll deleted file mode 100644 index 0afeb68348b..00000000000 --- a/test/CodeGen/ARM64/vqsub.ll +++ /dev/null @@ -1,147 +0,0 @@ -; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s - -define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: sqsub8b: -;CHECK: sqsub.8b - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i8> %tmp3 -} - -define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: sqsub4h: -;CHECK: sqsub.4h - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i16> %tmp3 -} - -define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: sqsub2s: -;CHECK: sqsub.2s - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i32> %tmp3 -} - -define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: uqsub8b: -;CHECK: uqsub.8b - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) - ret <8 x i8> %tmp3 -} - -define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: uqsub4h: -;CHECK: uqsub.4h - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) - ret <4 x i16> %tmp3 -} - -define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: uqsub2s: 
-;CHECK: uqsub.2s - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) - ret <2 x i32> %tmp3 -} - -define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: sqsub16b: -;CHECK: sqsub.16b - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) - ret <16 x i8> %tmp3 -} - -define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: sqsub8h: -;CHECK: sqsub.8h - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) - ret <8 x i16> %tmp3 -} - -define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: sqsub4s: -;CHECK: sqsub.4s - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) - ret <4 x i32> %tmp3 -} - -define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: sqsub2d: -;CHECK: sqsub.2d - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) - ret <2 x i64> %tmp3 -} - -define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: uqsub16b: -;CHECK: uqsub.16b - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) - ret <16 x i8> %tmp3 -} - -define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: uqsub8h: -;CHECK: uqsub.8h - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) - ret <8 x i16> %tmp3 -} - -define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: uqsub4s: -;CHECK: uqsub.4s - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) - ret <4 x i32> %tmp3 -} - -define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: uqsub2d: -;CHECK: uqsub.2d - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) - ret <2 x i64> %tmp3 -} - -declare <8 x i8> @llvm.arm64.neon.sqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone - -declare <8 x i8> @llvm.arm64.neon.uqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm64.neon.uqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm64.neon.uqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm64.neon.uqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone - -declare <16 x i8> @llvm.arm64.neon.sqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone - -declare <16 x i8> 
@llvm.arm64.neon.uqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm64.neon.uqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.uqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm64.neon.uqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/DebugInfo/ARM64/struct_by_value.ll b/test/DebugInfo/AArch64/struct_by_value.ll similarity index 100% rename from test/DebugInfo/ARM64/struct_by_value.ll rename to test/DebugInfo/AArch64/struct_by_value.ll diff --git a/test/MC/AArch64/adrp-relocation.s b/test/MC/AArch64/adrp-relocation.s index 03b930d5397..3bcef34e4f5 100644 --- a/test/MC/AArch64/adrp-relocation.s +++ b/test/MC/AArch64/adrp-relocation.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s .text // These should produce an ADRP/ADD pair to calculate the address of // testfn. The important point is that LLVM shouldn't think it can deal with the diff --git a/test/MC/ARM64/adr.s b/test/MC/AArch64/arm64-adr.s similarity index 88% rename from test/MC/ARM64/adr.s rename to test/MC/AArch64/arm64-adr.s index 3442225dfe3..131e545d3bb 100644 --- a/test/MC/ARM64/adr.s +++ b/test/MC/AArch64/arm64-adr.s @@ -8,9 +8,9 @@ adr x0, foo // CHECK: adr x0, #0 // encoding: [0x00,0x00,0x00,0x10] // CHECK: adr x0, #1 // encoding: [0x00,0x00,0x00,0x30] // CHECK: adr x0, .Ltmp0 // encoding: [A,A,A,0x10'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_arm64_pcrel_adr_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_aarch64_pcrel_adr_imm21 // CHECK: adr x0, foo // encoding: [A,A,A,0x10'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_arm64_pcrel_adr_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_aarch64_pcrel_adr_imm21 adrp x0, #0 adrp x0, #4096 @@ -19,9 +19,9 @@ adrp x0, foo // CHECK: adrp x0, #0 // encoding: [0x00,0x00,0x00,0x90] // CHECK: adrp x0, #4096 // encoding: [0x00,0x00,0x00,0xb0] // CHECK: adrp x0, .Ltmp0 // encoding: [A,A,A,0x90'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_aarch64_pcrel_adrp_imm21 // CHECK: adrp x0, foo // encoding: [A,A,A,0x90'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_aarch64_pcrel_adrp_imm21 adr x0, #0xffffffff adrp x0, #0xffffffff diff --git a/test/MC/ARM64/advsimd.s b/test/MC/AArch64/arm64-advsimd.s similarity index 100% rename from test/MC/ARM64/advsimd.s rename to test/MC/AArch64/arm64-advsimd.s diff --git a/test/MC/ARM64/aliases.s b/test/MC/AArch64/arm64-aliases.s similarity index 100% rename from test/MC/ARM64/aliases.s rename to test/MC/AArch64/arm64-aliases.s diff --git a/test/MC/ARM64/arithmetic-encoding.s b/test/MC/AArch64/arm64-arithmetic-encoding.s similarity index 100% rename from test/MC/ARM64/arithmetic-encoding.s rename to test/MC/AArch64/arm64-arithmetic-encoding.s diff --git a/test/MC/ARM64/arm64-fixup.s b/test/MC/AArch64/arm64-arm64-fixup.s similarity index 76% rename from test/MC/ARM64/arm64-fixup.s rename to test/MC/AArch64/arm64-arm64-fixup.s index eae6f68390e..81306fb5ac0 100644 --- a/test/MC/ARM64/arm64-fixup.s +++ b/test/MC/AArch64/arm64-arm64-fixup.s @@ -3,8 +3,8 @@ foo: adr x3, Lbar ; 
CHECK: adr x3, Lbar ; encoding: [0x03'A',A,A,0x10'A'] -; CHECK: fixup A - offset: 0, value: Lbar, kind: fixup_arm64_pcrel_adr_imm21 +; CHECK: fixup A - offset: 0, value: Lbar, kind: fixup_aarch64_pcrel_adr_imm21 Lbar: adrp x3, _printf@page ; CHECK: adrp x3, _printf@PAGE ; encoding: [0x03'A',A,A,0x90'A'] -; CHECK: fixup A - offset: 0, value: _printf@PAGE, kind: fixup_arm64_pcrel_adrp_imm21 +; CHECK: fixup A - offset: 0, value: _printf@PAGE, kind: fixup_aarch64_pcrel_adrp_imm21 diff --git a/test/MC/ARM64/basic-a64-instructions.s b/test/MC/AArch64/arm64-basic-a64-instructions.s similarity index 100% rename from test/MC/ARM64/basic-a64-instructions.s rename to test/MC/AArch64/arm64-basic-a64-instructions.s diff --git a/test/MC/ARM64/be-datalayout.s b/test/MC/AArch64/arm64-be-datalayout.s similarity index 100% rename from test/MC/ARM64/be-datalayout.s rename to test/MC/AArch64/arm64-be-datalayout.s diff --git a/test/MC/ARM64/bitfield-encoding.s b/test/MC/AArch64/arm64-bitfield-encoding.s similarity index 100% rename from test/MC/ARM64/bitfield-encoding.s rename to test/MC/AArch64/arm64-bitfield-encoding.s diff --git a/test/MC/ARM64/branch-encoding.s b/test/MC/AArch64/arm64-branch-encoding.s similarity index 77% rename from test/MC/ARM64/branch-encoding.s rename to test/MC/AArch64/arm64-branch-encoding.s index ba8fb3d9018..48c2099012f 100644 --- a/test/MC/ARM64/branch-encoding.s +++ b/test/MC/AArch64/arm64-branch-encoding.s @@ -20,7 +20,7 @@ foo: ; CHECK: encoding: [0x20,0x01,0x3f,0xd6] bl L1 ; CHECK: bl L1 ; encoding: [A,A,A,0b100101AA] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_call26 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_call26 ;----------------------------------------------------------------------------- ; Contitional branch instructions. 
@@ -28,52 +28,52 @@ foo: b L1 ; CHECK: b L1 ; encoding: [A,A,A,0b000101AA] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch26 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch26 b.eq L1 ; CHECK: b.eq L1 ; encoding: [0bAAA00000,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.ne L1 ; CHECK: b.ne L1 ; encoding: [0bAAA00001,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.cs L1 ; CHECK: b.hs L1 ; encoding: [0bAAA00010,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.cc L1 ; CHECK: b.lo L1 ; encoding: [0bAAA00011,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.mi L1 ; CHECK: b.mi L1 ; encoding: [0bAAA00100,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.pl L1 ; CHECK: b.pl L1 ; encoding: [0bAAA00101,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.vs L1 ; CHECK: b.vs L1 ; encoding: [0bAAA00110,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.vc L1 ; CHECK: b.vc L1 ; encoding: [0bAAA00111,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.hi L1 ; CHECK: b.hi L1 ; encoding: [0bAAA01000,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.ls L1 ; CHECK: b.ls L1 ; encoding: [0bAAA01001,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.ge L1 ; CHECK: b.ge L1 ; encoding: [0bAAA01010,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.lt L1 ; CHECK: b.lt L1 ; encoding: [0bAAA01011,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.gt L1 ; CHECK: b.gt L1 ; encoding: [0bAAA01100,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.le L1 ; CHECK: b.le L1 ; encoding: [0bAAA01101,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 b.al L1 ; CHECK: b.al L1 ; encoding: [0bAAA01110,A,A,0x54] -; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch19 +; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19 L1: b #28 ; CHECK: b #28 diff --git a/test/MC/ARM64/condbr-without-dots.s b/test/MC/AArch64/arm64-condbr-without-dots.s similarity index 100% rename from 
test/MC/ARM64/condbr-without-dots.s rename to test/MC/AArch64/arm64-condbr-without-dots.s diff --git a/test/MC/ARM64/crypto.s b/test/MC/AArch64/arm64-crypto.s similarity index 100% rename from test/MC/ARM64/crypto.s rename to test/MC/AArch64/arm64-crypto.s diff --git a/test/MC/ARM64/diagno-predicate.s b/test/MC/AArch64/arm64-diagno-predicate.s similarity index 100% rename from test/MC/ARM64/diagno-predicate.s rename to test/MC/AArch64/arm64-diagno-predicate.s diff --git a/test/MC/ARM64/diags.s b/test/MC/AArch64/arm64-diags.s similarity index 99% rename from test/MC/ARM64/diags.s rename to test/MC/AArch64/arm64-diags.s index 3ff2b54998f..cf00e9826e1 100644 --- a/test/MC/ARM64/diags.s +++ b/test/MC/AArch64/arm64-diags.s @@ -8,7 +8,7 @@ foo: ldr x3, (foo + 4) ldr x3, [foo + 4] ; CHECK: ldr x3, foo+4 ; encoding: [0bAAA00011,A,A,0x58] -; CHECK: ; fixup A - offset: 0, value: foo+4, kind: fixup_arm64_ldr_pcrel_imm19 +; CHECK: ; fixup A - offset: 0, value: foo+4, kind: fixup_aarch64_ldr_pcrel_imm19 ; CHECK-ERRORS: error: invalid operand for instruction ; The last argument should be flagged as an error. rdar://9576009 diff --git a/test/MC/ARM64/directive_loh.s b/test/MC/AArch64/arm64-directive_loh.s similarity index 100% rename from test/MC/ARM64/directive_loh.s rename to test/MC/AArch64/arm64-directive_loh.s diff --git a/test/MC/ARM64/elf-reloc-condbr.s b/test/MC/AArch64/arm64-elf-reloc-condbr.s similarity index 100% rename from test/MC/ARM64/elf-reloc-condbr.s rename to test/MC/AArch64/arm64-elf-reloc-condbr.s diff --git a/test/MC/ARM64/elf-relocs.s b/test/MC/AArch64/arm64-elf-relocs.s similarity index 100% rename from test/MC/ARM64/elf-relocs.s rename to test/MC/AArch64/arm64-elf-relocs.s diff --git a/test/MC/ARM64/fp-encoding.s b/test/MC/AArch64/arm64-fp-encoding.s similarity index 100% rename from test/MC/ARM64/fp-encoding.s rename to test/MC/AArch64/arm64-fp-encoding.s diff --git a/test/MC/ARM64/large-relocs.s b/test/MC/AArch64/arm64-large-relocs.s similarity index 83% rename from test/MC/ARM64/large-relocs.s rename to test/MC/AArch64/arm64-large-relocs.s index 48aea43d6bc..2a0cfa22286 100644 --- a/test/MC/ARM64/large-relocs.s +++ b/test/MC/AArch64/arm64-large-relocs.s @@ -4,9 +4,9 @@ movz x2, #:abs_g0:sym movk w3, #:abs_g0_nc:sym // CHECK: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw // CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw // CHECK-OBJ: 0 R_AARCH64_MOVW_UABS_G0 sym // CHECK-OBJ: 4 R_AARCH64_MOVW_UABS_G0_NC sym @@ -14,9 +14,9 @@ movz x4, #:abs_g1:sym movk w5, #:abs_g1_nc:sym // CHECK: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw // CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_aarch64_movw // CHECK-OBJ: 8 R_AARCH64_MOVW_UABS_G1 sym // CHECK-OBJ: c R_AARCH64_MOVW_UABS_G1_NC sym @@ -24,15 +24,15 @@ movz x6, #:abs_g2:sym movk 
x7, #:abs_g2_nc:sym // CHECK: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_aarch64_movw // CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_aarch64_movw // CHECK-OBJ: 10 R_AARCH64_MOVW_UABS_G2 sym // CHECK-OBJ: 14 R_AARCH64_MOVW_UABS_G2_NC sym movz x8, #:abs_g3:sym // CHECK: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw // CHECK-OBJ: 18 R_AARCH64_MOVW_UABS_G3 sym diff --git a/test/MC/ARM64/leaf-compact-unwind.s b/test/MC/AArch64/arm64-leaf-compact-unwind.s similarity index 100% rename from test/MC/ARM64/leaf-compact-unwind.s rename to test/MC/AArch64/arm64-leaf-compact-unwind.s diff --git a/test/MC/ARM64/logical-encoding.s b/test/MC/AArch64/arm64-logical-encoding.s similarity index 100% rename from test/MC/ARM64/logical-encoding.s rename to test/MC/AArch64/arm64-logical-encoding.s diff --git a/test/MC/ARM64/mapping-across-sections.s b/test/MC/AArch64/arm64-mapping-across-sections.s similarity index 100% rename from test/MC/ARM64/mapping-across-sections.s rename to test/MC/AArch64/arm64-mapping-across-sections.s diff --git a/test/MC/ARM64/mapping-within-section.s b/test/MC/AArch64/arm64-mapping-within-section.s similarity index 100% rename from test/MC/ARM64/mapping-within-section.s rename to test/MC/AArch64/arm64-mapping-within-section.s diff --git a/test/MC/ARM64/memory.s b/test/MC/AArch64/arm64-memory.s similarity index 100% rename from test/MC/ARM64/memory.s rename to test/MC/AArch64/arm64-memory.s diff --git a/test/MC/ARM64/nv-cond.s b/test/MC/AArch64/arm64-nv-cond.s similarity index 100% rename from test/MC/ARM64/nv-cond.s rename to test/MC/AArch64/arm64-nv-cond.s diff --git a/test/MC/ARM64/optional-hash.s b/test/MC/AArch64/arm64-optional-hash.s similarity index 100% rename from test/MC/ARM64/optional-hash.s rename to test/MC/AArch64/arm64-optional-hash.s diff --git a/test/MC/ARM64/separator.s b/test/MC/AArch64/arm64-separator.s similarity index 100% rename from test/MC/ARM64/separator.s rename to test/MC/AArch64/arm64-separator.s diff --git a/test/MC/ARM64/simd-ldst.s b/test/MC/AArch64/arm64-simd-ldst.s similarity index 100% rename from test/MC/ARM64/simd-ldst.s rename to test/MC/AArch64/arm64-simd-ldst.s diff --git a/test/MC/ARM64/small-data-fixups.s b/test/MC/AArch64/arm64-small-data-fixups.s similarity index 100% rename from test/MC/ARM64/small-data-fixups.s rename to test/MC/AArch64/arm64-small-data-fixups.s diff --git a/test/MC/ARM64/spsel-sysreg.s b/test/MC/AArch64/arm64-spsel-sysreg.s similarity index 100% rename from test/MC/ARM64/spsel-sysreg.s rename to test/MC/AArch64/arm64-spsel-sysreg.s diff --git a/test/MC/ARM64/system-encoding.s b/test/MC/AArch64/arm64-system-encoding.s similarity index 100% rename from test/MC/ARM64/system-encoding.s rename to test/MC/AArch64/arm64-system-encoding.s diff --git a/test/MC/ARM64/target-specific-sysreg.s b/test/MC/AArch64/arm64-target-specific-sysreg.s similarity index 100% rename from test/MC/ARM64/target-specific-sysreg.s rename to test/MC/AArch64/arm64-target-specific-sysreg.s diff --git 
a/test/MC/ARM64/tls-modifiers-darwin.s b/test/MC/AArch64/arm64-tls-modifiers-darwin.s similarity index 100% rename from test/MC/ARM64/tls-modifiers-darwin.s rename to test/MC/AArch64/arm64-tls-modifiers-darwin.s diff --git a/test/MC/ARM64/tls-relocs.s b/test/MC/AArch64/arm64-tls-relocs.s similarity index 82% rename from test/MC/ARM64/tls-relocs.s rename to test/MC/AArch64/arm64-tls-relocs.s index 681f616d909..96c2b55c36d 100644 --- a/test/MC/ARM64/tls-relocs.s +++ b/test/MC/AArch64/arm64-tls-relocs.s @@ -9,14 +9,14 @@ movz x15, #:gottprel_g1:var // CHECK: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM:[^ ]+]] movk x13, #:gottprel_g0_nc:var // CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]] @@ -25,11 +25,11 @@ ldr x10, [x0, #:gottprel_lo12:var] ldr x9, :gottprel:var // CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21 // CHECK: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58] -// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]] @@ -43,9 +43,9 @@ movz x3, #:tprel_g2:var movn x4, #:tprel_g2:var // CHECK: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw // CHECK: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]] @@ -55,11 +55,11 @@ movn x6, #:tprel_g1:var movz w7, #:tprel_g1:var // CHECK: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw // CHECK: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw +// 
CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw // CHECK: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]] @@ -69,9 +69,9 @@ movk x9, #:tprel_g1_nc:var movk w10, #:tprel_g1_nc:var // CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK: movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]] @@ -81,11 +81,11 @@ movn x12, #:tprel_g0:var movz w13, #:tprel_g0:var // CHECK: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw // CHECK: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw // CHECK: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]] @@ -95,9 +95,9 @@ movk x15, #:tprel_g0_nc:var movk w16, #:tprel_g0_nc:var // CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] @@ -105,14 +105,14 @@ add x21, x22, #:tprel_lo12:var // CHECK: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]] add x25, x26, #:tprel_lo12_nc:var // CHECK: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: 
:tprel_lo12_nc:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]] @@ -120,9 +120,9 @@ ldrb w29, [x30, #:tprel_lo12:var] ldrsb x29, [x28, #:tprel_lo12_nc:var] // CHECK: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]] @@ -131,9 +131,9 @@ strh w27, [x26, #:tprel_lo12:var] ldrsh x25, [x24, #:tprel_lo12_nc:var] // CHECK: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]] @@ -142,9 +142,9 @@ ldr w23, [x22, #:tprel_lo12:var] ldrsw x21, [x20, #:tprel_lo12_nc:var] // CHECK: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]] @@ -152,9 +152,9 @@ ldr x19, [x18, #:tprel_lo12:var] str x17, [x16, #:tprel_lo12_nc:var] // CHECK: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 
{{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]] @@ -167,9 +167,9 @@ movz x3, #:dtprel_g2:var movn x4, #:dtprel_g2:var // CHECK: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw // CHECK: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]] @@ -179,11 +179,11 @@ movn x6, #:dtprel_g1:var movz w7, #:dtprel_g1:var // CHECK: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw // CHECK: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw // CHECK: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] @@ -193,9 +193,9 @@ movk x9, #:dtprel_g1_nc:var movk w10, #:dtprel_g1_nc:var // CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]] @@ -205,11 +205,11 @@ movn x12, #:dtprel_g0:var movz w13, #:dtprel_g0:var // CHECK: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw // CHECK: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw // CHECK: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] @@ -219,9 +219,9 @@ 
movk x15, #:dtprel_g0_nc:var movk w16, #:dtprel_g0_nc:var // CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] @@ -229,14 +229,14 @@ add x21, x22, #:dtprel_lo12:var // CHECK: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]] add x25, x26, #:dtprel_lo12_nc:var // CHECK: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]] @@ -244,9 +244,9 @@ ldrb w29, [x30, #:dtprel_lo12:var] ldrsb x29, [x28, #:dtprel_lo12_nc:var] // CHECK: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]] @@ -255,9 +255,9 @@ strh w27, [x26, #:dtprel_lo12:var] ldrsh x25, [x24, #:dtprel_lo12_nc:var] // CHECK: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]] @@ -266,9 +266,9 @@ ldr w23, [x22, #:dtprel_lo12:var] ldrsw x21, [x20, #:dtprel_lo12_nc:var] // CHECK: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]] @@ -276,9 +276,9 @@ ldr x19, [x18, #:dtprel_lo12:var] str x17, [x16, #:dtprel_lo12_nc:var] // CHECK: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]] @@ -294,13 +294,13 @@ blr x3 // CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] -// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21 // CHECK: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91] -// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK: .tlsdesccall var // encoding: [] -// CHECK-NEXT: // fixup A - offset: 0, value: var, kind: fixup_arm64_tlsdesc_call +// CHECK-NEXT: // fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call // CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] diff --git a/test/MC/ARM64/v128_lo-diagnostics.s b/test/MC/AArch64/arm64-v128_lo-diagnostics.s similarity index 100% rename from test/MC/ARM64/v128_lo-diagnostics.s rename to test/MC/AArch64/arm64-v128_lo-diagnostics.s diff --git a/test/MC/ARM64/variable-exprs.s b/test/MC/AArch64/arm64-variable-exprs.s similarity index 100% rename from test/MC/ARM64/variable-exprs.s rename to test/MC/AArch64/arm64-variable-exprs.s diff --git a/test/MC/ARM64/vector-lists.s b/test/MC/AArch64/arm64-vector-lists.s similarity index 100% rename from test/MC/ARM64/vector-lists.s rename to test/MC/AArch64/arm64-vector-lists.s diff --git a/test/MC/ARM64/verbose-vector-case.s b/test/MC/AArch64/arm64-verbose-vector-case.s similarity index 100% rename from test/MC/ARM64/verbose-vector-case.s rename to test/MC/AArch64/arm64-verbose-vector-case.s diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s index c6cb6b01f8a..a4a3b1379c9 100644 --- 
a/test/MC/AArch64/basic-a64-diagnostics.s +++ b/test/MC/AArch64/basic-a64-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2> %t +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ERROR-ARM64 < %t %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s index 72156bc9c51..a12968b04e3 100644 --- a/test/MC/AArch64/basic-a64-instructions.s +++ b/test/MC/AArch64/basic-a64-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s .globl _func // Check that the assembler can handle the documented syntax from the ARM ARM. @@ -127,7 +127,7 @@ _func: // CHECK: adds w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x2b] // CHECK: adds w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x2b] // CHECK: adds w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x2b] -// CHECK-ARM64: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b] +// CHECK: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b] // CHECK: adds w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x2b] // subs @@ -255,7 +255,7 @@ _func: // CHECK: sub sp, x3, x7, lsl #4 // encoding: [0x7f,0x70,0x27,0xcb] // CHECK: add w2, wsp, w3, lsl #1 // encoding: [0xe2,0x47,0x23,0x0b] // CHECK: cmp wsp, w9 // encoding: [0xff,0x43,0x29,0x6b] -// CHECK-ARM64: cmn wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b] +// CHECK: cmn wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b] // CHECK: subs x3, sp, x9, lsl #2 // encoding: [0xe3,0x6b,0x29,0xeb] //------------------------------------------------------------------------------ @@ -349,8 +349,8 @@ _func: // A relocation check (default to lo12, which is the only sane relocation anyway really) add x0, x4, #:lo12:var -// CHECK-ARM64: add x0, x4, :lo12:var // encoding: [0x80,0bAAAAAA00,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x0, x4, :lo12:var // encoding: [0x80,0bAAAAAA00,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :lo12:var, kind: fixup_aarch64_add_imm12 //------------------------------------------------------------------------------ // Add-sub (shifted register) @@ -484,7 +484,7 @@ _func: sub w4, w6, wzr // CHECK: sub w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x4b] // CHECK: sub wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x4b] -// CHECK-ARM64: neg w20, w4 // encoding: [0xf4,0x03,0x04,0x4b] +// CHECK: neg w20, w4 // encoding: [0xf4,0x03,0x04,0x4b] // CHECK: sub w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x4b] sub w11, w13, w15, lsl #0 @@ -514,7 +514,7 @@ _func: sub x4, x6, xzr // CHECK: sub x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xcb] // CHECK: sub xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xcb] -// CHECK-ARM64: neg x20, x4 // encoding: [0xf4,0x03,0x04,0xcb] +// CHECK: neg x20, x4 // encoding: [0xf4,0x03,0x04,0xcb] // CHECK: sub x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xcb] sub x11, x13, x15, lsl #0 @@ -544,7 +544,7 @@ _func: subs w4, w6, wzr // CHECK: subs w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x6b] // CHECK: {{subs wzr,|cmp}} w3, w5 // encoding: [0x7f,0x00,0x05,0x6b] -// CHECK-ARM64: negs w20, w4 // encoding: [0xf4,0x03,0x04,0x6b] +// CHECK: negs w20, w4 // encoding: 
[0xf4,0x03,0x04,0x6b] // CHECK: subs w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x6b] subs w11, w13, w15, lsl #0 @@ -574,7 +574,7 @@ _func: subs x4, x6, xzr // CHECK: subs x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xeb] // CHECK: {{subs xzr,|cmp}} x3, x5 // encoding: [0x7f,0x00,0x05,0xeb] -// CHECK-ARM64: negs x20, x4 // encoding: [0xf4,0x03,0x04,0xeb] +// CHECK: negs x20, x4 // encoding: [0xf4,0x03,0x04,0xeb] // CHECK: subs x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xeb] subs x11, x13, x15, lsl #0 @@ -713,17 +713,17 @@ _func: neg w29, w30 neg w30, wzr neg wzr, w0 -// CHECK-ARM64: neg w29, w30 // encoding: [0xfd,0x03,0x1e,0x4b] -// CHECK-ARM64: neg w30, wzr // encoding: [0xfe,0x03,0x1f,0x4b] -// CHECK-ARM64: neg wzr, w0 // encoding: [0xff,0x03,0x00,0x4b] +// CHECK: neg w29, w30 // encoding: [0xfd,0x03,0x1e,0x4b] +// CHECK: neg w30, wzr // encoding: [0xfe,0x03,0x1f,0x4b] +// CHECK: neg wzr, w0 // encoding: [0xff,0x03,0x00,0x4b] neg w28, w27, lsl #0 neg w26, w25, lsl #29 neg w24, w23, lsl #31 -// CHECK-ARM64: neg w28, w27 // encoding: [0xfc,0x03,0x1b,0x4b] -// CHECK-ARM64: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b] -// CHECK-ARM64: neg w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b] +// CHECK: neg w28, w27 // encoding: [0xfc,0x03,0x1b,0x4b] +// CHECK: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b] +// CHECK: neg w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b] neg w22, w21, lsr #0 neg w20, w19, lsr #1 @@ -742,17 +742,17 @@ _func: neg x29, x30 neg x30, xzr neg xzr, x0 -// CHECK-ARM64: neg x29, x30 // encoding: [0xfd,0x03,0x1e,0xcb] -// CHECK-ARM64: neg x30, xzr // encoding: [0xfe,0x03,0x1f,0xcb] -// CHECK-ARM64: neg xzr, x0 // encoding: [0xff,0x03,0x00,0xcb] +// CHECK: neg x29, x30 // encoding: [0xfd,0x03,0x1e,0xcb] +// CHECK: neg x30, xzr // encoding: [0xfe,0x03,0x1f,0xcb] +// CHECK: neg xzr, x0 // encoding: [0xff,0x03,0x00,0xcb] neg x28, x27, lsl #0 neg x26, x25, lsl #29 neg x24, x23, lsl #31 -// CHECK-ARM64: neg x28, x27 // encoding: [0xfc,0x03,0x1b,0xcb] -// CHECK-ARM64: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb] -// CHECK-ARM64: neg x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb] +// CHECK: neg x28, x27 // encoding: [0xfc,0x03,0x1b,0xcb] +// CHECK: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb] +// CHECK: neg x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb] neg x22, x21, lsr #0 neg x20, x19, lsr #1 @@ -771,17 +771,17 @@ _func: negs w29, w30 negs w30, wzr negs wzr, w0 -// CHECK-ARM64: negs w29, w30 // encoding: [0xfd,0x03,0x1e,0x6b] -// CHECK-ARM64: negs w30, wzr // encoding: [0xfe,0x03,0x1f,0x6b] -// CHECK-ARM64: cmp wzr, w0 // encoding: [0xff,0x03,0x00,0x6b] +// CHECK: negs w29, w30 // encoding: [0xfd,0x03,0x1e,0x6b] +// CHECK: negs w30, wzr // encoding: [0xfe,0x03,0x1f,0x6b] +// CHECK: cmp wzr, w0 // encoding: [0xff,0x03,0x00,0x6b] negs w28, w27, lsl #0 negs w26, w25, lsl #29 negs w24, w23, lsl #31 -// CHECK-ARM64: negs w28, w27 // encoding: [0xfc,0x03,0x1b,0x6b] -// CHECK-ARM64: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b] -// CHECK-ARM64: negs w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b] +// CHECK: negs w28, w27 // encoding: [0xfc,0x03,0x1b,0x6b] +// CHECK: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b] +// CHECK: negs w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b] negs w22, w21, lsr #0 negs w20, w19, lsr #1 @@ -800,17 +800,17 @@ _func: negs x29, x30 negs x30, xzr negs xzr, x0 -// CHECK-ARM64: negs x29, x30 // encoding: [0xfd,0x03,0x1e,0xeb] -// CHECK-ARM64: negs x30, xzr // encoding: 
[0xfe,0x03,0x1f,0xeb] -// CHECK-ARM64: cmp xzr, x0 // encoding: [0xff,0x03,0x00,0xeb] +// CHECK: negs x29, x30 // encoding: [0xfd,0x03,0x1e,0xeb] +// CHECK: negs x30, xzr // encoding: [0xfe,0x03,0x1f,0xeb] +// CHECK: cmp xzr, x0 // encoding: [0xff,0x03,0x00,0xeb] negs x28, x27, lsl #0 negs x26, x25, lsl #29 negs x24, x23, lsl #31 -// CHECK-ARM64: negs x28, x27 // encoding: [0xfc,0x03,0x1b,0xeb] -// CHECK-ARM64: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb] -// CHECK-ARM64: negs x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb] +// CHECK: negs x28, x27 // encoding: [0xfc,0x03,0x1b,0xeb] +// CHECK: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb] +// CHECK: negs x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb] negs x22, x21, lsr #0 negs x20, x19, lsr #1 @@ -938,28 +938,28 @@ _func: sbfm wzr, wzr, #31, #31 sbfm w12, w9, #0, #0 -// CHECK-ARM64: sbfx x1, x2, #3, #2 // encoding: [0x41,0x10,0x43,0x93] -// CHECK-ARM64: asr x3, x4, #63 // encoding: [0x83,0xfc,0x7f,0x93] -// CHECK-ARM64: asr wzr, wzr, #31 // encoding: [0xff,0x7f,0x1f,0x13] -// CHECK-ARM64: sbfx w12, w9, #0, #1 // encoding: [0x2c,0x01,0x00,0x13] +// CHECK: sbfx x1, x2, #3, #2 // encoding: [0x41,0x10,0x43,0x93] +// CHECK: asr x3, x4, #63 // encoding: [0x83,0xfc,0x7f,0x93] +// CHECK: asr wzr, wzr, #31 // encoding: [0xff,0x7f,0x1f,0x13] +// CHECK: sbfx w12, w9, #0, #1 // encoding: [0x2c,0x01,0x00,0x13] ubfm x4, x5, #12, #10 ubfm xzr, x4, #0, #0 ubfm x4, xzr, #63, #5 ubfm x5, x6, #12, #63 -// CHECK-ARM64: ubfiz x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xd3] -// CHECK-ARM64: ubfx xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xd3] -// CHECK-ARM64: ubfiz x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xd3] -// CHECK-ARM64: lsr x5, x6, #12 // encoding: [0xc5,0xfc,0x4c,0xd3] +// CHECK: ubfiz x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xd3] +// CHECK: ubfx xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xd3] +// CHECK: ubfiz x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xd3] +// CHECK: lsr x5, x6, #12 // encoding: [0xc5,0xfc,0x4c,0xd3] bfm x4, x5, #12, #10 bfm xzr, x4, #0, #0 bfm x4, xzr, #63, #5 bfm x5, x6, #12, #63 -// CHECK-ARM64: bfi x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xb3] -// CHECK-ARM64: bfxil xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xb3] -// CHECK-ARM64: bfi x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xb3] -// CHECK-ARM64: bfxil x5, x6, #12, #52 // encoding: [0xc5,0xfc,0x4c,0xb3] +// CHECK: bfi x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xb3] +// CHECK: bfxil xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xb3] +// CHECK: bfi x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xb3] +// CHECK: bfxil x5, x6, #12, #52 // encoding: [0xc5,0xfc,0x4c,0xb3] sxtb w1, w2 sxtb xzr, w3 @@ -1018,9 +1018,9 @@ _func: sbfiz xzr, xzr, #10, #11 // CHECK: {{sbfiz|sbfx}} w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13] // CHECK: sbfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0x93] -// CHECK-ARM64: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] +// CHECK: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] // CHECK: sbfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0x93] -// CHECK-ARM64: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] +// CHECK: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] // CHECK: sbfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x13] // CHECK: sbfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x13] // CHECK: sbfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0x93] @@ -1034,12 +1034,12 @@ _func: sbfx w13, w14, #29, #3 sbfx xzr, xzr, #10, #11 // CHECK: sbfx w9, 
w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13] -// CHECK-ARM64: asr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0x93] -// CHECK-ARM64: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] -// CHECK-ARM64: asr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0x93] -// CHECK-ARM64: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] -// CHECK-ARM64: asr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x13] -// CHECK-ARM64: asr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x13] +// CHECK: asr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0x93] +// CHECK: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] +// CHECK: asr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0x93] +// CHECK: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] +// CHECK: asr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x13] +// CHECK: asr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x13] // CHECK: sbfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0x93] bfi w9, w10, #0, #1 @@ -1051,14 +1051,14 @@ _func: bfi w13, w14, #29, #3 bfi xzr, xzr, #10, #11 -// CHECK-ARM64: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33] -// CHECK-ARM64: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3] -// CHECK-ARM64: bfxil x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3] -// CHECK-ARM64: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3] -// CHECK-ARM64: bfxil w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33] -// CHECK-ARM64: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33] -// CHECK-ARM64: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33] -// CHECK-ARM64: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3] +// CHECK: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33] +// CHECK: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3] +// CHECK: bfxil x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3] +// CHECK: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3] +// CHECK: bfxil w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33] +// CHECK: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33] +// CHECK: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33] +// CHECK: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3] bfxil w9, w10, #0, #1 bfxil x2, x3, #63, #1 @@ -1086,14 +1086,14 @@ _func: ubfiz w13, w14, #29, #3 ubfiz xzr, xzr, #10, #11 -// CHECK-ARM64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] -// CHECK-ARM64: lsl x2, x3, #63 // encoding: [0x62,0x00,0x41,0xd3] -// CHECK-ARM64: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3] -// CHECK-ARM64: lsl x9, x10, #5 // encoding: [0x49,0xe9,0x7b,0xd3] -// CHECK-ARM64: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53] -// CHECK-ARM64: lsl w11, w12, #31 // encoding: [0x8b,0x01,0x01,0x53] -// CHECK-ARM64: lsl w13, w14, #29 // encoding: [0xcd,0x09,0x03,0x53] -// CHECK-ARM64: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3] +// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] +// CHECK: lsl x2, x3, #63 // encoding: [0x62,0x00,0x41,0xd3] +// CHECK: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3] +// CHECK: lsl x9, x10, #5 // encoding: [0x49,0xe9,0x7b,0xd3] +// CHECK: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53] +// CHECK: lsl w11, w12, #31 // encoding: [0x8b,0x01,0x01,0x53] +// CHECK: lsl w13, w14, #29 // encoding: [0xcd,0x09,0x03,0x53] +// CHECK: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3] ubfx w9, w10, #0, #1 ubfx x2, x3, #63, #1 @@ -1104,14 +1104,14 @@ _func: ubfx w13, w14, #29, #3 ubfx xzr, xzr, #10, #11 -// CHECK-ARM64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] -// CHECK-ARM64: 
lsr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0xd3] -// CHECK-ARM64: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3] -// CHECK-ARM64: lsr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0xd3] -// CHECK-ARM64: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53] -// CHECK-ARM64: lsr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x53] -// CHECK-ARM64: lsr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x53] -// CHECK-ARM64: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3] +// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] +// CHECK: lsr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0xd3] +// CHECK: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3] +// CHECK: lsr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0xd3] +// CHECK: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53] +// CHECK: lsr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x53] +// CHECK: lsr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x53] +// CHECK: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3] //------------------------------------------------------------------------------ // Compare & branch (immediate) //------------------------------------------------------------------------------ @@ -1120,22 +1120,22 @@ _func: cbz x5, lbl cbnz x2, lbl cbnz x26, lbl -// CHECK-ARM64: cbz w5, lbl // encoding: [0bAAA00101,A,A,0x34] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: cbz x5, lbl // encoding: [0bAAA00101,A,A,0xb4] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: cbnz x2, lbl // encoding: [0bAAA00010,A,A,0xb5] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: cbnz x26, lbl // encoding: [0bAAA11010,A,A,0xb5] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 +// CHECK: cbz w5, lbl // encoding: [0bAAA00101,A,A,0x34] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: cbz x5, lbl // encoding: [0bAAA00101,A,A,0xb4] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: cbnz x2, lbl // encoding: [0bAAA00010,A,A,0xb5] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: cbnz x26, lbl // encoding: [0bAAA11010,A,A,0xb5] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 cbz wzr, lbl cbnz xzr, lbl -// CHECK-ARM64: cbz wzr, lbl // encoding: [0bAAA11111,A,A,0x34] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: cbnz xzr, lbl // encoding: [0bAAA11111,A,A,0xb5] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 +// CHECK: cbz wzr, lbl // encoding: [0bAAA11111,A,A,0x34] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: cbnz xzr, lbl // encoding: [0bAAA11111,A,A,0xb5] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 cbz w5, #0 cbnz x3, #-4 @@ -1168,40 +1168,40 @@ _func: b.le lbl b.al lbl -// CHECK-ARM64: b.eq lbl // encoding: [0bAAA00000,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.ne lbl // encoding: [0bAAA00001,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.hs lbl // encoding: [0bAAA00010,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// 
CHECK-ARM64: b.hs lbl // encoding: [0bAAA00010,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.lo lbl // encoding: [0bAAA00011,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.lo lbl // encoding: [0bAAA00011,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.mi lbl // encoding: [0bAAA00100,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.pl lbl // encoding: [0bAAA00101,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.vs lbl // encoding: [0bAAA00110,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.vc lbl // encoding: [0bAAA00111,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.hi lbl // encoding: [0bAAA01000,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.ls lbl // encoding: [0bAAA01001,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.ge lbl // encoding: [0bAAA01010,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.lt lbl // encoding: [0bAAA01011,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.gt lbl // encoding: [0bAAA01100,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.le lbl // encoding: [0bAAA01101,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 -// CHECK-ARM64: b.al lbl // encoding: [0bAAA01110,A,A,0x54] -// CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 +// CHECK: b.eq lbl // encoding: [0bAAA00000,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.ne lbl // encoding: [0bAAA00001,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.hs lbl // encoding: [0bAAA00010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.hs lbl // encoding: [0bAAA00010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.lo lbl // encoding: [0bAAA00011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.lo lbl // encoding: [0bAAA00011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.mi lbl // encoding: [0bAAA00100,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.pl lbl // encoding: [0bAAA00101,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.vs lbl // encoding: [0bAAA00110,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.vc lbl // encoding: [0bAAA00111,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.hi lbl // encoding: [0bAAA01000,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: 
fixup_aarch64_pcrel_branch19 +// CHECK: b.ls lbl // encoding: [0bAAA01001,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.ge lbl // encoding: [0bAAA01010,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.lt lbl // encoding: [0bAAA01011,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.gt lbl // encoding: [0bAAA01100,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.le lbl // encoding: [0bAAA01101,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 +// CHECK: b.al lbl // encoding: [0bAAA01110,A,A,0x54] +// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19 // ARM64 has these in a separate file beq lbl @@ -2186,23 +2186,23 @@ _func: ldr x29, there ldrsw xzr, everywhere -// CHECK-ARM64: ldr w3, here // encoding: [0bAAA00011,A,A,0x18] -// CHECK-ARM64: // fixup A - offset: 0, value: here, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: ldr x29, there // encoding: [0bAAA11101,A,A,0x58] -// CHECK-ARM64: // fixup A - offset: 0, value: there, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: ldrsw xzr, everywhere // encoding: [0bAAA11111,A,A,0x98] -// CHECK-ARM64: // fixup A - offset: 0, value: everywhere, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK: ldr w3, here // encoding: [0bAAA00011,A,A,0x18] +// CHECK: // fixup A - offset: 0, value: here, kind: fixup_aarch64_ldr_pcrel_imm19 +// CHECK: ldr x29, there // encoding: [0bAAA11101,A,A,0x58] +// CHECK: // fixup A - offset: 0, value: there, kind: fixup_aarch64_ldr_pcrel_imm19 +// CHECK: ldrsw xzr, everywhere // encoding: [0bAAA11111,A,A,0x98] +// CHECK: // fixup A - offset: 0, value: everywhere, kind: fixup_aarch64_ldr_pcrel_imm19 ldr s0, who_knows ldr d0, i_dont ldr q0, there_must_be_a_better_way -// CHECK-ARM64: ldr s0, who_knows // encoding: [0bAAA00000,A,A,0x1c] -// CHECK-ARM64: // fixup A - offset: 0, value: who_knows, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: ldr d0, i_dont // encoding: [0bAAA00000,A,A,0x5c] -// CHECK-ARM64: // fixup A - offset: 0, value: i_dont, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: ldr q0, there_must_be_a_better_way // encoding: [0bAAA00000,A,A,0x9c] -// CHECK-ARM64: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK: ldr s0, who_knows // encoding: [0bAAA00000,A,A,0x1c] +// CHECK: // fixup A - offset: 0, value: who_knows, kind: fixup_aarch64_ldr_pcrel_imm19 +// CHECK: ldr d0, i_dont // encoding: [0bAAA00000,A,A,0x5c] +// CHECK: // fixup A - offset: 0, value: i_dont, kind: fixup_aarch64_ldr_pcrel_imm19 +// CHECK: ldr q0, there_must_be_a_better_way // encoding: [0bAAA00000,A,A,0x9c] +// CHECK: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_aarch64_ldr_pcrel_imm19 ldr w0, #1048572 ldr x10, #-1048576 @@ -2212,10 +2212,10 @@ _func: prfm pldl1strm, nowhere prfm #22, somewhere -// CHECK-ARM64: prfm pldl1strm, nowhere // encoding: [0bAAA00001,A,A,0xd8] -// CHECK-ARM64: // fixup A - offset: 0, value: nowhere, kind: fixup_arm64_ldr_pcrel_imm19 -// CHECK-ARM64: prfm #22, somewhere // encoding: [0bAAA10110,A,A,0xd8] -// CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK: prfm pldl1strm, nowhere // encoding: [0bAAA00001,A,A,0xd8] +// CHECK: // fixup A - offset: 0, value: nowhere, kind: 
fixup_aarch64_ldr_pcrel_imm19 +// CHECK: prfm #22, somewhere // encoding: [0bAAA10110,A,A,0xd8] +// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_ldr_pcrel_imm19 //------------------------------------------------------------------------------ // Load/store exclusive @@ -2431,18 +2431,18 @@ _func: ldr x15, [x5, #:lo12:sym] ldr q3, [x2, #:lo12:sym] -// CHECK-ARM64: str x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b00AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: ldrb w15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale1 -// CHECK-ARM64: ldrsh x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale2 -// CHECK-ARM64: ldrsw x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale4 -// CHECK-ARM64: ldr x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: ldr q3, [x2, :lo12:sym] // encoding: [0x43,0bAAAAAA00,0b11AAAAAA,0x3d] -// CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale16 +// CHECK: str x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b00AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: ldrb w15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale1 +// CHECK: ldrsh x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale2 +// CHECK: ldrsw x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale4 +// CHECK: ldr x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: ldr q3, [x2, :lo12:sym] // encoding: [0x43,0bAAAAAA00,0b11AAAAAA,0x3d] +// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale16 prfm pldl1keep, [sp, #8] prfm pldl1strm, [x3] @@ -3323,34 +3323,34 @@ _func: movz x2, #:abs_g0:sym movk w3, #:abs_g0_nc:sym -// CHECK-ARM64: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_arm64_movw +// CHECK: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw +// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw movz x4, #:abs_g1:sym movk w5, #:abs_g1_nc:sym -// CHECK-ARM64: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_arm64_movw 
-// CHECK-ARM64: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_arm64_movw +// CHECK: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw +// CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_aarch64_movw movz x6, #:abs_g2:sym movk x7, #:abs_g2_nc:sym -// CHECK-ARM64: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_arm64_movw +// CHECK: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_aarch64_movw +// CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_aarch64_movw movz x8, #:abs_g3:sym movk x9, #:abs_g3:sym -// CHECK-ARM64: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movk x9, #:abs_g3:sym // encoding: [0bAAA01001,A,0b111AAAAA,0xf2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw +// CHECK: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw +// CHECK: movk x9, #:abs_g3:sym // encoding: [0bAAA01001,A,0b111AAAAA,0xf2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw movn x30, #:abs_g0_s:sym @@ -3358,36 +3358,36 @@ _func: movn w10, #:abs_g0_s:sym movz w25, #:abs_g0_s:sym -// CHECK-ARM64: movn x30, #:abs_g0_s:sym // encoding: [0bAAA11110,A,0b100AAAAA,0x92] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz x19, #:abs_g0_s:sym // encoding: [0bAAA10011,A,0b100AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movn w10, #:abs_g0_s:sym // encoding: [0bAAA01010,A,0b100AAAAA,0x12] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz w25, #:abs_g0_s:sym // encoding: [0bAAA11001,A,0b100AAAAA,0x52] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw +// CHECK: movn x30, #:abs_g0_s:sym // encoding: [0bAAA11110,A,0b100AAAAA,0x92] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw +// CHECK: movz x19, #:abs_g0_s:sym // encoding: [0bAAA10011,A,0b100AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw +// CHECK: movn w10, #:abs_g0_s:sym // encoding: [0bAAA01010,A,0b100AAAAA,0x12] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw +// CHECK: movz w25, #:abs_g0_s:sym // encoding: [0bAAA11001,A,0b100AAAAA,0x52] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw movn x30, #:abs_g1_s:sym movz x19, #:abs_g1_s:sym movn w10, #:abs_g1_s:sym movz w25, #:abs_g1_s:sym -// CHECK-ARM64: movn 
x30, #:abs_g1_s:sym // encoding: [0bAAA11110,A,0b101AAAAA,0x92] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz x19, #:abs_g1_s:sym // encoding: [0bAAA10011,A,0b101AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movn w10, #:abs_g1_s:sym // encoding: [0bAAA01010,A,0b101AAAAA,0x12] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz w25, #:abs_g1_s:sym // encoding: [0bAAA11001,A,0b101AAAAA,0x52] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw +// CHECK: movn x30, #:abs_g1_s:sym // encoding: [0bAAA11110,A,0b101AAAAA,0x92] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw +// CHECK: movz x19, #:abs_g1_s:sym // encoding: [0bAAA10011,A,0b101AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw +// CHECK: movn w10, #:abs_g1_s:sym // encoding: [0bAAA01010,A,0b101AAAAA,0x12] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw +// CHECK: movz w25, #:abs_g1_s:sym // encoding: [0bAAA11001,A,0b101AAAAA,0x52] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw movn x30, #:abs_g2_s:sym movz x19, #:abs_g2_s:sym -// CHECK-ARM64: movn x30, #:abs_g2_s:sym // encoding: [0bAAA11110,A,0b110AAAAA,0x92] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_arm64_movw -// CHECK-ARM64: movz x19, #:abs_g2_s:sym // encoding: [0bAAA10011,A,0b110AAAAA,0xd2] -// CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_arm64_movw +// CHECK: movn x30, #:abs_g2_s:sym // encoding: [0bAAA11110,A,0b110AAAAA,0x92] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_aarch64_movw +// CHECK: movz x19, #:abs_g2_s:sym // encoding: [0bAAA10011,A,0b110AAAAA,0xd2] +// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_aarch64_movw //------------------------------------------------------------------------------ // PC-relative addressing @@ -3396,15 +3396,15 @@ _func: adr x2, loc adr xzr, loc -// CHECK-ARM64: adr x2, loc // encoding: [0x02'A',A,A,0x10'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adr_imm21 -// CHECK-ARM64: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adr_imm21 +// CHECK: adr x2, loc // encoding: [0x02'A',A,A,0x10'A'] +// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adr_imm21 +// CHECK: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A'] +// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adr_imm21 adrp x29, loc -// CHECK-ARM64: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adrp_imm21 +// CHECK: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A'] +// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adrp_imm21 adrp x30, #4096 adr x20, #0 adr x9, #-1 @@ -4782,24 +4782,24 @@ _func: tbz xzr, #63, elsewhere tbnz x5, #45, nowhere -// CHECK-ARM64: tbz w5, #0, somewhere // encoding: [0bAAA00101,A,0b00000AAA,0x36] -// CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_pcrel_branch14 -// CHECK-ARM64: tbz xzr, #63, elsewhere // encoding: [0bAAA11111,A,0b11111AAA,0xb6] -// CHECK-ARM64: // 
fixup A - offset: 0, value: elsewhere, kind: fixup_arm64_pcrel_branch14 -// CHECK-ARM64: tbnz x5, #45, nowhere // encoding: [0bAAA00101,A,0b01101AAA,0xb7] -// CHECK-ARM64: // fixup A - offset: 0, value: nowhere, kind: fixup_arm64_pcrel_branch14 +// CHECK: tbz w5, #0, somewhere // encoding: [0bAAA00101,A,0b00000AAA,0x36] +// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_pcrel_branch14 +// CHECK: tbz xzr, #63, elsewhere // encoding: [0bAAA11111,A,0b11111AAA,0xb6] +// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_aarch64_pcrel_branch14 +// CHECK: tbnz x5, #45, nowhere // encoding: [0bAAA00101,A,0b01101AAA,0xb7] +// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_aarch64_pcrel_branch14 tbnz w3, #2, there tbnz wzr, #31, nowhere tbz w5, #12, anywhere -// CHECK-ARM64: tbnz w3, #2, there // encoding: [0bAAA00011,A,0b00010AAA,0x37] -// CHECK-ARM64: // fixup A - offset: 0, value: there, kind: fixup_arm64_pcrel_branch14 -// CHECK-ARM64: tbnz wzr, #31, nowhere // encoding: [0bAAA11111,A,0b11111AAA,0x37] -// CHECK-ARM64: // fixup A - offset: 0, value: nowhere, kind: fixup_arm64_pcrel_branch14 -// CHECK-ARM64: tbz w5, #12, anywhere // encoding: [0bAAA00101,A,0b01100AAA,0x36] -// CHECK-ARM64: // fixup A - offset: 0, value: anywhere, kind: fixup_arm64_pcrel_branch14 +// CHECK: tbnz w3, #2, there // encoding: [0bAAA00011,A,0b00010AAA,0x37] +// CHECK: // fixup A - offset: 0, value: there, kind: fixup_aarch64_pcrel_branch14 +// CHECK: tbnz wzr, #31, nowhere // encoding: [0bAAA11111,A,0b11111AAA,0x37] +// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_aarch64_pcrel_branch14 +// CHECK: tbz w5, #12, anywhere // encoding: [0bAAA00101,A,0b01100AAA,0x36] +// CHECK: // fixup A - offset: 0, value: anywhere, kind: fixup_aarch64_pcrel_branch14 //------------------------------------------------------------------------------ // Unconditional branch (immediate) @@ -4808,10 +4808,10 @@ _func: b somewhere bl elsewhere -// CHECK-ARM64: b somewhere // encoding: [A,A,A,0b000101AA] -// CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_pcrel_branch26 -// CHECK-ARM64: bl elsewhere // encoding: [A,A,A,0b100101AA] -// CHECK-ARM64: // fixup A - offset: 0, value: elsewhere, kind: fixup_arm64_pcrel_call26 +// CHECK: b somewhere // encoding: [A,A,A,0b000101AA] +// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_pcrel_branch26 +// CHECK: bl elsewhere // encoding: [A,A,A,0b100101AA] +// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_aarch64_pcrel_call26 b #4 bl #0 diff --git a/test/MC/AArch64/basic-pic.s b/test/MC/AArch64/basic-pic.s index 6bb6aaa7de1..a10874dcca0 100644 --- a/test/MC/AArch64/basic-pic.s +++ b/test/MC/AArch64/basic-pic.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s // CHECK: RELOCATION RECORDS FOR [.rela.text] diff --git a/test/MC/AArch64/elf-extern.s b/test/MC/AArch64/elf-extern.s index 3d84bde052f..dfa3fb002ed 100644 --- a/test/MC/AArch64/elf-extern.s +++ b/test/MC/AArch64/elf-extern.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc < %s -triple=arm64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s +// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s // External symbols are a different concept to global variables but should still // get relocations and so on when used. 
diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s index b69926efbc2..3b3aa65819d 100644 --- a/test/MC/AArch64/elf-objdump.s +++ b/test/MC/AArch64/elf-objdump.s @@ -1,5 +1,5 @@ // 64 bit little endian -// RUN: llvm-mc -filetype=obj -triple arm64-none-linux-gnu %s -o - | llvm-objdump -d - +// RUN: llvm-mc -filetype=obj -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d - // We just want to see if llvm-objdump works at all. // CHECK: .text diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s index cc5c3f7f25b..e37991bfba1 100644 --- a/test/MC/AArch64/elf-reloc-addsubimm.s +++ b/test/MC/AArch64/elf-reloc-addsubimm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s add x2, x3, #:lo12:some_label diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s index 3554ef3ae42..d4c3a4eb50d 100644 --- a/test/MC/AArch64/elf-reloc-ldrlit.s +++ b/test/MC/AArch64/elf-reloc-ldrlit.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s ldr x0, some_label diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s index 196f65fd299..371e7e51f24 100644 --- a/test/MC/AArch64/elf-reloc-ldstunsimm.s +++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s ldrb w0, [sp, #:lo12:some_label] diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s index dc7dbb0c156..333159562c0 100644 --- a/test/MC/AArch64/elf-reloc-movw.s +++ b/test/MC/AArch64/elf-reloc-movw.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s movz x0, #:abs_g0:some_label diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s index 652011318c3..093891d931a 100644 --- a/test/MC/AArch64/elf-reloc-pcreladdressing.s +++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s adr x2, some_label diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s index 9cbe3a53fb7..25c98163b58 100644 --- a/test/MC/AArch64/elf-reloc-tstb.s +++ b/test/MC/AArch64/elf-reloc-tstb.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s tbz x6, #45, somewhere diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s index 8f3915afab7..9ac66bd876a 100644 --- a/test/MC/AArch64/elf-reloc-uncondbrimm.s +++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s 
-o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s b somewhere diff --git a/test/MC/AArch64/gicv3-regs-diagnostics.s b/test/MC/AArch64/gicv3-regs-diagnostics.s index 6f4f5ee66c6..bc005b1d530 100644 --- a/test/MC/AArch64/gicv3-regs-diagnostics.s +++ b/test/MC/AArch64/gicv3-regs-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s // Write-only mrs x10, icc_eoir1_el1 diff --git a/test/MC/AArch64/gicv3-regs.s b/test/MC/AArch64/gicv3-regs.s index b9eac1a5695..0f5742ee543 100644 --- a/test/MC/AArch64/gicv3-regs.s +++ b/test/MC/AArch64/gicv3-regs.s @@ -1,4 +1,4 @@ - // RUN: llvm-mc -triple arm64-none-linux-gnu -show-encoding < %s | FileCheck %s + // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s mrs x8, icc_iar1_el1 mrs x26, icc_iar0_el1 diff --git a/test/MC/AArch64/inline-asm-modifiers.s b/test/MC/AArch64/inline-asm-modifiers.s index 33d5bf519f9..cf34a952e90 100644 --- a/test/MC/AArch64/inline-asm-modifiers.s +++ b/test/MC/AArch64/inline-asm-modifiers.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s .file "" .text diff --git a/test/MC/AArch64/jump-table.s b/test/MC/AArch64/jump-table.s index 439ecd90de3..578ebf4e660 100644 --- a/test/MC/AArch64/jump-table.s +++ b/test/MC/AArch64/jump-table.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc < %s -triple=arm64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s +// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s .file "" .text diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg index 17a6b7ab033..1be70c04415 100644 --- a/test/MC/AArch64/lit.local.cfg +++ b/test/MC/AArch64/lit.local.cfg @@ -1,3 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if 'ARM64' not in targets: +if 'AArch64' not in targets: config.unsupported = True diff --git a/test/MC/AArch64/mapping-across-sections.s b/test/MC/AArch64/mapping-across-sections.s index 00b324cb826..3d32c1dfb40 100644 --- a/test/MC/AArch64/mapping-across-sections.s +++ b/test/MC/AArch64/mapping-across-sections.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s .text add w0, w0, w0 diff --git a/test/MC/AArch64/mapping-within-section.s b/test/MC/AArch64/mapping-within-section.s index f515cb9a5c0..c8bd804fa0e 100644 --- a/test/MC/AArch64/mapping-within-section.s +++ b/test/MC/AArch64/mapping-within-section.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s .text // $x at 0x0000 diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s index 3ffc38fc69c..fc3215b4b67 100644 --- a/test/MC/AArch64/neon-3vdiff.s +++ b/test/MC/AArch64/neon-3vdiff.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+crypto -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+crypto -mattr=+neon 
-show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-aba-abd.s b/test/MC/AArch64/neon-aba-abd.s index e7964834146..178eb26f64c 100644 --- a/test/MC/AArch64/neon-aba-abd.s +++ b/test/MC/AArch64/neon-aba-abd.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s index 0b9e4d3146b..df9938b07e5 100644 --- a/test/MC/AArch64/neon-add-pairwise.s +++ b/test/MC/AArch64/neon-add-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s index 7d11d70bb90..68f169b3dd9 100644 --- a/test/MC/AArch64/neon-add-sub-instructions.s +++ b/test/MC/AArch64/neon-add-sub-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-bitwise-instructions.s b/test/MC/AArch64/neon-bitwise-instructions.s index ec192aa2d8a..79d0a9b70b5 100644 --- a/test/MC/AArch64/neon-bitwise-instructions.s +++ b/test/MC/AArch64/neon-bitwise-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s index 4d3daf066ed..19cfaf1f4d3 100644 --- a/test/MC/AArch64/neon-compare-instructions.s +++ b/test/MC/AArch64/neon-compare-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 46ae311f5f8..fa1f3caf5ad 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -1,6 +1,5 @@ - -// RUN: not llvm-mc -triple arm64-none-linux-gnu -mattr=+neon < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ARM64-ERROR < %t %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s //------------------------------------------------------------------------------ // Vector Integer Add/sub @@ -589,12 +588,12 @@ // CHECK-ERROR: fcmgt v0.2d, v31.2s, v16.2s // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmgt v4.4s, v7.4s, v15.4h -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: 
fcmlt v29.2d, v5.2d, v2.16b -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmgt v4.4s, v7.4s, v15.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmlt v29.2d, v5.2d, v2.16b +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Equal to Zero (Integer) @@ -684,12 +683,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmeq v0.8b, v1.4h, #1.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmeq v0.8b, v1.4h, #1 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmeq v0.8b, v1.4h, #1.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmeq v0.8b, v1.4h, #1 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Greater Than or Equal to Zero (Floating Point) @@ -709,12 +708,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmle v17.8h, v15.2d, #-1.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmle v17.8h, v15.2d, #2 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmle v17.8h, v15.2d, #-1.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmle v17.8h, v15.2d, #2 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Greater Than Zero (Floating Point) @@ -733,12 +732,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #255.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #255 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmlt v29.2d, v5.2d, #255.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmlt v29.2d, v5.2d, #255 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Less Than or Equal To Zero (Floating Point) @@ -757,12 +756,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmle v17.2d, v15.2d, #15.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmle v17.2d, v15.2d, #15 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected floating-point constant #0.0 +// CHECK-ERROR: fcmle v17.2d, v15.2d, #15.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmle v17.2d, v15.2d, #15 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Compare Mask Less Than Zero (Floating Point) @@ -781,12 +780,12 @@ // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 -// CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #16.0 -// CHECK-ARM64-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction -// CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #2 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: expected 
floating-point constant #0.0 +// CHECK-ERROR: fcmlt v29.2d, v5.2d, #16.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmlt v29.2d, v5.2d, #2 +// CHECK-ERROR: ^ /----------------------------------------------------------------------- // Vector Integer Halving Add (Signed) @@ -1300,9 +1299,9 @@ shl v0.2d, v1.2d, #64 -// CHECK-ARM64-ERROR: error: unexpected token in argument list -// CHECK-ARM64-ERROR: shl v0.4s, v15,2s, #3 -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: unexpected token in argument list +// CHECK-ERROR: shl v0.4s, v15,2s, #3 +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: shl v0.2d, v17.4s, #3 @@ -2633,9 +2632,9 @@ pmull2 v0.2d, v1.4s, v2.4s -// CHECK-ARM64-ERROR: error: unexpected token in argument list -// CHECK-ARM64-ERROR: pmull2 v0.4s, v1.8h v2.8h -// CHECK-ARM64-ERROR: ^ +// CHECK-ERROR: error: unexpected token in argument list +// CHECK-ERROR: pmull2 v0.4s, v1.8h v2.8h +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: pmull2 v0.2d, v1.4s, v2.4s @@ -2959,19 +2958,19 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mla v0.2d, v1.2d, v16.d[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mla v0.2h, v1.2h, v2.h[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -2993,19 +2992,19 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mls v0.2d, v1.2d, v16.d[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mls v0.2h, v1.2h, v2.h[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3030,22 +3029,22 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmla v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in 
range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v3.4s, v8.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v3.4s, v8.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3064,22 +3063,22 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmls v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v3.4s, v8.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v3.4s, v8.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3099,7 +3098,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3108,16 +3107,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3126,10 +3125,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane 
must be an integer in range // CHECK-ERROR: smlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3149,7 +3148,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3158,16 +3157,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3176,10 +3175,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3199,7 +3198,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3208,16 +3207,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3226,10 +3225,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.2d, 
v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3249,7 +3248,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3258,16 +3257,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3276,10 +3275,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3299,7 +3298,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3308,16 +3307,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3326,10 +3325,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// 
CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3349,7 +3348,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3358,16 +3357,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3376,10 +3375,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3393,28 +3392,28 @@ mul v0.4s, v1.4s, v22.s[4] mul v0.2d, v1.2d, v2.d[1] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mul v0.4h, v1.4h, v16.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: invalid operand for instruction +// CHECK-ERROR: invalid operand for instruction // CHECK-ERROR: mul v0.8h, v1.8h, v16.h[8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3432,22 +3431,22 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmul 
v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3462,22 +3461,22 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3497,7 +3496,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3506,16 +3505,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ // 
CHECK-ERROR: error: invalid operand for instruction @@ -3524,10 +3523,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3547,7 +3546,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3556,16 +3555,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3574,10 +3573,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3597,7 +3596,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3606,16 +3605,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for 
instruction @@ -3624,10 +3623,10 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3641,28 +3640,28 @@ sqdmulh v0.4s, v1.4s, v22.s[4] sqdmulh v0.2d, v1.2d, v22.d[1] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh v0.4h, v1.4h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh v0.8h, v1.8h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3679,28 +3678,28 @@ sqrdmulh v0.4s, v1.4s, v22.s[4] sqrdmulh v0.2d, v1.2d, v22.d[1] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3918,13 +3917,13 @@ ld1 {v4}, [x0] ld1 
{v32.16b}, [x0] ld1 {v15.8h}, [x32] -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: ld1 {x3}, [x2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v4}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: ld1 {v32.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3938,13 +3937,13 @@ ld1 {v1.8h-v1.8h}, [x0] ld1 {v15.8h-v17.4h}, [x15] ld1 {v0.8b-v2.8b, [x0] -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: ld1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: unexpected token in argument list +// CHECK-ERROR: error: unexpected token in argument list // CHECK-ERROR: ld1 v0.8b, v1.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors @@ -3953,7 +3952,7 @@ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld1 {v1.8h-v1.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld1 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: '}' expected @@ -3965,15 +3964,15 @@ ld2 {v15.4h, v16.4h, v17.4h}, [x32] ld2 {v15.8h-v16.4h}, [x15] ld2 {v0.2d-v2.2d}, [x0] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h-v16.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -3985,16 +3984,16 @@ ld3 {v0.8b, v2.8b, v3.8b}, [x0] ld3 {v15.8h-v17.4h}, [x15] ld3 {v31.4s-v2.4s}, [sp] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v15.8h, v16.8h, v17.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: ld3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4006,16 +4005,16 @@ ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] ld4 {v15.8h-v18.4h}, [x15] ld4 {v31.2s-v1.2s}, [x31] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: 
error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v15.8h-v18.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4026,13 +4025,13 @@ st1 {v4}, [x0] st1 {v32.16b}, [x0] st1 {v15.8h}, [x32] -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: st1 {x3}, [x2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v4}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: st1 {v32.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4046,13 +4045,13 @@ st1 {v1.8h-v1.8h}, [x0] st1 {v15.8h-v17.4h}, [x15] st1 {v0.8b-v2.8b, [x0] -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: st1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: unexpected token in argument list +// CHECK-ERROR: error: unexpected token in argument list // CHECK-ERROR: st1 v0.8b, v1.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors @@ -4061,7 +4060,7 @@ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st1 {v1.8h-v1.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st1 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: '}' expected @@ -4073,16 +4072,16 @@ st2 {v15.4h, v16.4h, v17.4h}, [x30] st2 {v15.8h-v16.4h}, [x15] st2 {v0.2d-v2.2d}, [x0] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: st2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v15.4h, v16.4h, v17.4h}, [x30] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h-v16.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4094,16 +4093,16 @@ st3 {v0.8b, v2.8b, v3.8b}, [x0] st3 {v15.8h-v17.4h}, [x15] st3 {v31.4s-v2.4s}, [sp] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v15.8h, v16.8h, v17.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: 
st3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4115,16 +4114,16 @@ st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] st4 {v15.8h-v18.4h}, [x15] st4 {v31.2s-v1.2s}, [x31] -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must be sequential // CHECK-ERROR: st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h-v18.4h}, [x15] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4141,7 +4140,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v0.16b}, [x0], #8 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: error: invalid vector kind qualifier // CHECK-ERROR: ld1 {v0.8h, v1.16h}, [x0], x1 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4157,7 +4156,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 // CHECK-ERROR: ^ @@ -4167,7 +4166,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v0.16b}, [x0], #8 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: error: invalid vector kind qualifier // CHECK-ERROR: st1 {v0.8h, v1.16h}, [x0], x1 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4183,7 +4182,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st3 {v5.2s, v6.2s, v7.2s}, [x1], #48 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 // CHECK-ERROR: ^ @@ -4195,16 +4194,16 @@ ld2r {v31.4s, v0.2s}, [sp] ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] -// CHECK-ARM64-ERROR: error: vector register expected +// CHECK-ERROR: error: vector register expected // CHECK-ERROR: ld1r {x1}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2r {v31.4s, v0.2s}, [sp] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] // CHECK-ERROR: ^ @@ -4216,16 +4215,16 @@ ld2 {v15.h, v16.h}[8], [x15] ld3 {v31.s, v0.s, v1.s}[-1], [sp] ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] -// CHECK-ARM64-ERROR: vector lane must be an integer in 
range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld1 {v0.b}[16], [x0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld2 {v15.h, v16.h}[8], [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector lane must be an integer in range +// CHECK-ERROR: error: vector lane must be an integer in range // CHECK-ERROR: ld3 {v31.s, v0.s, v1.s}[-1], [sp] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] // CHECK-ERROR: ^ @@ -4233,16 +4232,16 @@ st2 {v31.s, v0.s}[3], [8] st3 {v15.h, v16.h, v17.h}[-1], [x15] st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: st1 {v0.d}[16], [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v31.s, v0.s}[3], [8] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector lane must be an integer in range +// CHECK-ERROR: error: vector lane must be an integer in range // CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[-1], [x15] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] // CHECK-ERROR: ^ @@ -4281,7 +4280,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld2 {v15.h, v16.h}[0], [x15], #3 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: mismatched register size suffix +// CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v31.s, v0.s, v1.d}[0], [sp], x9 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4315,16 +4314,16 @@ ins v20.s[1], s30 ins v1.d[0], d7 -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v2.b[16], w1 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v7.h[8], w14 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v20.s[5], w30 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v1.d[2], x7 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4351,19 +4350,19 @@ smov x14, v6.d[1] smov x20, v9.d[0] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov w1, v0.b[16] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov w14, v6.h[8] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x1, v0.b[16] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x14, v6.h[8] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // 
CHECK-ERROR smov x20, v9.s[5] // CHECK-ERROR ^ // CHECK-ERROR error: invalid operand for instruction @@ -4390,16 +4389,16 @@ umov s20, v9.s[2] umov d7, v18.d[1] -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w1, v0.b[16] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w14, v6.h[8] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w20, v9.s[5] // CHECK-ERROR ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR umov x7, v18.d[3] // CHECK-ERROR ^ // CHECK-ERROR error: invalid operand for instruction @@ -4815,7 +4814,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s17, h27, s12 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal d19, s24, d12 // CHECK-ERROR: ^ @@ -4829,7 +4828,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl s14, h12, s25 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl d12, s23, d13 // CHECK-ERROR: ^ @@ -4843,7 +4842,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull s12, h22, s12 // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: invalid operand for instruction +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull d15, s22, d12 // CHECK-ERROR: ^ @@ -6885,7 +6884,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmul h0, h1, v1.s[0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul s2, s29, v10.s[4] // CHECK-ERROR: ^ @@ -6904,7 +6903,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx h0, h1, v1.d[0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx d2, d29, v10.d[3] // CHECK-ERROR: ^ @@ -6923,7 +6922,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmla d30, s11, v1.d[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla s16, s22, v16.s[5] // CHECK-ERROR: ^ @@ -6942,7 +6941,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmls h7, h17, v26.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: error: vector lane must be an integer in range [0, 1] +// CHECK-ERROR: error: vector lane must be an integer in range [0, 1] // CHECK-ERROR: fmls d16, d22, v16.d[-1] // CHECK-ERROR: ^ @@ -6964,7 +6963,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s8, s9, v14.s[1] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal d4, s5, v1.s[5] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -6989,7 +6988,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: 
sqdmlsl d1, h1, v13.s[0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl d1, s1, v13.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -7016,7 +7015,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull s1, s1, v4.s[0] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull s12, h17, v9.h[9] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -7041,7 +7040,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh s25, s26, v27.h[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh s25, s26, v27.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -7066,7 +7065,7 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh s5, h6, v7.s[2] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh h31, h30, v14.h[9] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -7098,16 +7097,16 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: dup d0, v17.s[3] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup d0, v17.d[4] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup s0, v1.s[7] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup h0, v31.h[16] // CHECK-ERROR: ^ -// CHECK-ARM64-ERROR: vector lane must be an integer in range +// CHECK-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup b1, v3.b[16] // CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-facge-facgt.s b/test/MC/AArch64/neon-facge-facgt.s index 799b85ff42f..212eda2f209 100644 --- a/test/MC/AArch64/neon-facge-facgt.s +++ b/test/MC/AArch64/neon-facge-facgt.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-frsqrt-frecp.s b/test/MC/AArch64/neon-frsqrt-frecp.s index 56bc47154a0..79fe5da5e76 100644 --- a/test/MC/AArch64/neon-frsqrt-frecp.s +++ b/test/MC/AArch64/neon-frsqrt-frecp.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-halving-add-sub.s b/test/MC/AArch64/neon-halving-add-sub.s index 19b56ced3e6..555f1b83b4f 100644 --- a/test/MC/AArch64/neon-halving-add-sub.s +++ b/test/MC/AArch64/neon-halving-add-sub.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple 
aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-max-min-pairwise.s b/test/MC/AArch64/neon-max-min-pairwise.s index e48f9753586..8d2dadb1997 100644 --- a/test/MC/AArch64/neon-max-min-pairwise.s +++ b/test/MC/AArch64/neon-max-min-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-max-min.s b/test/MC/AArch64/neon-max-min.s index 8cc4ac86e65..6d1efde5077 100644 --- a/test/MC/AArch64/neon-max-min.s +++ b/test/MC/AArch64/neon-max-min.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mla-mls-instructions.s b/test/MC/AArch64/neon-mla-mls-instructions.s index 5c8b7d8788a..3072e6f1200 100644 --- a/test/MC/AArch64/neon-mla-mls-instructions.s +++ b/test/MC/AArch64/neon-mla-mls-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mov.s b/test/MC/AArch64/neon-mov.s index 6231ffe49c5..567a5ecc541 100644 --- a/test/MC/AArch64/neon-mov.s +++ b/test/MC/AArch64/neon-mov.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mul-div-instructions.s b/test/MC/AArch64/neon-mul-div-instructions.s index 2601d50f131..1fe6d2b819c 100644 --- a/test/MC/AArch64/neon-mul-div-instructions.s +++ b/test/MC/AArch64/neon-mul-div-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-rounding-halving-add.s b/test/MC/AArch64/neon-rounding-halving-add.s index 55c9f921da7..47ac2126802 100644 --- a/test/MC/AArch64/neon-rounding-halving-add.s +++ b/test/MC/AArch64/neon-rounding-halving-add.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s index 38924e7c4bd..e70f766f2b6 100644 --- a/test/MC/AArch64/neon-rounding-shift.s +++ b/test/MC/AArch64/neon-rounding-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // 
Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s index d39997901f7..4a7ed109426 100644 --- a/test/MC/AArch64/neon-saturating-add-sub.s +++ b/test/MC/AArch64/neon-saturating-add-sub.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s index 702b9d2c60e..9215c1cabef 100644 --- a/test/MC/AArch64/neon-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-saturating-rounding-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s index d03172b1788..9ae393a040b 100644 --- a/test/MC/AArch64/neon-saturating-shift.s +++ b/test/MC/AArch64/neon-saturating-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s index 897c93506e1..d08756c0c10 100644 --- a/test/MC/AArch64/neon-scalar-abs.s +++ b/test/MC/AArch64/neon-scalar-abs.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s index 955c30716b4..0a3eba73212 100644 --- a/test/MC/AArch64/neon-scalar-add-sub.s +++ b/test/MC/AArch64/neon-scalar-add-sub.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Add diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s index d4f3682dc2b..fec9d12d8b8 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Floating Point fused multiply-add (scalar, by element) diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s index d22aa9b15b2..8b8a3f57a9c 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mul.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s 
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Floating Point multiply (scalar, by element) diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s index dadb8db9936..e3d7e0514f9 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //----------------------------------------------------------------------------- // Signed saturating doubling multiply-add long (scalar, by element) diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s index 90eeb5e64c0..8a8405ef282 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //----------------------------------------------------------------------------- // Signed saturating doubling multiply long (scalar, by element) diff --git a/test/MC/AArch64/neon-scalar-compare.s b/test/MC/AArch64/neon-scalar-compare.s index 16ba92e0797..28de46a7733 100644 --- a/test/MC/AArch64/neon-scalar-compare.s +++ b/test/MC/AArch64/neon-scalar-compare.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s index 047495276fb..97416daf080 100644 --- a/test/MC/AArch64/neon-scalar-cvt.s +++ b/test/MC/AArch64/neon-scalar-cvt.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s index ba4f3c2ad79..db11ea2aa08 100644 --- a/test/MC/AArch64/neon-scalar-dup.s +++ b/test/MC/AArch64/neon-scalar-dup.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Duplicate element (scalar) diff --git a/test/MC/AArch64/neon-scalar-extract-narrow.s b/test/MC/AArch64/neon-scalar-extract-narrow.s index e6167930d1c..e25224e386f 100644 --- a/test/MC/AArch64/neon-scalar-extract-narrow.s +++ b/test/MC/AArch64/neon-scalar-extract-narrow.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git 
a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s index cb9e7a7a66e..b798b341067 100644 --- a/test/MC/AArch64/neon-scalar-fp-compare.s +++ b/test/MC/AArch64/neon-scalar-fp-compare.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s index 21be537cbb7..e33bdad91a9 100644 --- a/test/MC/AArch64/neon-scalar-mul.s +++ b/test/MC/AArch64/neon-scalar-mul.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-neg.s b/test/MC/AArch64/neon-scalar-neg.s index e902c2307a1..8e5d61dd245 100644 --- a/test/MC/AArch64/neon-scalar-neg.s +++ b/test/MC/AArch64/neon-scalar-neg.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s index dde26b557be..7a886f3b4a7 100644 --- a/test/MC/AArch64/neon-scalar-recip.s +++ b/test/MC/AArch64/neon-scalar-recip.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s index cb7564ac68d..403a940ec2f 100644 --- a/test/MC/AArch64/neon-scalar-reduce-pairwise.s +++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //---------------------------------------------------------------------- // Scalar Reduce Add Pairwise (Integer) diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s index 2594c2f2ac5..6113e09af38 100644 --- a/test/MC/AArch64/neon-scalar-rounding-shift.s +++ b/test/MC/AArch64/neon-scalar-rounding-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s index d5cd838a92b..0bf24349599 100644 --- a/test/MC/AArch64/neon-scalar-saturating-add-sub.s +++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s 
//------------------------------------------------------------------------------ // Scalar Integer Saturating Add (Signed) diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s index 83bd59f50c8..b09a5892344 100644 --- a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Saturating Rounding Shift Lef (Signed) diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s index 679f1f4052c..b53c9f072f3 100644 --- a/test/MC/AArch64/neon-scalar-saturating-shift.s +++ b/test/MC/AArch64/neon-scalar-saturating-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Saturating Shift Lef (Signed) diff --git a/test/MC/AArch64/neon-scalar-shift-imm.s b/test/MC/AArch64/neon-scalar-shift-imm.s index 47a8dec212b..96cb815eafa 100644 --- a/test/MC/AArch64/neon-scalar-shift-imm.s +++ b/test/MC/AArch64/neon-scalar-shift-imm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s index 98aa51a63da..366840a9315 100644 --- a/test/MC/AArch64/neon-scalar-shift.s +++ b/test/MC/AArch64/neon-scalar-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ // Scalar Integer Shift Lef (Signed) diff --git a/test/MC/AArch64/neon-shift-left-long.s b/test/MC/AArch64/neon-shift-left-long.s index 87204683104..97604587424 100644 --- a/test/MC/AArch64/neon-shift-left-long.s +++ b/test/MC/AArch64/neon-shift-left-long.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s index dcff992a782..614e6de1622 100644 --- a/test/MC/AArch64/neon-shift.s +++ b/test/MC/AArch64/neon-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s index 917f7cb524e..4837a4cb9ee 100644 --- a/test/MC/AArch64/neon-simd-copy.s +++ b/test/MC/AArch64/neon-simd-copy.s @@ -1,4 +1,4 @@ 
-// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s index 1c1ad7489d5..a16432324ef 100644 --- a/test/MC/AArch64/neon-simd-shift.s +++ b/test/MC/AArch64/neon-simd-shift.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-sxtl.s b/test/MC/AArch64/neon-sxtl.s index 363796ee334..0fe26cb5e8e 100644 --- a/test/MC/AArch64/neon-sxtl.s +++ b/test/MC/AArch64/neon-sxtl.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-uxtl.s b/test/MC/AArch64/neon-uxtl.s index 46c56625c0f..685b6362bcb 100644 --- a/test/MC/AArch64/neon-uxtl.s +++ b/test/MC/AArch64/neon-uxtl.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/noneon-diagnostics.s b/test/MC/AArch64/noneon-diagnostics.s index 470a74d5b31..60a5fd208af 100644 --- a/test/MC/AArch64/noneon-diagnostics.s +++ b/test/MC/AArch64/noneon-diagnostics.s @@ -1,48 +1,29 @@ -// RUN: not llvm-mc -triple arm64-none-linux-gnu -mattr=-neon < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ARM64-ERROR < %t %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-neon < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s fmla v3.4s, v12.4s, v17.4s fmla v1.2d, v30.2d, v20.2d fmla v9.2s, v9.2s, v0.2s -// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR: error: instruction requires: neon // CHECK-ERROR-NEXT: fmla v3.4s, v12.4s, v17.4s // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR-NEXT: error: instruction requires: neon // CHECK-ERROR-NEXT: fmla v1.2d, v30.2d, v20.2d // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR-NEXT: error: instruction requires: neon // CHECK-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s // CHECK-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmla v3.4s, v12.4s, v17.4s -// CHECK-ARM64-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR-NEXT: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmla v1.2d, v30.2d, v20.2d -// CHECK-ARM64-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR-NEXT: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s -// CHECK-ARM64-ERROR-NEXT: ^ - fmls v3.4s, v12.4s, v17.4s fmls v1.2d, v30.2d, v20.2d fmls v9.2s, v9.2s, v0.2s -// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled + +// CHECK-ERROR: error: instruction requires: neon // CHECK-ERROR-NEXT: fmls v3.4s, v12.4s, v17.4s 
// CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR-NEXT: error: instruction requires: neon // CHECK-ERROR-NEXT: fmls v1.2d, v30.2d, v20.2d // CHECK-ERROR-NEXT: ^ -// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled +// CHECK-ERROR-NEXT: error: instruction requires: neon // CHECK-ERROR-NEXT: fmls v9.2s, v9.2s, v0.2s // CHECK-ERROR-NEXT: ^ - -// CHECK-ARM64-ERROR: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmls v3.4s, v12.4s, v17.4s -// CHECK-ARM64-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR-NEXT: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmls v1.2d, v30.2d, v20.2d -// CHECK-ARM64-ERROR-NEXT: ^ -// CHECK-ARM64-ERROR-NEXT: error: instruction requires: neon -// CHECK-ARM64-ERROR-NEXT: fmls v9.2s, v9.2s, v0.2s -// CHECK-ARM64-ERROR-NEXT: ^ diff --git a/test/MC/AArch64/optional-hash.s b/test/MC/AArch64/optional-hash.s index 7ae1aa49047..3922b5be34a 100644 --- a/test/MC/AArch64/optional-hash.s +++ b/test/MC/AArch64/optional-hash.s @@ -1,5 +1,5 @@ // PR18929 -// RUN: llvm-mc < %s -triple=arm64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \ +// RUN: llvm-mc < %s -triple=aarch64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \ // RUN: | llvm-objdump --disassemble -arch=arm64 -mattr=+fp-armv8,+neon - | FileCheck %s .text diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s index ae7b20cefd5..ebf02167a8f 100644 --- a/test/MC/AArch64/tls-relocs.s +++ b/test/MC/AArch64/tls-relocs.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s --check-prefix=CHECK-ARM64 -// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s -o - | \ // RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s // TLS local-dynamic forms @@ -8,14 +8,14 @@ movz x3, #:dtprel_g2:var movn x4, #:dtprel_g2:var -// CHECK-ARM64: movz x1, #:dtprel_g2:var // encoding: [0bAAA00001,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x2, #:dtprel_g2:var // encoding: [0bAAA00010,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw +// CHECK: movz x1, #:dtprel_g2:var // encoding: [0bAAA00001,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw +// CHECK: movn x2, #:dtprel_g2:var // encoding: [0bAAA00010,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw +// CHECK: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw +// CHECK: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw // CHECK-ELF: Relocations [ // CHECK-ELF-NEXT: Section (2) .rela.text { @@ -30,14 +30,14 @@ movz w7, #:dtprel_g1:var movn 
w8, #:dtprel_g1:var -// CHECK-ARM64: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn w8, #:dtprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw +// CHECK: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movn w8, #:dtprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x10 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: 0x14 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] @@ -48,10 +48,10 @@ movk x9, #:dtprel_g1_nc:var movk w10, #:dtprel_g1_nc:var -// CHECK-ARM64: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x20 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]] // CHECK-ELF-NEXT: 0x24 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]] @@ -62,14 +62,14 @@ movz w13, #:dtprel_g0:var movn w14, #:dtprel_g0:var -// CHECK-ARM64: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn w14, #:dtprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw +// CHECK: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movz w13, #:dtprel_g0:var // encoding: 
[0bAAA01101,A,0b100AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movn w14, #:dtprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x28 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] // CHECK-ELF-NEXT: 0x2C R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] @@ -80,10 +80,10 @@ movk x15, #:dtprel_g0_nc:var movk w16, #:dtprel_g0_nc:var -// CHECK-ARM64: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x38 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: 0x3C R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] @@ -92,10 +92,10 @@ add x17, x18, #:dtprel_hi12:var, lsl #12 add w19, w20, #:dtprel_hi12:var, lsl #12 -// CHECK-ARM64: add x17, x18, :dtprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w19, w20, :dtprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x17, x18, :dtprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w19, w20, :dtprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0x40 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]] // CHECK-ELF-NEXT: 0x44 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]] @@ -104,10 +104,10 @@ add x21, x22, #:dtprel_lo12:var add w23, w24, #:dtprel_lo12:var -// CHECK-ARM64: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w23, w24, :dtprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w23, w24, :dtprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0x48 R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x4C R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]] @@ -116,10 +116,10 @@ add x25, x26, #:dtprel_lo12_nc:var add w27, w28, #:dtprel_lo12_nc:var -// CHECK-ARM64: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: 
fixup_arm64_add_imm12 -// CHECK-ARM64: add w27, w28, :dtprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_add_imm12 +// CHECK: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w27, w28, :dtprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0x50 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]] // CHECK-ELF-NEXT: 0x54 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]] @@ -128,10 +128,10 @@ ldrb w29, [x30, #:dtprel_lo12:var] ldrsb x29, [x28, #:dtprel_lo12_nc:var] -// CHECK-ARM64: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 -// CHECK-ARM64: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 +// CHECK: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK-ELF-NEXT: 0x58 R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x5C R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]] @@ -140,10 +140,10 @@ strh w27, [x26, #:dtprel_lo12:var] ldrsh x25, [x24, #:dtprel_lo12_nc:var] -// CHECK-ARM64: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 -// CHECK-ARM64: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 +// CHECK: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK-ELF-NEXT: 0x60 R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]] @@ -152,10 +152,10 @@ ldr w23, [x22, #:dtprel_lo12:var] ldrsw x21, [x20, #:dtprel_lo12_nc:var] -// CHECK-ARM64: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 -// CHECK-ARM64: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 +// CHECK: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: 
[0x95,0bAAAAAA10,0b10AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK-ELF-NEXT: 0x68 R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x6C R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]] @@ -164,10 +164,10 @@ ldr x19, [x18, #:dtprel_lo12:var] str x17, [x16, #:dtprel_lo12_nc:var] -// CHECK-ARM64: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK-ELF-NEXT: 0x70 R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x74 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]] @@ -177,10 +177,10 @@ movz x15, #:gottprel_g1:var movz w14, #:gottprel_g1:var -// CHECK-ARM64: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w14, #:gottprel_g1:var // encoding: [0bAAA01110,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw +// CHECK: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movz w14, #:gottprel_g1:var // encoding: [0bAAA01110,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x78 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: 0x7C R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]] @@ -189,10 +189,10 @@ movk x13, #:gottprel_g0_nc:var movk w12, #:gottprel_g0_nc:var -// CHECK-ARM64: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w12, #:gottprel_g0_nc:var // encoding: [0bAAA01100,A,0b100AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w12, #:gottprel_g0_nc:var // encoding: [0bAAA01100,A,0b100AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x80 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: 0x84 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]] @@ -202,12 +202,12 @@ ldr x10, [x0, #:gottprel_lo12:var] ldr x9, :gottprel:var -// CHECK-ARM64: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_pcrel_adrp_imm21 -// CHECK-ARM64: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: 
:gottprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58] -// CHECK-ARM64: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_ldr_pcrel_imm19 +// CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] +// CHECK: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21 +// CHECK: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58] +// CHECK: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19 // CHECK-ELF-NEXT: 0x88 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]] // CHECK-ELF-NEXT: 0x8C R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]] @@ -218,10 +218,10 @@ movz x3, #:tprel_g2:var movn x4, #:tprel_g2:var -// CHECK-ARM64: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw +// CHECK: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw +// CHECK: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x94 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]] // CHECK-ELF-NEXT: 0x98 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]] @@ -232,14 +232,14 @@ movz w7, #:tprel_g1:var movn w8, #:tprel_g1:var -// CHECK-ARM64: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn w8, #:tprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw +// CHECK: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw +// CHECK: movn w8, #:tprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0x9C R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]] // CHECK-ELF-NEXT: 0xA0 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]] @@ -250,10 +250,10 @@ movk x9, #:tprel_g1_nc:var movk w10, #:tprel_g1_nc:var -// CHECK-ARM64: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: 
movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw +// CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0xAC R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]] // CHECK-ELF-NEXT: 0xB0 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]] @@ -264,14 +264,14 @@ movz w13, #:tprel_g0:var movn w14, #:tprel_g0:var -// CHECK-ARM64: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw -// CHECK-ARM64: movn w14, #:tprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw +// CHECK: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] +// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw +// CHECK: movn w14, #:tprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12] +// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0xB4 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]] // CHECK-ELF-NEXT: 0xB8 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]] @@ -282,10 +282,10 @@ movk x15, #:tprel_g0_nc:var movk w16, #:tprel_g0_nc:var -// CHECK-ARM64: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw -// CHECK-ARM64: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw +// CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] +// CHECK: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw +// CHECK: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] +// CHECK: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw // CHECK-ELF-NEXT: 0xC4 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] // CHECK-ELF-NEXT: 0xC8 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] @@ -294,10 +294,10 @@ add x17, x18, #:tprel_hi12:var, lsl #12 add w19, w20, #:tprel_hi12:var, lsl #12 -// CHECK-ARM64: add x17, x18, :tprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w19, w20, :tprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_hi12:var, kind: 
fixup_arm64_add_imm12 +// CHECK: add x17, x18, :tprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w19, w20, :tprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0xCC R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]] // CHECK-ELF-NEXT: 0xD0 R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]] @@ -306,10 +306,10 @@ add x21, x22, #:tprel_lo12:var add w23, w24, #:tprel_lo12:var -// CHECK-ARM64: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w23, w24, :tprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12 +// CHECK: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w23, w24, :tprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0xD4 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0xD8 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]] @@ -318,10 +318,10 @@ add x25, x26, #:tprel_lo12_nc:var add w27, w28, #:tprel_lo12_nc:var -// CHECK-ARM64: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_add_imm12 -// CHECK-ARM64: add w27, w28, :tprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_add_imm12 +// CHECK: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 +// CHECK: add w27, w28, :tprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 // CHECK-ELF-NEXT: 0xDC R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]] // CHECK-ELF-NEXT: 0xE0 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]] @@ -330,10 +330,10 @@ ldrb w29, [x30, #:tprel_lo12:var] ldrsb x29, [x28, #:tprel_lo12_nc:var] -// CHECK-ARM64: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 -// CHECK-ARM64: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1 +// CHECK: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 +// CHECK: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 // CHECK-ELF-NEXT: 0xE4 R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0xE8 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]] @@ -342,10 +342,10 @@ strh w27, [x26, #:tprel_lo12:var] ldrsh x25, [x24, 
#:tprel_lo12_nc:var] -// CHECK-ARM64: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 -// CHECK-ARM64: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2 +// CHECK: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 +// CHECK: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 // CHECK-ELF-NEXT: 0xEC R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0xF0 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]] @@ -354,10 +354,10 @@ ldr w23, [x22, #:tprel_lo12:var] ldrsw x21, [x20, #:tprel_lo12_nc:var] -// CHECK-ARM64: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 -// CHECK-ARM64: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4 +// CHECK: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 +// CHECK: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 // CHECK-ELF-NEXT: 0xF4 R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0xF8 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]] @@ -365,10 +365,10 @@ ldr x19, [x18, #:tprel_lo12:var] str x17, [x16, #:tprel_lo12_nc:var] -// CHECK-ARM64: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8 +// CHECK: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 // CHECK-ELF-NEXT: 0xFC R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]] // CHECK-ELF-NEXT: 0x100 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]] @@ -381,15 +381,15 @@ blr x3 -// CHECK-ARM64: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] -// CHECK-ARM64: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_arm64_pcrel_adrp_imm21 -// CHECK-ARM64: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9] -// CHECK-ARM64: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 -// CHECK-ARM64: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91] -// CHECK-ARM64: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: 
fixup_arm64_add_imm12 -// CHECK-ARM64: .tlsdesccall var // encoding: [] -// CHECK-ARM64: // fixup A - offset: 0, value: var, kind: fixup_arm64_tlsdesc_call -// CHECK-ARM64: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] +// CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] +// CHECK: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21 +// CHECK: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9] +// CHECK: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 +// CHECK: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91] +// CHECK: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12 +// CHECK: .tlsdesccall var // encoding: [] +// CHECK: // fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call +// CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] // CHECK-ELF-NEXT: 0x104 R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]] // CHECK-ELF-NEXT: 0x108 R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]] diff --git a/test/MC/AArch64/trace-regs-diagnostics.s b/test/MC/AArch64/trace-regs-diagnostics.s index fa57817dd38..41331e7703c 100644 --- a/test/MC/AArch64/trace-regs-diagnostics.s +++ b/test/MC/AArch64/trace-regs-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s // Write-only mrs x12, trcoslar mrs x10, trclar diff --git a/test/MC/AArch64/trace-regs.s b/test/MC/AArch64/trace-regs.s index be25f08947b..92f16cd54f3 100644 --- a/test/MC/AArch64/trace-regs.s +++ b/test/MC/AArch64/trace-regs.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s mrs x8, trcstatr mrs x9, trcidr8 diff --git a/test/MC/ARM64/lit.local.cfg b/test/MC/ARM64/lit.local.cfg deleted file mode 100644 index 4d6d8826b53..00000000000 --- a/test/MC/ARM64/lit.local.cfg +++ /dev/null @@ -1,6 +0,0 @@ -config.suffixes = ['.ll', '.s'] - -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: - config.unsupported = True - diff --git a/test/MC/Disassembler/ARM64/advsimd.txt b/test/MC/Disassembler/AArch64/arm64-advsimd.txt similarity index 100% rename from test/MC/Disassembler/ARM64/advsimd.txt rename to test/MC/Disassembler/AArch64/arm64-advsimd.txt diff --git a/test/MC/Disassembler/ARM64/arithmetic.txt b/test/MC/Disassembler/AArch64/arm64-arithmetic.txt similarity index 100% rename from test/MC/Disassembler/ARM64/arithmetic.txt rename to test/MC/Disassembler/AArch64/arm64-arithmetic.txt diff --git a/test/MC/Disassembler/ARM64/basic-a64-undefined.txt b/test/MC/Disassembler/AArch64/arm64-basic-a64-undefined.txt similarity index 100% rename from test/MC/Disassembler/ARM64/basic-a64-undefined.txt rename to test/MC/Disassembler/AArch64/arm64-basic-a64-undefined.txt diff --git a/test/MC/Disassembler/ARM64/bitfield.txt b/test/MC/Disassembler/AArch64/arm64-bitfield.txt similarity index 100% rename from test/MC/Disassembler/ARM64/bitfield.txt rename to test/MC/Disassembler/AArch64/arm64-bitfield.txt diff --git a/test/MC/Disassembler/ARM64/branch.txt b/test/MC/Disassembler/AArch64/arm64-branch.txt similarity index 100% rename from test/MC/Disassembler/ARM64/branch.txt rename to test/MC/Disassembler/AArch64/arm64-branch.txt diff --git a/test/MC/Disassembler/ARM64/canonical-form.txt 
b/test/MC/Disassembler/AArch64/arm64-canonical-form.txt similarity index 100% rename from test/MC/Disassembler/ARM64/canonical-form.txt rename to test/MC/Disassembler/AArch64/arm64-canonical-form.txt diff --git a/test/MC/Disassembler/ARM64/crc32.txt b/test/MC/Disassembler/AArch64/arm64-crc32.txt similarity index 100% rename from test/MC/Disassembler/ARM64/crc32.txt rename to test/MC/Disassembler/AArch64/arm64-crc32.txt diff --git a/test/MC/Disassembler/ARM64/crypto.txt b/test/MC/Disassembler/AArch64/arm64-crypto.txt similarity index 100% rename from test/MC/Disassembler/ARM64/crypto.txt rename to test/MC/Disassembler/AArch64/arm64-crypto.txt diff --git a/test/MC/Disassembler/ARM64/invalid-logical.txt b/test/MC/Disassembler/AArch64/arm64-invalid-logical.txt similarity index 100% rename from test/MC/Disassembler/ARM64/invalid-logical.txt rename to test/MC/Disassembler/AArch64/arm64-invalid-logical.txt diff --git a/test/MC/Disassembler/ARM64/logical.txt b/test/MC/Disassembler/AArch64/arm64-logical.txt similarity index 100% rename from test/MC/Disassembler/ARM64/logical.txt rename to test/MC/Disassembler/AArch64/arm64-logical.txt diff --git a/test/MC/Disassembler/ARM64/memory.txt b/test/MC/Disassembler/AArch64/arm64-memory.txt similarity index 100% rename from test/MC/Disassembler/ARM64/memory.txt rename to test/MC/Disassembler/AArch64/arm64-memory.txt diff --git a/test/MC/Disassembler/ARM64/non-apple-fmov.txt b/test/MC/Disassembler/AArch64/arm64-non-apple-fmov.txt similarity index 100% rename from test/MC/Disassembler/ARM64/non-apple-fmov.txt rename to test/MC/Disassembler/AArch64/arm64-non-apple-fmov.txt diff --git a/test/MC/Disassembler/ARM64/scalar-fp.txt b/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt similarity index 100% rename from test/MC/Disassembler/ARM64/scalar-fp.txt rename to test/MC/Disassembler/AArch64/arm64-scalar-fp.txt diff --git a/test/MC/Disassembler/ARM64/system.txt b/test/MC/Disassembler/AArch64/arm64-system.txt similarity index 100% rename from test/MC/Disassembler/ARM64/system.txt rename to test/MC/Disassembler/AArch64/arm64-system.txt diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg index 65369741489..2c423d139bb 100644 --- a/test/MC/Disassembler/AArch64/lit.local.cfg +++ b/test/MC/Disassembler/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if 'ARM64' not in targets: +if 'AArch64' not in targets: config.unsupported = True diff --git a/test/MC/Disassembler/ARM64/lit.local.cfg b/test/MC/Disassembler/ARM64/lit.local.cfg deleted file mode 100644 index 46a946845e1..00000000000 --- a/test/MC/Disassembler/ARM64/lit.local.cfg +++ /dev/null @@ -1,5 +0,0 @@ -config.suffixes = ['.txt'] - -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: - config.unsupported = True diff --git a/test/MC/MachO/ARM64/darwin-ARM64-local-label-diff.s b/test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s similarity index 100% rename from test/MC/MachO/ARM64/darwin-ARM64-local-label-diff.s rename to test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s diff --git a/test/MC/MachO/ARM64/darwin-ARM64-reloc.s b/test/MC/MachO/AArch64/darwin-ARM64-reloc.s similarity index 100% rename from test/MC/MachO/ARM64/darwin-ARM64-reloc.s rename to test/MC/MachO/AArch64/darwin-ARM64-reloc.s diff --git a/test/Transforms/GlobalMerge/ARM64/lit.local.cfg b/test/MC/MachO/AArch64/lit.local.cfg similarity index 74% rename from test/Transforms/GlobalMerge/ARM64/lit.local.cfg rename to 
test/MC/MachO/AArch64/lit.local.cfg index a75a42b6f74..9a66a00189e 100644 --- a/test/Transforms/GlobalMerge/ARM64/lit.local.cfg +++ b/test/MC/MachO/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Transforms/ConstantHoisting/ARM64/const-addr.ll b/test/Transforms/ConstantHoisting/AArch64/const-addr.ll similarity index 100% rename from test/Transforms/ConstantHoisting/ARM64/const-addr.ll rename to test/Transforms/ConstantHoisting/AArch64/const-addr.ll diff --git a/test/Transforms/ConstantHoisting/ARM64/large-immediate.ll b/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll similarity index 100% rename from test/Transforms/ConstantHoisting/ARM64/large-immediate.ll rename to test/Transforms/ConstantHoisting/AArch64/large-immediate.ll diff --git a/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg similarity index 73% rename from test/Transforms/ConstantHoisting/ARM64/lit.local.cfg rename to test/Transforms/ConstantHoisting/AArch64/lit.local.cfg index 84ac9811f01..c42034979fc 100644 --- a/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg +++ b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg @@ -1,3 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Transforms/GlobalMerge/ARM64/arm64.ll b/test/Transforms/GlobalMerge/AArch64/arm64.ll similarity index 100% rename from test/Transforms/GlobalMerge/ARM64/arm64.ll rename to test/Transforms/GlobalMerge/AArch64/arm64.ll diff --git a/test/DebugInfo/ARM64/lit.local.cfg b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg similarity index 74% rename from test/DebugInfo/ARM64/lit.local.cfg rename to test/Transforms/GlobalMerge/AArch64/lit.local.cfg index a75a42b6f74..9a66a00189e 100644 --- a/test/DebugInfo/ARM64/lit.local.cfg +++ b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll index 1883a8fc8e6..39408a2d394 100644 --- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll +++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll @@ -68,7 +68,7 @@ declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind rea define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind ret <4 x i32> %a ; CHECK: entry: ; CHECK-NEXT: ret <4 x i32> zeroinitializer @@ -76,7 +76,7 @@ entry: define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> ) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> ) nounwind ret <4 x i32> %a ; CHECK: entry: ; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32> @@ -85,7 +85,7 @@ entry: define <4 x i32> @constantMulARM64() nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %a = tail 
call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind ret <4 x i32> %a ; CHECK: entry: ; CHECK-NEXT: ret <4 x i32> @@ -93,7 +93,7 @@ entry: define <4 x i32> @constantMulSARM64() nounwind readnone ssp { entry: - %b = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind ret <4 x i32> %b ; CHECK: entry: ; CHECK-NEXT: ret <4 x i32> @@ -101,7 +101,7 @@ entry: define <4 x i32> @constantMulUARM64() nounwind readnone ssp { entry: - %b = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> , <4 x i16> ) nounwind ret <4 x i32> %b ; CHECK: entry: ; CHECK-NEXT: ret <4 x i32> @@ -109,17 +109,17 @@ entry: define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) nounwind %b = add <4 x i32> zeroinitializer, %a ret <4 x i32> %b ; CHECK: entry: -; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) [[NUW:#[0-9]+]] +; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> %x) [[NUW:#[0-9]+]] ; CHECK-NEXT: ret <4 x i32> %a } define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp { entry: - %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind + %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind %b = add <4 x i32> %x, %a ret <4 x i32> %b ; CHECK: entry: @@ -127,8 +127,8 @@ entry: ; CHECK-NEXT: ret <4 x i32> %b } -declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone ; CHECK: attributes #0 = { nounwind readnone ssp } ; CHECK: attributes #1 = { nounwind readnone } diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg similarity index 78% rename from test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg rename to test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg index a49957999f0..6642d287068 100644 --- a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg @@ -1,5 +1,5 @@ config.suffixes = ['.ll'] targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll similarity index 100% rename from test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll rename to test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll similarity index 100% rename from test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll rename to test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll diff --git a/test/Transforms/LoopStrengthReduce/ARM64/req-regs.ll 
b/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll similarity index 100% rename from test/Transforms/LoopStrengthReduce/ARM64/req-regs.ll rename to test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll diff --git a/test/Transforms/LoopVectorize/ARM64/arm64-unroll.ll b/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll similarity index 100% rename from test/Transforms/LoopVectorize/ARM64/arm64-unroll.ll rename to test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll diff --git a/test/Transforms/LoopVectorize/ARM64/gather-cost.ll b/test/Transforms/LoopVectorize/AArch64/gather-cost.ll similarity index 100% rename from test/Transforms/LoopVectorize/ARM64/gather-cost.ll rename to test/Transforms/LoopVectorize/AArch64/gather-cost.ll diff --git a/test/Transforms/LoopVectorize/ARM64/lit.local.cfg b/test/Transforms/LoopVectorize/ARM64/lit.local.cfg deleted file mode 100644 index f1d1f88cf39..00000000000 --- a/test/Transforms/LoopVectorize/ARM64/lit.local.cfg +++ /dev/null @@ -1,6 +0,0 @@ -config.suffixes = ['.ll'] - -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: - config.unsupported = True - diff --git a/test/MC/MachO/ARM64/lit.local.cfg b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg similarity index 73% rename from test/MC/MachO/ARM64/lit.local.cfg rename to test/Transforms/SLPVectorizer/AArch64/lit.local.cfg index a75a42b6f74..c42034979fc 100644 --- a/test/MC/MachO/ARM64/lit.local.cfg +++ b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg @@ -1,4 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in targets: config.unsupported = True - diff --git a/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll b/test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll similarity index 100% rename from test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll rename to test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll diff --git a/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg deleted file mode 100644 index 84ac9811f01..00000000000 --- a/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: - config.unsupported = True -- 2.34.1
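
(For reference, and not part of the patch itself: the many lit.local.cfg hunks above all converge on one per-directory gating pattern. A minimal sketch of the post-rename form is below; the suffix list is illustrative and varies by directory, and some of the files keep the older "if not 'AArch64' in targets" spelling of the same test.)

# lit.local.cfg -- executed by lit with `config` in scope; marks the tests in
# this directory as unsupported unless the AArch64 backend was built.
config.suffixes = ['.ll', '.s']   # directory-specific; some dirs use ['.txt'] or ['.ll']

targets = set(config.root.targets_to_build.split())
if 'AArch64' not in targets:
    config.unsupported = True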