From: Owen Anderson Date: Tue, 2 Nov 2010 01:24:55 +0000 (+0000) Subject: Add correct NEON encodings for vld2, vld3, and vld4 basic variants. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=cf667be17b479fe276fd606b8fd72ccfa3065bb8;p=oota-llvm.git Add correct NEON encodings for vld2, vld3, and vld4 basic variants. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117997 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 3fa9a9c7247..429b1d3357c 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -102,6 +102,7 @@ namespace { unsigned getMachineSoImmOpValue(unsigned SoImm); unsigned getAddrMode6RegisterOperand(const MachineInstr &MI); + unsigned getAddrMode6OffsetOperand(const MachineInstr &MI); unsigned getAddrModeSBit(const MachineInstr &MI, const TargetInstrDesc &TID) const; @@ -175,6 +176,8 @@ namespace { const { return 0; } unsigned getAddrMode6RegisterOperand(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getAddrMode6OffsetOperand(const MachineInstr &MI, unsigned Op) + const { return 0; } unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 649c83bbd1f..94718cb53dc 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -478,6 +478,7 @@ def addrmode6 : Operand, def am6offset : Operand { let PrintMethod = "printAddrMode6OffsetOperand"; let MIOperandInfo = (ops GPR); + string EncoderMethod = "getAddrMode6OffsetOperand"; } // addrmodepc := pc + reg diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 48ded47bbf1..82ade478eaf 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -284,22 +284,28 @@ def VLD1d64QPseudo_UPD : 
VLDQQWBPseudo; // VLD2 : Vector Load (multiple 2-element structures) class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD2, - "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), + (ins addrmode6:$Rn), IIC_VLD2, + "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} class VLD2Q<bits<4> op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD2x2, - "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD2x2, + "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} -def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; -def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; -def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; +def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">; +def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">; +def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">; -def VLD2q8 : VLD2Q<0b0000, "8">; -def VLD2q16 : VLD2Q<0b0100, "16">; -def VLD2q32 : VLD2Q<0b1000, "32">; +def VLD2q8 : VLD2Q<{0,0,?,?}, "8">; +def VLD2q16 : VLD2Q<{0,1,?,?}, "16">; +def VLD2q32 : VLD2Q<{1,0,?,?}, "32">; def VLD2d8Pseudo : VLDQPseudo; def VLD2d16Pseudo : VLDQPseudo; @@ -311,24 +317,28 @@ def VLD2q32Pseudo : VLDQQPseudo; // ...with address register writeback: class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2u, - "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset", - "$addr.addr = $wb", []>; + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, + "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} class 
VLD2QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2x2u, - "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", - "$addr.addr = $wb", []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, + "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} -def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; -def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; -def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; +def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; +def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">; +def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">; -def VLD2q8_UPD : VLD2QWB<0b0000, "8">; -def VLD2q16_UPD : VLD2QWB<0b0100, "16">; -def VLD2q32_UPD : VLD2QWB<0b1000, "32">; +def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">; +def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">; +def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">; def VLD2d8Pseudo_UPD : VLDQWBPseudo; def VLD2d16Pseudo_UPD : VLDQWBPseudo; @@ -339,22 +349,25 @@ def VLD2q16Pseudo_UPD : VLDQQWBPseudo; def VLD2q32Pseudo_UPD : VLDQQWBPseudo; // ...with double-spaced registers (for disassembly only): -def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; -def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; -def VLD2b32 : VLD2D<0b1001, 0b1000, "32">; -def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">; -def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">; -def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">; +def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">; +def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">; +def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">; +def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">; +def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">; +def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, 
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD3, - "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$Rn), IIC_VLD3, + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} -def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; -def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; -def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; +def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; +def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; +def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; def VLD3d8Pseudo : VLDQQPseudo; def VLD3d16Pseudo : VLDQQPseudo; @@ -363,26 +376,28 @@ def VLD3d32Pseudo : VLDQQPseudo; // ...with address register writeback: class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3u, - "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", - "$addr.addr = $wb", []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} -def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; -def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; -def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; +def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; +def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; def VLD3d8Pseudo_UPD : VLDQQWBPseudo; def VLD3d16Pseudo_UPD : VLDQQWBPseudo; def VLD3d32Pseudo_UPD : VLDQQWBPseudo; // ...with double-spaced registers (non-updating versions for disassembly only): -def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; -def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; -def VLD3q32 : VLD3D<0b0101, 0b1000, "32">; -def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; -def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; -def VLD3q32_UPD : 
VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; +def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; +def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; +def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; +def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; +def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; @@ -396,13 +411,16 @@ def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD4, - "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD4, + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} -def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; -def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; -def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; +def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; +def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; +def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; def VLD4d8Pseudo : VLDQQPseudo; def VLD4d16Pseudo : VLDQQPseudo; @@ -411,26 +429,28 @@ def VLD4d32Pseudo : VLDQQPseudo; // ...with address register writeback: class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4, - "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", - "$addr.addr = $wb", []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4, + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} -def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; -def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; -def 
VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; +def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; +def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; def VLD4d8Pseudo_UPD : VLDQQWBPseudo; def VLD4d16Pseudo_UPD : VLDQQWBPseudo; def VLD4d32Pseudo_UPD : VLDQQWBPseudo; // ...with double-spaced registers (non-updating versions for disassembly only): -def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; -def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; -def VLD4q32 : VLD4D<0b0001, 0b1000, "32">; -def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; -def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; -def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; +def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; +def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; +def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; +def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; +def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp index 6f32ce7645d..fcf10ff745e 100644 --- a/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -100,6 +100,7 @@ public: unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op) const; unsigned getAddrMode6RegisterOperand(const MCInst &MI, unsigned Op) const; + unsigned getAddrMode6OffsetOperand(const MCInst &MI, unsigned Op) const; unsigned getNumFixupKinds() const { assert(0 && "ARMMCCodeEmitter::getNumFixupKinds() not yet implemented."); @@ -312,6 +313,14 @@ unsigned ARMMCCodeEmitter::getAddrMode6RegisterOperand(const MCInst &MI, return RegNo | (Align << 4); } +unsigned ARMMCCodeEmitter::getAddrMode6OffsetOperand(const MCInst &MI, + unsigned Op) const { + const MCOperand &regno = MI.getOperand(Op); + if (regno.getReg() == 0) return 0x0D; + return regno.getReg(); +} + + void ARMMCCodeEmitter:: 
EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s index 93919c82392..eb8f80e3670 100644 --- a/test/MC/ARM/neon-vld-encoding.s +++ b/test/MC/ARM/neon-vld-encoding.s @@ -17,3 +17,57 @@ vld1.32 {d16, d17}, [r0] @ CHECK: vld1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x60,0xf4] vld1.64 {d16, d17}, [r0] + +@ CHECK: vld2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x60,0xf4] + vld2.8 {d16, d17}, [r0, :64] +@ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf4] + vld2.16 {d16, d17}, [r0, :128] +@ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf4] + vld2.32 {d16, d17}, [r0] +@ CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf4] + vld2.8 {d16, d17, d18, d19}, [r0, :64] +@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4] + vld2.16 {d16, d17, d18, d19}, [r0, :128] +@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4] + vld2.32 {d16, d17, d18, d19}, [r0, :256] + +@ CHECK: vld3.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf4] + vld3.8 {d16, d17, d18}, [r0, :64] +@ CHECK: vld3.16 {d16, d17, d18}, [r0] @ encoding: [0x4f,0x04,0x60,0xf4] + vld3.16 {d16, d17, d18}, [r0] +@ CHECK: vld3.32 {d16, d17, d18}, [r0] @ encoding: [0x8f,0x04,0x60,0xf4] + vld3.32 {d16, d17, d18}, [r0] +@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4] + vld3.8 {d16, d18, d20}, [r0, :64]! +@ CHECK: vld3.8 {d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf4] + vld3.8 {d17, d19, d21}, [r0, :64]! +@ CHECK: vld3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x60,0xf4] + vld3.16 {d16, d18, d20}, [r0]! +@ CHECK: vld3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x60,0xf4] + vld3.16 {d17, d19, d21}, [r0]! +@ CHECK: vld3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x60,0xf4] + vld3.32 {d16, d18, d20}, [r0]! 
+@ CHECK: vld3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x60,0xf4] + vld3.32 {d17, d19, d21}, [r0]! + +@ CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x60,0xf4] + vld4.8 {d16, d17, d18, d19}, [r0, :64] +@ CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x60,0xf4] + vld4.16 {d16, d17, d18, d19}, [r0, :128] +@ CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x00,0x60,0xf4] + vld4.32 {d16, d17, d18, d19}, [r0, :256] +@ CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x60,0xf4] + vld4.8 {d16, d18, d20, d22}, [r0, :256]! +@ CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x60,0xf4] + vld4.8 {d17, d19, d21, d23}, [r0, :256]! +@ CHECK: vld4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf4] + vld4.16 {d16, d18, d20, d22}, [r0]! +@ CHECK: vld4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf4] + vld4.16 {d17, d19, d21, d23}, [r0]! +@ CHECK: vld4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf4] + vld4.32 {d16, d18, d20, d22}, [r0]! +@ CHECK: vld4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf4] + vld4.32 {d17, d19, d21, d23}, [r0]! + + + \ No newline at end of file