1 //===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "llvm/ADT/SmallVector.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/MC/MCAsmLexer.h"
14 #include "llvm/MC/MCAsmParser.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/Support/SourceMgr.h"
19 #include "llvm/Target/TargetRegistry.h"
20 #include "llvm/Target/TargetAsmParser.h"
26 // The shift types for register controlled shifts in arm memory addressing
// Target assembly parser for ARM. It wraps the generic MCAsmParser handed to
// its constructor and declares the operand, instruction and directive parsing
// helpers that are defined later in this file.
35 class ARMAsmParser : public TargetAsmParser {
// Accessor for the generic parser held in the Parser member.
39 MCAsmParser &getParser() const { return Parser; }
// Accessor for the lexer belonging to the generic parser.
41 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Diagnostic helpers: both forward to the generic parser, and Error() hands
// back whatever Parser.Error() returns so callers can `return Error(...)`.
43 void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
45 bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
// Operand-level parsing helpers.
47 bool MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack);
49 bool ParseRegisterList(ARMOperand &Op);
51 bool ParseMemory(ARMOperand &Op);
53 bool ParseMemoryOffsetReg(bool &Negative,
54 bool &OffsetRegShifted,
55 enum ShiftType &ShiftType,
56 const MCExpr *&ShiftAmount,
57 const MCExpr *&Offset,
61 bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount);
63 bool ParseOperand(ARMOperand &Op);
// Handlers for the directives dispatched from ParseDirective(); each receives
// the source location of the directive token.
65 bool ParseDirectiveWord(unsigned Size, SMLoc L);
67 bool ParseDirectiveThumb(SMLoc L);
69 bool ParseDirectiveThumbFunc(SMLoc L);
71 bool ParseDirectiveCode(SMLoc L);
73 bool ParseDirectiveSyntax(SMLoc L);
75 // TODO - For now hacked versions of the next two are in here in this file to
76 // allow some parser testing until the table gen versions are implemented.
78 /// @name Auto-generated Match Functions
80 bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
83 /// MatchRegisterName - Match the given string to a register name and return
84 /// its register number, or -1 if there is no match. To allow return values
85 /// to be used directly in register lists, arm registers have values between
87 int MatchRegisterName(const StringRef &Name);
// Initializes the TargetAsmParser base with the target and the Parser member
// with the supplied generic parser.
93 ARMAsmParser(const Target &T, MCAsmParser &_Parser)
94 : TargetAsmParser(T), Parser(_Parser) {}
// Entry points overridden from TargetAsmParser.
96 virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
98 virtual bool ParseDirective(AsmToken DirectiveID);
101 } // end anonymous namespace
105 /// ARMOperand - Instances of this class represent a parsed ARM machine
131 // This is for all forms of ARM address expressions
134 unsigned OffsetRegNum; // used when OffsetIsReg is true
135 const MCExpr *Offset; // used when OffsetIsReg is false
136 const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
137 enum ShiftType ShiftType; // used when OffsetRegShifted is true
139 OffsetRegShifted : 1, // only used when OffsetIsReg is true
143 Negative : 1, // only used when OffsetIsReg is true
// Accessors. Each asserts that the operand is of the matching Kind before
// reading the corresponding payload.
// getToken - Returns a StringRef over the stored (Data, Length) pair.
149 StringRef getToken() const {
150 assert(Kind == Token && "Invalid access!");
151 return StringRef(Tok.Data, Tok.Length);
// getReg - Register accessor; only valid when Kind == Register.
154 unsigned getReg() const {
155 assert(Kind == Register && "Invalid access!");
// getImm - Immediate accessor; only valid when Kind == Immediate.
159 const MCExpr *getImm() const {
160 assert(Kind == Immediate && "Invalid access!");
// Kind predicates.
164 bool isToken() const {return Kind == Token; }
166 bool isReg() const { return Kind == Register; }
// addRegOperands - Appends this operand's register to Inst as a single
// register MCOperand; N must be 1.
168 void addRegOperands(MCInst &Inst, unsigned N) const {
169 assert(N == 1 && "Invalid number of operands!");
170 Inst.addOperand(MCOperand::CreateReg(getReg()));
// CreateToken - Builds a token operand that records Str's data pointer and
// size; the characters themselves are not copied here.
173 static ARMOperand CreateToken(StringRef Str) {
176 Res.Tok.Data = Str.data();
177 Res.Tok.Length = Str.size();
// CreateReg - Builds a register operand from a register number and a
// writeback flag.
181 static ARMOperand CreateReg(unsigned RegNum, bool Writeback) {
184 Res.Reg.RegNum = RegNum;
185 Res.Reg.Writeback = Writeback;
// CreateImm - Builds an immediate operand from an expression.
189 static ARMOperand CreateImm(const MCExpr *Val) {
191 Res.Kind = Immediate;
// CreateMem - Builds a memory operand; every argument is stored unchanged in
// the Mem field of the same name.
196 static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
197 const MCExpr *Offset, unsigned OffsetRegNum,
198 bool OffsetRegShifted, enum ShiftType ShiftType,
199 const MCExpr *ShiftAmount, bool Preindexed,
200 bool Postindexed, bool Negative, bool Writeback) {
203 Res.Mem.BaseRegNum = BaseRegNum;
204 Res.Mem.OffsetIsReg = OffsetIsReg;
205 Res.Mem.Offset = Offset;
206 Res.Mem.OffsetRegNum = OffsetRegNum;
207 Res.Mem.OffsetRegShifted = OffsetRegShifted;
208 Res.Mem.ShiftType = ShiftType;
209 Res.Mem.ShiftAmount = ShiftAmount;
210 Res.Mem.Preindexed = Preindexed;
211 Res.Mem.Postindexed = Postindexed;
212 Res.Mem.Negative = Negative;
213 Res.Mem.Writeback = Writeback;
218 } // end anonymous namespace.
220 /// Try to parse a register name. The token must be an Identifier when called,
221 /// and if it is a register name a Reg operand is created, the token is eaten
222 /// and false is returned. Else true is returned and no token is eaten.
/// When ParseWriteBack is set, a '!' token directly after the register is
/// also looked for and consumed.
223 /// TODO this is likely to change to allow different register types and or to
224 /// parse for a specific register type.
225 bool ARMAsmParser::MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack) {
226 const AsmToken &Tok = getLexer().getTok();
227 assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
229 // FIXME: Validate register for the current architecture; we have to do
230 // validation later, so maybe there is no need for this here.
// Look the identifier text up in the register-name table.
233 RegNum = MatchRegisterName(Tok.getString());
236 getLexer().Lex(); // Eat identifier token.
// Optional writeback marker: only examined when the caller asked for it.
238 bool Writeback = false;
239 if (ParseWriteBack) {
240 const AsmToken &ExclaimTok = getLexer().getTok();
241 if (ExclaimTok.is(AsmToken::Exclaim)) {
243 getLexer().Lex(); // Eat exclaim token
// Hand the parsed register (and writeback flag) back through Op.
247 Op = ARMOperand::CreateReg(RegNum, Writeback);
252 /// Parse a register list, return false if successful else return true or an
253 /// error. The first token must be a '{' when called.
/// Registers are accumulated as a bit mask (bit N set for register number N).
/// Duplicates and out-of-order registers only produce warnings.
254 bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) {
255 assert(getLexer().getTok().is(AsmToken::LCurly) &&
256 "Token is not an Left Curly Brace");
257 getLexer().Lex(); // Eat left curly brace token.
// The list must start with at least one register.
259 const AsmToken &RegTok = getLexer().getTok();
260 SMLoc RegLoc = RegTok.getLoc();
261 if (RegTok.isNot(AsmToken::Identifier))
262 return Error(RegLoc, "register expected");
263 int RegNum = MatchRegisterName(RegTok.getString());
265 return Error(RegLoc, "register expected");
266 getLexer().Lex(); // Eat identifier token.
267 unsigned RegList = 1 << RegNum;
269 int HighRegNum = RegNum;
270 // TODO ranges like "{Rn-Rm}"
// Each further register is introduced by a comma.
271 while (getLexer().getTok().is(AsmToken::Comma)) {
272 getLexer().Lex(); // Eat comma token.
// Note: RegTok, RegLoc and RegNum below are new loop-local variables that
// shadow the ones declared for the first register above.
274 const AsmToken &RegTok = getLexer().getTok();
275 SMLoc RegLoc = RegTok.getLoc();
276 if (RegTok.isNot(AsmToken::Identifier))
277 return Error(RegLoc, "register expected");
278 int RegNum = MatchRegisterName(RegTok.getString());
280 return Error(RegLoc, "register expected");
// Diagnose, but still accept, duplicated or non-ascending registers.
282 if (RegList & (1 << RegNum))
283 Warning(RegLoc, "register duplicated in register list");
284 else if (RegNum <= HighRegNum)
285 Warning(RegLoc, "register not in ascending order in register list");
286 RegList |= 1 << RegNum;
289 getLexer().Lex(); // Eat identifier token.
// The list must be terminated by a '}'.
291 const AsmToken &RCurlyTok = getLexer().getTok();
292 if (RCurlyTok.isNot(AsmToken::RCurly))
293 return Error(RCurlyTok.getLoc(), "'}' expected");
294 getLexer().Lex(); // Eat right curly brace token.
299 /// Parse an arm memory expression, return false if successful else return true
300 /// or an error. The first token must be a '[' when called.
/// On success Op is set to a memory operand built with ARMOperand::CreateMem.
301 /// TODO Only preindexing and postindexing addressing are started, unindexed
302 /// with option, etc are still to do.
303 bool ARMAsmParser::ParseMemory(ARMOperand &Op) {
304 assert(getLexer().getTok().is(AsmToken::LBrac) &&
305 "Token is not an Left Bracket");
306 getLexer().Lex(); // Eat left bracket token.
// The base register is mandatory; Op is used as scratch storage for it and is
// overwritten with the final memory operand further down.
308 const AsmToken &BaseRegTok = getLexer().getTok();
309 if (BaseRegTok.isNot(AsmToken::Identifier))
310 return Error(BaseRegTok.getLoc(), "register expected");
311 if (MaybeParseRegister(Op, false))
312 return Error(BaseRegTok.getLoc(), "register expected");
313 int BaseRegNum = Op.getReg();
// Flags describing the addressing form; all start out cleared.
315 bool Preindexed = false;
316 bool Postindexed = false;
317 bool OffsetIsReg = false;
318 bool Negative = false;
319 bool Writeback = false;
321 // First look for preindexed address forms, that is after the "[Rn" we now
322 // have to see if the next token is a comma.
323 const AsmToken &Tok = getLexer().getTok();
324 if (Tok.is(AsmToken::Comma)) {
326 getLexer().Lex(); // Eat comma token.
// These are filled in by ParseMemoryOffsetReg through its reference
// parameters.
328 bool OffsetRegShifted;
329 enum ShiftType ShiftType;
330 const MCExpr *ShiftAmount;
331 const MCExpr *Offset;
332 if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
333 Offset, OffsetIsReg, OffsetRegNum))
// A pre-indexed form must close its bracket after the offset.
335 const AsmToken &RBracTok = getLexer().getTok();
336 if (RBracTok.isNot(AsmToken::RBrac))
337 return Error(RBracTok.getLoc(), "']' expected");
338 getLexer().Lex(); // Eat right bracket token.
// An optional trailing '!' is consumed here.
340 const AsmToken &ExclaimTok = getLexer().getTok();
341 if (ExclaimTok.is(AsmToken::Exclaim)) {
343 getLexer().Lex(); // Eat exclaim token
345 Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
346 OffsetRegShifted, ShiftType, ShiftAmount,
347 Preindexed, Postindexed, Negative, Writeback);
350 // The "[Rn" we have so far was not followed by a comma.
351 else if (Tok.is(AsmToken::RBrac)) {
352 // This is a post indexing addressing form, that is a ']' follows after
356 getLexer().Lex(); // Eat right bracket token.
// NOTE(review): ShiftType, ShiftAmount and Offset are declared without
// initializers. If the statement ends right after ']' the offset parse below
// is skipped, so they would appear to reach CreateMem uninitialized --
// confirm against the full source.
358 int OffsetRegNum = 0;
359 bool OffsetRegShifted = false;
360 enum ShiftType ShiftType;
361 const MCExpr *ShiftAmount;
362 const MCExpr *Offset;
// Anything other than end-of-statement must be ", <offset>".
364 const AsmToken &NextTok = getLexer().getTok();
365 if (NextTok.isNot(AsmToken::EndOfStatement)) {
366 if (NextTok.isNot(AsmToken::Comma))
367 return Error(NextTok.getLoc(), "',' expected");
368 getLexer().Lex(); // Eat comma token.
369 if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
370 ShiftAmount, Offset, OffsetIsReg, OffsetRegNum))
374 Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
375 OffsetRegShifted, ShiftType, ShiftAmount,
376 Preindexed, Postindexed, Negative, Writeback);
383 /// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
384 /// we will parse the following (where +/- means that a plus or minus is
389 /// we return false on success or an error otherwise.
/// Results are delivered through the reference parameters.
390 bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
391 bool &OffsetRegShifted,
392 enum ShiftType &ShiftType,
393 const MCExpr *&ShiftAmount,
394 const MCExpr *&Offset,
399 OffsetRegShifted = false;
// An optional leading sign: '+' is simply skipped, '-' is consumed as well.
402 const AsmToken &NextTok = getLexer().getTok();
403 if (NextTok.is(AsmToken::Plus))
404 getLexer().Lex(); // Eat plus token.
405 else if (NextTok.is(AsmToken::Minus)) {
407 getLexer().Lex(); // Eat minus token
409 // See if there is a register following the "[Rn," or "[Rn]," we have so far.
410 const AsmToken &OffsetRegTok = getLexer().getTok();
411 if (OffsetRegTok.is(AsmToken::Identifier)) {
// MaybeParseRegister returns false on success, hence the negation.
412 OffsetIsReg = !MaybeParseRegister(Op, false);
414 OffsetRegNum = Op.getReg();
416 // If we parsed a register as the offset then there can be a shift after that
417 if (OffsetRegNum != -1) {
418 // Look for a comma then a shift
419 const AsmToken &Tok = getLexer().getTok();
420 if (Tok.is(AsmToken::Comma)) {
421 getLexer().Lex(); // Eat comma token.
// Remember the token after the comma so a failed shift parse can be reported
// at its location.
423 const AsmToken &Tok = getLexer().getTok();
424 if (ParseShift(ShiftType, ShiftAmount))
425 return Error(Tok.getLoc(), "shift expected");
426 OffsetRegShifted = true;
429 else { // the "[Rn," or "[Rn]," we have so far was not followed by "Rm"
430 // Look for #offset following the "[Rn," or "[Rn],"
431 const AsmToken &HashTok = getLexer().getTok();
432 if (HashTok.isNot(AsmToken::Hash))
433 return Error(HashTok.getLoc(), "'#' expected");
434 getLexer().Lex(); // Eat hash token.
// The immediate offset is parsed as a general expression.
436 if (getParser().ParseExpression(Offset))
442 /// ParseShift as one of these two:
443 /// ( lsl | lsr | asr | ror ) , # shift_amount
445 /// and returns true if it parses a shift otherwise it returns false.
/// NOTE(review): the sentence above looks inverted. The visible caller in
/// ParseMemoryOffsetReg reports "shift expected" when this function returns
/// true, i.e. it treats a true result as failure -- confirm and reword.
446 bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount) {
// A shift must start with an identifier naming the shift kind.
447 const AsmToken &Tok = getLexer().getTok();
448 if (Tok.isNot(AsmToken::Identifier))
// Shift names are accepted in all-lowercase or all-uppercase spelling only.
450 const StringRef &ShiftName = Tok.getString();
451 if (ShiftName == "lsl" || ShiftName == "LSL")
453 else if (ShiftName == "lsr" || ShiftName == "LSR")
455 else if (ShiftName == "asr" || ShiftName == "ASR")
457 else if (ShiftName == "ror" || ShiftName == "ROR")
459 else if (ShiftName == "rrx" || ShiftName == "RRX")
463 getLexer().Lex(); // Eat shift type token.
469 // Otherwise, there must be a '#' and a shift amount.
470 const AsmToken &HashTok = getLexer().getTok();
471 if (HashTok.isNot(AsmToken::Hash))
472 return Error(HashTok.getLoc(), "'#' expected");
473 getLexer().Lex(); // Eat hash token.
// The shift amount is parsed as a general expression into ShiftAmount.
475 if (getParser().ParseExpression(ShiftAmount))
481 /// A hack to allow some testing, to be replaced by a real table gen version.
/// Compares Name against a fixed list of register spellings. The numbered
/// names are accepted as "rN" or "RN"; the aliases fp, ip, sp, lr and pc are
/// accepted in lowercase only.
482 int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
483 if (Name == "r0" || Name == "R0")
485 else if (Name == "r1" || Name == "R1")
487 else if (Name == "r2" || Name == "R2")
489 else if (Name == "r3" || Name == "R3")
// NOTE(review): this repeats the r3 test directly above, so this branch can
// never be taken; it looks like a copy-paste leftover.
491 else if (Name == "r3" || Name == "R3")
493 else if (Name == "r4" || Name == "R4")
495 else if (Name == "r5" || Name == "R5")
497 else if (Name == "r6" || Name == "R6")
499 else if (Name == "r7" || Name == "R7")
501 else if (Name == "r8" || Name == "R8")
503 else if (Name == "r9" || Name == "R9")
505 else if (Name == "r10" || Name == "R10")
507 else if (Name == "r11" || Name == "R11" || Name == "fp")
509 else if (Name == "r12" || Name == "R12" || Name == "ip")
511 else if (Name == "r13" || Name == "R13" || Name == "sp")
513 else if (Name == "r14" || Name == "R14" || Name == "lr")
515 else if (Name == "r15" || Name == "R15" || Name == "pc")
520 /// A hack to allow some testing, to be replaced by a real table gen version.
/// The first operand must be the mnemonic token. For the mnemonics listed
/// below a fixed placeholder instruction is written into Inst; the remaining
/// operands are not consulted in the visible code.
521 bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
// Operands[0] is copied by value here.
523 struct ARMOperand Op0 = Operands[0];
524 assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
525 const StringRef &Mnemonic = Op0.getToken();
526 if (Mnemonic == "add" ||
527 Mnemonic == "stmfd" ||
529 Mnemonic == "ldmfd" ||
534 Mnemonic == "push" ||
537 // Hard-coded to a valid instruction, till we have a real matcher.
// The placeholder is an ARM::MOVr with fixed register and immediate operands.
539 Inst.setOpcode(ARM::MOVr);
540 Inst.addOperand(MCOperand::CreateReg(2));
541 Inst.addOperand(MCOperand::CreateReg(2));
542 Inst.addOperand(MCOperand::CreateImm(0));
543 Inst.addOperand(MCOperand::CreateImm(0));
544 Inst.addOperand(MCOperand::CreateReg(0));
551 /// Parse an arm instruction operand. For now this parses the operand regardless
/// Dispatches on the kind of the current token: identifiers are tried as
/// registers first and otherwise parsed as expressions, '[' starts a memory
/// operand and '{' starts a register list.
553 bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
554 switch (getLexer().getKind()) {
555 case AsmToken::Identifier:
// MaybeParseRegister returns false when it consumed a register (with an
// optional '!' since writeback parsing is requested here).
556 if (!MaybeParseRegister(Op, true))
558 // This was not a register so parse other operands that start with an
559 // identifier (like labels) as expressions and create them as immediates.
561 if (getParser().ParseExpression(IdVal))
563 Op = ARMOperand::CreateImm(IdVal);
565 case AsmToken::LBrac:
566 return ParseMemory(Op);
567 case AsmToken::LCurly:
568 return ParseRegisterList(Op);
571 // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
// The immediate value itself is parsed as a general expression.
573 const MCExpr *ImmVal;
574 if (getParser().ParseExpression(ImmVal))
576 Op = ARMOperand::CreateImm(ImmVal);
// Any other token kind cannot start an operand.
579 return Error(getLexer().getTok().getLoc(), "unexpected token in operand");
583 /// Parse an arm instruction mnemonic followed by its operands.
/// The mnemonic becomes the first (token) operand; the comma-separated
/// operands that follow are appended and the list is handed to
/// MatchInstruction.
584 bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
585 SmallVector<ARMOperand, 7> Operands;
587 Operands.push_back(ARMOperand::CreateToken(Name));
// Location of the first token after the mnemonic, used for the diagnostic at
// the end of this function.
589 SMLoc Loc = getLexer().getTok().getLoc();
590 if (getLexer().isNot(AsmToken::EndOfStatement)) {
592 // Read the first operand.
// A default-constructed operand is appended first and then filled in place.
593 Operands.push_back(ARMOperand());
594 if (ParseOperand(Operands.back()))
597 while (getLexer().is(AsmToken::Comma)) {
598 getLexer().Lex(); // Eat the comma.
600 // Parse and remember the operand.
601 Operands.push_back(ARMOperand());
602 if (ParseOperand(Operands.back()))
// MatchInstruction returns false when it produced an instruction.
606 if (!MatchInstruction(Operands, Inst))
609 Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented");
613 /// ParseDirective parses the arm specific directives
/// Dispatches on the directive's identifier text; every handler receives the
/// location of the directive token. .word is handled with a size of 4.
614 bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
615 StringRef IDVal = DirectiveID.getIdentifier();
616 if (IDVal == ".word")
617 return ParseDirectiveWord(4, DirectiveID.getLoc());
618 else if (IDVal == ".thumb")
619 return ParseDirectiveThumb(DirectiveID.getLoc());
620 else if (IDVal == ".thumb_func")
621 return ParseDirectiveThumbFunc(DirectiveID.getLoc());
622 else if (IDVal == ".code")
623 return ParseDirectiveCode(DirectiveID.getLoc());
624 else if (IDVal == ".syntax")
625 return ParseDirectiveSyntax(DirectiveID.getLoc());
629 /// ParseDirectiveWord
630 /// ::= .word [ expression (, expression)* ]
/// Each parsed expression is emitted to the streamer as a value of Size
/// bytes. L is the location of the directive and is used for diagnostics.
631 bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list is allowed by the grammar above.
632 if (getLexer().isNot(AsmToken::EndOfStatement)) {
635 if (getParser().ParseExpression(Value))
638 getParser().getStreamer().EmitValue(Value, Size);
// End of statement terminates the list; otherwise a comma must follow.
640 if (getLexer().is(AsmToken::EndOfStatement))
643 // FIXME: Improve diagnostic.
644 if (getLexer().isNot(AsmToken::Comma))
645 return Error(L, "unexpected token in directive");
654 /// ParseDirectiveThumb
/// The directive takes no operands: anything other than end of statement is
/// reported as an error at the directive's location L. Switching the mode is
/// still a TODO in the visible code.
656 bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
657 if (getLexer().isNot(AsmToken::EndOfStatement))
658 return Error(L, "unexpected token in directive");
661 // TODO: set thumb mode
662 // TODO: tell the MC streamer the mode
663 // getParser().getStreamer().Emit???();
667 /// ParseDirectiveThumbFunc
668 /// ::= .thumb_func symbol_name
/// Expects a symbol name (identifier or string token) followed by end of
/// statement. Marking the symbol is still a TODO in the visible code.
669 bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
670 const AsmToken &Tok = getLexer().getTok();
// NOTE(review): the message below names ".syntax" although this function
// handles ".thumb_func"; it looks copied from ParseDirectiveSyntax.
671 if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
672 return Error(L, "unexpected token in .syntax directive");
673 StringRef SymbolName = getLexer().getTok().getIdentifier();
674 getLexer().Lex(); // Consume the identifier token.
// Nothing may follow the symbol name.
676 if (getLexer().isNot(AsmToken::EndOfStatement))
677 return Error(L, "unexpected token in directive");
680 // TODO: mark symbol as a thumb symbol
681 // getParser().getStreamer().Emit???();
685 /// ParseDirectiveSyntax
686 /// ::= .syntax unified | divided
/// The mode name is accepted in all-lowercase or all-uppercase spelling and
/// is recorded in unified_syntax. Errors for the mode token are reported at
/// the directive's location L.
687 bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
688 const AsmToken &Tok = getLexer().getTok();
689 if (Tok.isNot(AsmToken::Identifier))
690 return Error(L, "unexpected token in .syntax directive");
691 const StringRef &Mode = Tok.getString();
693 if (Mode == "unified" || Mode == "UNIFIED") {
695 unified_syntax = true;
697 else if (Mode == "divided" || Mode == "DIVIDED") {
699 unified_syntax = false;
702 return Error(L, "unrecognized syntax mode in .syntax directive");
// Nothing may follow the mode; this error points at the offending token.
704 if (getLexer().isNot(AsmToken::EndOfStatement))
705 return Error(getLexer().getTok().getLoc(), "unexpected token in directive");
708 // TODO tell the MC streamer the mode
709 // getParser().getStreamer().Emit???();
713 /// ParseDirectiveCode
714 /// ::= .code 16 | 32
/// Expects an integer token; other integer values than those in the grammar
/// above are rejected with "invalid operand to .code directive".
715 bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
716 const AsmToken &Tok = getLexer().getTok();
717 if (Tok.isNot(AsmToken::Integer))
718 return Error(L, "unexpected token in .code directive");
719 int64_t Val = getLexer().getTok().getIntVal();
725 else if (Val == 32) {
730 return Error(L, "invalid operand to .code directive");
// Nothing may follow the operand; this error points at the offending token.
732 if (getLexer().isNot(AsmToken::EndOfStatement))
733 return Error(getLexer().getTok().getLoc(), "unexpected token in directive");
736 // TODO tell the MC streamer the mode
737 // getParser().getStreamer().Emit???();
741 /// Force static initialization.
/// Registers ARMAsmParser for both TheARMTarget and TheThumbTarget. The
/// function has C linkage so it can be referenced by its unmangled name.
742 extern "C" void LLVMInitializeARMAsmParser() {
743 RegisterAsmParser<ARMAsmParser> X(TheARMTarget);
744 RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);