#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
class X86ATTAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+protected:
+ unsigned Is64Bit : 1; // Assigned by the X86_32/X86_64 subclass constructors.
+
private:
MCAsmParser &getParser() const { return Parser; }
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
X86Operand *ParseOperand();
- X86Operand *ParseMemOperand();
+ X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ void InstructionCleanup(MCInst &Inst);
+
/// @name Auto-generated Match Functions
- /// {
+ /// {
bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCInst &Inst);
+ bool MatchInstructionImpl(
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
+
/// }
public:
virtual bool ParseDirective(AsmToken DirectiveID);
};
-
+
+/// X86_32ATTAsmParser - The AT&T syntax parser registered for the 32-bit X86
+/// target. It behaves like X86ATTAsmParser with Is64Bit cleared.
+class X86_32ATTAsmParser : public X86ATTAsmParser {
+public:
+  /// Forward the target and generic parser to the base class, then select
+  /// 32-bit mode. The parameter is named P rather than _Parser because
+  /// identifiers starting with an underscore and an uppercase letter are
+  /// reserved for the implementation.
+  X86_32ATTAsmParser(const Target &T, MCAsmParser &P)
+    : X86ATTAsmParser(T, P) {
+    Is64Bit = false;
+  }
+};
+
+/// X86_64ATTAsmParser - The AT&T syntax parser registered for the 64-bit X86
+/// target. It behaves like X86ATTAsmParser with Is64Bit set.
+class X86_64ATTAsmParser : public X86ATTAsmParser {
+public:
+  /// Forward the target and generic parser to the base class, then select
+  /// 64-bit mode. The parameter is named P rather than _Parser because
+  /// identifiers starting with an underscore and an uppercase letter are
+  /// reserved for the implementation.
+  X86_64ATTAsmParser(const Target &T, MCAsmParser &P)
+    : X86ATTAsmParser(T, P) {
+    Is64Bit = true;
+  }
+};
+
} // end anonymous namespace
/// @name Auto-generated Match Functions
X86Operand(KindTy K, SMLoc Start, SMLoc End)
: Kind(K), StartLoc(Start), EndLoc(End) {}
-
+
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
+  /// setTokenValue - Point this token operand at a different string. Only the
+  /// pointer and length are recorded; the characters are not copied, so the
+  /// storage behind Value has to stay valid while the token refers to it.
+  void setTokenValue(StringRef Value) {
+    assert(Kind == Token && "Invalid access!");
+    Tok.Length = Value.size();
+    Tok.Data = Value.data();
+  }
unsigned getReg() const {
assert(Kind == Register && "Invalid access!");
return true;
}
+  /// isImmSExt32 - True for an immediate operand that is either a non-constant
+  /// expression or a constant whose value survives a round trip through a
+  /// sign-extended 32-bit integer.
+  bool isImmSExt32() const {
+    if (!isImm())
+      return false;
+
+    const MCConstantExpr *Const = dyn_cast<MCConstantExpr>(getImm());
+
+    // Non-absolute immediates are always accepted.
+    if (!Const)
+      return true;
+
+    // Narrow to 32 bits and widen again; the value fits iff nothing changed.
+    const int64_t Full = Const->getValue();
+    return static_cast<int64_t>(static_cast<int32_t>(Full)) == Full;
+  }
+
bool isMem() const { return Kind == Memory; }
bool isAbsMem() const {
bool isReg() const { return Kind == Register; }
+  /// addExpr - Append Expr to Inst as a single operand: a plain immediate when
+  /// the expression is a constant, an expression operand otherwise.
+  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    const MCConstantExpr *Const = dyn_cast<MCConstantExpr>(Expr);
+    if (!Const) {
+      // Not a known constant: keep the expression as is.
+      Inst.addOperand(MCOperand::CreateExpr(Expr));
+      return;
+    }
+
+    Inst.addOperand(MCOperand::CreateImm(Const->getValue()));
+  }
+
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ addExpr(Inst, getImm());
}
void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
// FIXME: Support user customization of the render method.
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ addExpr(Inst, getImm());
+ }
+
+ void addImmSExt32Operands(MCInst &Inst, unsigned N) const {
+ // FIXME: Support user customization of the render method.
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm()); // Constant -> immediate, otherwise an expression.
}
void addMemOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
Inst.addOperand(MCOperand::CreateImm(getMemScale()));
Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
- Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ addExpr(Inst, getMemDisp());
Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
}
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
Inst.addOperand(MCOperand::CreateImm(getMemScale()));
Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
- Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ addExpr(Inst, getMemDisp());
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
X86Operand *X86ATTAsmParser::ParseOperand() {
switch (getLexer().getKind()) {
default:
- return ParseMemOperand();
+ // Parse a memory operand with no segment register.
+ return ParseMemOperand(0, Parser.getTok().getLoc());
case AsmToken::Percent: {
- // FIXME: if a segment register, this could either be just the seg reg, or
- // the start of a memory operand.
+ // Read the register.
unsigned RegNo;
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
- return X86Operand::CreateReg(RegNo, Start, End);
+
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, End);
+
+
+ getParser().Lex(); // Eat the colon.
+ return ParseMemOperand(RegNo, Start);
}
case AsmToken::Dollar: {
// $42 -> immediate.
}
}
-/// ParseMemOperand: segment: disp(basereg, indexreg, scale)
-X86Operand *X86ATTAsmParser::ParseMemOperand() {
- SMLoc MemStart = Parser.getTok().getLoc();
-
- // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
- unsigned SegReg = 0;
-
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
+/// has already been parsed if present.
+X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
bool X86ATTAsmParser::
ParseInstruction(const StringRef &Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // FIXME: Hack to recognize "sal..." for now. We need a way to represent
- // alternative syntaxes in the .td file, without requiring instruction
- // duplication.
- if (Name.startswith("sal")) {
- std::string Tmp = "shl" + Name.substr(3).str();
- Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc));
+ // The various flavors of pushf and popf use Requires<In32BitMode> and
+ // Requires<In64BitMode>, but the assembler doesn't yet implement that.
+ // For now, just do a manual check to prevent silent misencoding.
+ if (Is64Bit) {
+ if (Name == "popfl")
+ return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
+ else if (Name == "pushfl")
+ return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
} else {
- // FIXME: This is a hack. We eventually want to add a general pattern
- // mechanism to be used in the table gen file for these assembly names that
- // use the same opcodes. Also we should only allow the "alternate names"
- // for rep and repne with the instructions they can only appear with.
- StringRef PatchedName = Name;
- if (Name == "repe" || Name == "repz")
- PatchedName = "rep";
- else if (Name == "repnz")
- PatchedName = "repne";
- Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
- }
+ if (Name == "popfq")
+ return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
+ else if (Name == "pushfq")
+ return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
+ }
+
+ // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
+ // represent alternative syntaxes in the .td file, without requiring
+ // instruction duplication.
+ StringRef PatchedName = StringSwitch<StringRef>(Name)
+ .Case("sal", "shl")
+ .Case("salb", "shlb")
+ .Case("sall", "shll")
+ .Case("salq", "shlq")
+ .Case("salw", "shlw")
+ .Case("repe", "rep")
+ .Case("repz", "rep")
+ .Case("repnz", "repne")
+ .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
+ .Case("popf", Is64Bit ? "popfq" : "popfl")
+ .Default(Name);
+ Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (getLexer().isNot(AsmToken::EndOfStatement)) {
}
}
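+ // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
+ // NOTE(review): this tests Name, not PatchedName, so the "sal*" spellings that
+ // were rewritten to "shl*" above appear to skip this hack -- confirm intent.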
+ if ((Name.startswith("shr") || Name.startswith("sar") ||
+ Name.startswith("shl")) &&
+ Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[1])->isImm() &&
+ isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
+ cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ }
+
return false;
}
return false;
}
+/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has an
+/// imm operand, to having "rm" or "mr" operands with the offset in the disp
+/// field.
+static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
+ bool isMR) {
+ MCOperand Disp = Inst.getOperand(0); // Save the offset before Inst is reset.
+
+ // Start over with an empty instruction.
+ Inst = MCInst();
+ Inst.setOpcode(Opc);
+
+ if (!isMR) // "rm" form: the register precedes the memory operand.
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+
+ // Add the mem operand, in the same order addMemOperands emits it.
+ Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg
+ Inst.addOperand(MCOperand::CreateImm(1)); // Scale
+ Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg
+ Inst.addOperand(Disp); // Displacement
+ Inst.addOperand(MCOperand::CreateReg(0)); // Segment
+
+ if (isMR) // "mr" form: the register follows the memory operand.
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+}
+
+// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
+// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
+// proper mechanism for supporting (ambiguous) feature dependent instructions.
+void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
+ if (!Is64Bit) return; // Only the 64-bit parser rewrites anything.
+
+ switch (Inst.getOpcode()) { // Opcodes not listed below are left untouched.
+ case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
+ case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
+ case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
+ case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
+ case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
+ case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
+ case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
+ case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
+
+ // moffset instructions are x86-32 only; rebuild them as rm/mr moves.
+ case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
+ case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
+ case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
+ case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
+ case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
+ case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
+ }
+}
+
+/// MatchInstruction - Match the parsed operands against the instruction
+/// tables, first with the mnemonic as written and then with each of the size
+/// suffixes 'b', 'w', 'l' and 'q' appended. Returns false on success (Inst is
+/// filled in) and true when nothing, or more than one suffix, matched.
+bool
+X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
+                                    &Operands,
+                                  MCInst &Inst) {
+  // MatchInstructionImpl returns false on success; a direct hit on the
+  // mnemonic as written wins outright.
+  if (!MatchInstructionImpl(Operands, Inst))
+    return false;
+
+  // A suffix match needs a leading mnemonic token that, with one extra
+  // character, still fits in the 16-byte scratch buffer below.
+  if (Operands.empty())
+    return true;
+  X86Operand *Mnemonic = static_cast<X86Operand*>(Operands[0]);
+  if (!Mnemonic->isToken())
+    return true;
+  const StringRef Base = Mnemonic->getToken();
+  if (Base.size() > 15)
+    return true;
+
+  // FIXME: Ideally, we would only attempt suffix matches for things which are
+  // valid prefixes, and we could just infer the right unambiguous
+  // type. However, that requires substantially more matcher support than the
+  // following hack.
+
+  // Temporarily retarget the mnemonic token at a local copy that has room for
+  // one suffix character after the original text.
+  char Tmp[16];
+  memcpy(Tmp, Base.data(), Base.size());
+  Mnemonic->setTokenValue(StringRef(Tmp, Base.size() + 1));
+
+  // Probe every suffix, in the order b, w, l, q, counting the failures.
+  unsigned NumFailed = 0;
+  const char Suffixes[] = "bwlq";
+  for (const char *Suffix = Suffixes; *Suffix; ++Suffix) {
+    Tmp[Base.size()] = *Suffix;
+    if (MatchInstructionImpl(Operands, Inst))
+      ++NumFailed;
+  }
+
+  // Put the original mnemonic back before Tmp goes out of scope.
+  Mnemonic->setTokenValue(Base);
+
+  // Three failures out of four probes mean exactly one suffix matched; that
+  // is a success, and Inst already holds it because the failing matches did
+  // not modify it. Any other count is a failed match.
+  return NumFailed != 3;
+}
+
+
extern "C" void LLVMInitializeX86AsmLexer();
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
- RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
- RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+ RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); // Is64Bit = false
+ RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); // Is64Bit = true
LLVMInitializeX86AsmLexer();
}