From: Hans Wennborg Date: Thu, 17 Oct 2013 01:13:02 +0000 (+0000) Subject: Re-commit r192758 - MC: quote tricky symbol names in asm output X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=b74b88edac9ab490ba428aef0bdebc957399bbd7 Re-commit r192758 - MC: quote tricky symbol names in asm output The reason this got reverted was that the @feat.00 symbol which was emitted for every TU became quoted, and on cygwin/mingw we use the gas assembler which couldn't handle the quotes. This commit fixes the problem by only emitting @feat.00 for win32, where we use clang -cc1as to assemble. gas would just drop this symbol anyway, so there is no loss there. With @feat.00 gone, there shouldn't be quoted symbols showing up on cygwin since it uses the Itanium ABI, which doesn't put these funny characters in symbols. > Because of win32 mangling, we produce symbol and section names with > funny characters in them, most notably @ characters. > > MC would choke on trying to parse its own assembly output. This patch addresses > that by: > > - Making @ trigger quoting of symbol names > - Also quote section names in the same way > - Just parse section names like other identifiers (to allow for quotes) > - Don't assume @ signifies a symbol variant if it is in a string. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192859 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 9a362563a7e..75f5c78b70b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -792,19 +792,25 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { EndLoc = SMLoc::getFromPointer(Identifier.end()); // This is a symbol reference. 
+ StringRef SymbolName = Identifier; + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; std::pair<StringRef, StringRef> Split = Identifier.split('@'); - MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first); - // Lookup the symbol variant if used. - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - if (Split.first.size() != Identifier.size()) { - Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); + if (Split.first.size() != Identifier.size() && + FirstTokenKind != AsmToken::String) { + SymbolName = Split.first; + StringRef VariantName = Split.second; + + // Lookup the symbol variant. + Variant = MCSymbolRefExpr::getVariantKindForName(VariantName); if (Variant == MCSymbolRefExpr::VK_Invalid) { Variant = MCSymbolRefExpr::VK_None; - return TokError("invalid variant '" + Split.second + "'"); + return TokError("invalid variant '" + VariantName + "'"); } } + MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName); + // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. 
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) { diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index df1794c9799..b3c094366ab 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -295,12 +295,7 @@ bool COFFAsmParser::ParseSectionSwitch(StringRef Section, } bool COFFAsmParser::ParseSectionName(StringRef &SectionName) { - if (!getLexer().is(AsmToken::Identifier)) - return true; - - SectionName = getTok().getIdentifier(); - Lex(); - return false; + return getParser().parseIdentifier(SectionName); } // .section name [, "flags"] diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index 64aa2c5c49e..a8f5db095b1 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -39,6 +39,22 @@ void MCSectionCOFF::setSelection(int Selection, Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; } +static bool isAcceptableSectionNameChar(char C) { + return (C >= 'a' && C <= 'z') || + (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9') || + C == '_' || C == '$' || C == '.'; +} + +/// sectionNameNeedsQuoting - Return true if the section name \p Name needs +/// quotes to be syntactically correct. 
+static bool sectionNameNeedsQuoting(StringRef Name) { + for (unsigned i = 0, e = Name.size(); i != e; ++i) + if (!isAcceptableSectionNameChar(Name[i])) + return true; + return false; +} + void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS, const MCExpr *Subsection) const { @@ -49,7 +65,10 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, return; } - OS << "\t.section\t" << getSectionName() << ",\""; + if (sectionNameNeedsQuoting(getSectionName())) + OS << "\t.section\t" << '"' << getSectionName() << '"' << ",\""; + else + OS << "\t.section\t" << getSectionName() << ",\""; if (getKind().isText()) OS << 'x'; if (getKind().isWriteable()) diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index b973c57f7b8..f386c3bc90a 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -18,12 +18,10 @@ const MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<const MCSection *>(1); static bool isAcceptableChar(char C) { - if ((C < 'a' || C > 'z') && - (C < 'A' || C > 'Z') && - (C < '0' || C > '9') && - C != '_' && C != '$' && C != '.' && C != '@') - return false; - return true; + return (C >= 'a' && C <= 'z') || + (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9') || + C == '_' || C == '$' || C == '.'; } /// NameNeedsQuoting - Return true if the identifier \p Str needs quotes to be diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 7d7a1add221..d7f7c3e55da 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -519,9 +519,11 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget->isTargetEnvMacho()) OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); - if (Subtarget->isTargetCOFF()) { + if (Subtarget->isTargetCOFF() && Subtarget->isTargetWindows()) { // Emit an absolute @feat.00 symbol. This appears to be some kind of // compiler features bitfield read by link.exe. + // We only do this on win32, since on cygwin etc. 
we use the GNU assembler, + // which doesn't handle this symbol. if (!Subtarget->is64Bit()) { MCSymbol *S = MMI->getContext().GetOrCreateSymbol(StringRef("@feat.00")); OutStreamer.BeginCOFFSymbolDef(S); diff --git a/test/CodeGen/X86/coff-feat00.ll b/test/CodeGen/X86/coff-feat00.ll index 1dcd4276399..048aa34345d 100644 --- a/test/CodeGen/X86/coff-feat00.ll +++ b/test/CodeGen/X86/coff-feat00.ll @@ -1,7 +1,9 @@ -; RUN: llc -O0 -mtriple=i386-pc-win32 -filetype=asm -o - %s | FileCheck %s +; RUN: llc -O0 -mtriple=i386-pc-win32 -filetype=asm -o - %s | FileCheck %s --check-prefix=WIN32 +; RUN: llc -O0 -mtriple=i386-pc-cygwin -filetype=asm -o - %s | FileCheck %s --check-prefix=CYGWIN define i32 @foo() { ret i32 0 } -; CHECK: @feat.00 = 1 +; WIN32: "@feat.00" = 1 +; CYGWIN-NOT: "@feat.00" = 1 diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll index 3569d36541f..17addbc3e16 100644 --- a/test/CodeGen/X86/fastcall-correct-mangling.ll +++ b/test/CodeGen/X86/fastcall-correct-mangling.ll @@ -3,7 +3,7 @@ ; Check that a fastcall function gets correct mangling define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) { -; CHECK: @func@20: +; CHECK: "@func@20": ret void } diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll index 73826ed0b29..34f90c5ee38 100644 --- a/test/CodeGen/X86/stdcall.ll +++ b/test/CodeGen/X86/stdcall.ll @@ -5,7 +5,7 @@ define internal x86_stdcallcc void @MyFunc() nounwind { entry: -; CHECK: MyFunc@0: +; CHECK: "_MyFunc@0": ; CHECK: ret ret void } @@ -20,5 +20,5 @@ entry: @B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4 ; CHECK: _B: -; CHECK: .long _MyFunc@0 +; CHECK: .long "_MyFunc@0" diff --git a/test/MC/COFF/quoted-names.ll b/test/MC/COFF/quoted-names.ll new file mode 100644 index 00000000000..920035f555b --- /dev/null +++ b/test/MC/COFF/quoted-names.ll @@ -0,0 +1,20 @@ +; Check that certain symbol and section names are quoted in the asm output. 
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s + +; Check that the symbol and section names can round-trip through the assembler. +; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -s -section-symbols | FileCheck %s --check-prefix=READOBJ + +@"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" = global i32 0 + +define weak i32 @"\01??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"() section ".text" { + %res = load i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" + ret i32 %res +} + +; CHECK: .section ".text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51","xr" +; CHECK: .globl "??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51" +; CHECK: "??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51" + +; READOBJ: Symbol +; READOBJ: Name: ??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51