From: Daniel Dunbar Date: Wed, 14 Aug 2013 15:55:25 +0000 (+0000) Subject: Revert r188376, "[lit] Support parsing scripts with inconsistent or invalid encodings... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=14a5c695a9a4dc773cfc37c0c4a847e0160e5396;p=oota-llvm.git Revert r188376, "[lit] Support parsing scripts with inconsistent or invalid encodings.", this doesn't work yet for bots using the internal shell. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188379 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py index 8a9bddd2a23..068e4991b21 100644 --- a/utils/lit/lit/TestRunner.py +++ b/utils/lit/lit/TestRunner.py @@ -305,54 +305,24 @@ def isExpectedFail(test, xfails): return False -def parseIntegratedTestScriptCommands(source_path): +def parseIntegratedTestScriptCommands(sourcepath): """ parseIntegratedTestScriptCommands(source_path) -> commands Parse the commands in an integrated test script file into a list of (line_number, command_type, line). """ - - # This code is carefully written to be dual compatible with Python 2.5+ and - # Python 3 without requiring input files to always have valid codings. The - # trick we use is to open the file in binary mode and use the regular - # expression library to find the commands, with it scanning strings in - # Python2 and bytes in Python3. - # - # Once we find a match, we do require each script line to be decodable to - # ascii, so we convert the outputs to ascii before returning. This way the - # remaining code can work with "strings" agnostic of the executing Python - # version. - - def to_bytes(str): - # Encode to Latin1 to get binary data. - return str.encode('ISO-8859-1') - keywords = ('RUN:', 'XFAIL:', 'REQUIRES:', 'END.') - keywords_re = re.compile( - to_bytes("(%s)(.*)\n" % ("|".join(k for k in keywords),))) - - f = open(source_path, 'rb') - try: - # Read the entire file contents. - data = f.read() - - # Iterate over the matches. - line_number = 1 - last_match_position = 0 - for match in keywords_re.finditer(data): - # Compute the updated line number by counting the intervening - # newlines. - match_position = match.start() - line_number += data.count(to_bytes('\n'), last_match_position, - match_position) - last_match_position = match_position - - # Convert the keyword and line to ascii and yield the command. - keyword,ln = match.groups() - yield (line_number, keyword[:-1].decode('ascii'), - ln.decode('ascii')) - finally: - f.close() + line_number = 0 + for ln in open(sourcepath): + line_number += 1 + if 'RUN:' in ln: + yield (line_number, 'RUN', ln[ln.index('RUN:')+4:]) + elif 'XFAIL:' in ln: + yield (line_number, 'XFAIL', ln[ln.index('XFAIL:') + 6:]) + elif 'REQUIRES:' in ln: + yield (line_number, 'REQUIRES', ln[ln.index('REQUIRES:') + 9:]) + elif 'END.' in ln: + yield (line_number, 'END', ln[ln.index('END.') + 4:]) def parseIntegratedTestScript(test, normalize_slashes=False, extra_substitutions=[]): diff --git a/utils/lit/tests/shtest-encoding.py b/utils/lit/tests/shtest-encoding.py deleted file mode 100644 index dfc987f6df7..00000000000 --- a/utils/lit/tests/shtest-encoding.py +++ /dev/null @@ -1,3 +0,0 @@ -# RUN: true - -# Here is a string that cannot be decoded in line mode: Â.