From: Daniel Dunbar <daniel@zuster.org>
Date: Wed, 14 Aug 2013 15:55:25 +0000 (+0000)
Subject: Revert r188376, "[lit] Support parsing scripts with inconsistent or invalid encodings."
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=14a5c695a9a4dc773cfc37c0c4a847e0160e5396;p=oota-llvm.git

Revert r188376, "[lit] Support parsing scripts with inconsistent or invalid encodings.", because it doesn't work yet for bots using the internal shell.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188379 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 8a9bddd2a23..068e4991b21 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -305,54 +305,24 @@ def isExpectedFail(test, xfails):
 
     return False
 
-def parseIntegratedTestScriptCommands(source_path):
+def parseIntegratedTestScriptCommands(sourcepath):
     """
     parseIntegratedTestScriptCommands(source_path) -> commands
 
     Parse the commands in an integrated test script file into a list of
     (line_number, command_type, line).
     """
-
-    # This code is carefully written to be dual compatible with Python 2.5+ and
-    # Python 3 without requiring input files to always have valid codings. The
-    # trick we use is to open the file in binary mode and use the regular
-    # expression library to find the commands, with it scanning strings in
-    # Python2 and bytes in Python3.
-    #
-    # Once we find a match, we do require each script line to be decodable to
-    # ascii, so we convert the outputs to ascii before returning. This way the
-    # remaining code can work with "strings" agnostic of the executing Python
-    # version.
-    
-    def to_bytes(str):
-        # Encode to Latin1 to get binary data.
-        return str.encode('ISO-8859-1')
-    keywords = ('RUN:', 'XFAIL:', 'REQUIRES:', 'END.')
-    keywords_re = re.compile(
-        to_bytes("(%s)(.*)\n" % ("|".join(k for k in keywords),)))
-
-    f = open(source_path, 'rb')
-    try:
-        # Read the entire file contents.
-        data = f.read()
-
-        # Iterate over the matches.
-        line_number = 1
-        last_match_position = 0
-        for match in keywords_re.finditer(data):
-            # Compute the updated line number by counting the intervening
-            # newlines.
-            match_position = match.start()
-            line_number += data.count(to_bytes('\n'), last_match_position,
-                                      match_position)
-            last_match_position = match_position
-
-            # Convert the keyword and line to ascii and yield the command.
-            keyword,ln = match.groups()
-            yield (line_number, keyword[:-1].decode('ascii'),
-                   ln.decode('ascii'))
-    finally:
-        f.close()
+    line_number = 0
+    for ln in open(sourcepath):
+        line_number += 1
+        if 'RUN:' in ln:
+            yield (line_number, 'RUN', ln[ln.index('RUN:')+4:])
+        elif 'XFAIL:' in ln:
+            yield (line_number, 'XFAIL', ln[ln.index('XFAIL:') + 6:])
+        elif 'REQUIRES:' in ln:
+            yield (line_number, 'REQUIRES', ln[ln.index('REQUIRES:') + 9:])
+        elif 'END.' in ln:
+            yield (line_number, 'END', ln[ln.index('END.') + 4:])
 
 def parseIntegratedTestScript(test, normalize_slashes=False,
                               extra_substitutions=[]):
diff --git a/utils/lit/tests/shtest-encoding.py b/utils/lit/tests/shtest-encoding.py
deleted file mode 100644
index dfc987f6df7..00000000000
--- a/utils/lit/tests/shtest-encoding.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# RUN: true
-
-# Here is a string that cannot be decoded in line mode: Â.