Remove duplicate implementation of excludes functionality, and support excluding
[oota-llvm.git] / utils / lit / ShUtil.py
1 import itertools
2
3 import Util
4 from ShCommands import Command, Pipeline, Seq
5
6 class ShLexer:
7     def __init__(self, data, win32Escapes = False):
8         self.data = data
9         self.pos = 0
10         self.end = len(data)
11         self.win32Escapes = win32Escapes
12
13     def eat(self):
14         c = self.data[self.pos]
15         self.pos += 1
16         return c
17
18     def look(self):
19         return self.data[self.pos]
20
21     def maybe_eat(self, c):
22         """
23         maybe_eat(c) - Consume the character c if it is the next character,
24         returning True if a character was consumed. """
25         if self.data[self.pos] == c:
26             self.pos += 1
27             return True
28         return False
29
30     def lex_arg_fast(self, c):
31         # Get the leading whitespace free section.
32         chunk = self.data[self.pos - 1:].split(None, 1)[0]
33         
34         # If it has special characters, the fast path failed.
35         if ('|' in chunk or '&' in chunk or 
36             '<' in chunk or '>' in chunk or
37             "'" in chunk or '"' in chunk or
38             '\\' in chunk):
39             return None
40         
41         self.pos = self.pos - 1 + len(chunk)
42         return chunk
43         
44     def lex_arg_slow(self, c):
45         if c in "'\"":
46             str = self.lex_arg_quoted(c)
47         else:
48             str = c
49         while self.pos != self.end:
50             c = self.look()
51             if c.isspace() or c in "|&":
52                 break
53             elif c in '><':
54                 # This is an annoying case; we treat '2>' as a single token so
55                 # we don't have to track whitespace tokens.
56
57                 # If the parse string isn't an integer, do the usual thing.
58                 if not str.isdigit():
59                     break
60
61                 # Otherwise, lex the operator and convert to a redirection
62                 # token.
63                 num = int(str)
64                 tok = self.lex_one_token()
65                 assert isinstance(tok, tuple) and len(tok) == 1
66                 return (tok[0], num)                    
67             elif c == '"':
68                 self.eat()
69                 str += self.lex_arg_quoted('"')\r
70             elif not self.win32Escapes and c == '\\':
71                 # Outside of a string, '\\' escapes everything.
72                 self.eat()
73                 if self.pos == self.end:
74                     Util.warning("escape at end of quoted argument in: %r" % 
75                                  self.data)
76                     return str
77                 str += self.eat()
78             else:
79                 str += self.eat()
80         return str
81
82     def lex_arg_quoted(self, delim):
83         str = ''
84         while self.pos != self.end:
85             c = self.eat()
86             if c == delim:
87                 return str
88             elif c == '\\' and delim == '"':
89                 # Inside a '"' quoted string, '\\' only escapes the quote
90                 # character and backslash, otherwise it is preserved.
91                 if self.pos == self.end:
92                     Util.warning("escape at end of quoted argument in: %r" % 
93                                  self.data)
94                     return str
95                 c = self.eat()
96                 if c == '"': # 
97                     str += '"'
98                 elif c == '\\':
99                     str += '\\'
100                 else:
101                     str += '\\' + c
102             else:
103                 str += c
104         Util.warning("missing quote character in %r" % self.data)
105         return str
106     
107     def lex_arg_checked(self, c):
108         pos = self.pos
109         res = self.lex_arg_fast(c)
110         end = self.pos
111
112         self.pos = pos
113         reference = self.lex_arg_slow(c)
114         if res is not None:
115             if res != reference:
116                 raise ValueError,"Fast path failure: %r != %r" % (res, reference)
117             if self.pos != end:
118                 raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
119         return reference
120         
121     def lex_arg(self, c):
122         return self.lex_arg_fast(c) or self.lex_arg_slow(c)
123         
124     def lex_one_token(self):
125         """
126         lex_one_token - Lex a single 'sh' token. """
127
128         c = self.eat()
129         if c in ';!':
130             return (c,)
131         if c == '|':
132             if self.maybe_eat('|'):
133                 return ('||',)
134             return (c,)
135         if c == '&':
136             if self.maybe_eat('&'):
137                 return ('&&',)
138             if self.maybe_eat('>'): 
139                 return ('&>',)
140             return (c,)
141         if c == '>':
142             if self.maybe_eat('&'):
143                 return ('>&',)
144             if self.maybe_eat('>'):
145                 return ('>>',)
146             return (c,)
147         if c == '<':
148             if self.maybe_eat('&'):
149                 return ('<&',)
150             if self.maybe_eat('>'):
151                 return ('<<',)
152             return (c,)
153
154         return self.lex_arg(c)
155
156     def lex(self):
157         while self.pos != self.end:
158             if self.look().isspace():
159                 self.eat()
160             else:
161                 yield self.lex_one_token()
162
163 ###
164  
165 class ShParser:
166     def __init__(self, data, win32Escapes = False):
167         self.data = data
168         self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
169     
170     def lex(self):
171         try:
172             return self.tokens.next()
173         except StopIteration:
174             return None
175     
176     def look(self):
177         next = self.lex()
178         if next is not None:
179             self.tokens = itertools.chain([next], self.tokens)
180         return next
181     
182     def parse_command(self):
183         tok = self.lex()
184         if not tok:
185             raise ValueError,"empty command!"
186         if isinstance(tok, tuple):
187             raise ValueError,"syntax error near unexpected token %r" % tok[0]
188         
189         args = [tok]
190         redirects = []
191         while 1:
192             tok = self.look()
193
194             # EOF?
195             if tok is None:
196                 break
197
198             # If this is an argument, just add it to the current command.
199             if isinstance(tok, str):
200                 args.append(self.lex())
201                 continue
202
203             # Otherwise see if it is a terminator.
204             assert isinstance(tok, tuple)
205             if tok[0] in ('|',';','&','||','&&'):
206                 break
207             
208             # Otherwise it must be a redirection.
209             op = self.lex()
210             arg = self.lex()
211             if not arg:
212                 raise ValueError,"syntax error near token %r" % op[0]
213             redirects.append((op, arg))
214
215         return Command(args, redirects)
216
217     def parse_pipeline(self):
218         negate = False
219         if self.look() == ('!',):
220             self.lex()
221             negate = True
222
223         commands = [self.parse_command()]
224         while self.look() == ('|',):
225             self.lex()
226             commands.append(self.parse_command())
227         return Pipeline(commands, negate)
228             
229     def parse(self):
230         lhs = self.parse_pipeline()
231
232         while self.look():
233             operator = self.lex()
234             assert isinstance(operator, tuple) and len(operator) == 1
235
236             if not self.look():
237                 raise ValueError, "missing argument to operator %r" % operator[0]
238             
239             # FIXME: Operator precedence!!
240             lhs = Seq(lhs, operator[0], self.parse_pipeline())
241
242         return lhs
243
244 ###
245
246 import unittest
247
248 class TestShLexer(unittest.TestCase):
249     def lex(self, str, *args, **kwargs):
250         return list(ShLexer(str, *args, **kwargs).lex())
251
252     def test_basic(self):
253         self.assertEqual(self.lex('a|b>c&d<e'),
254                          ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd', 
255                           ('<',), 'e'])
256
257     def test_redirection_tokens(self):
258         self.assertEqual(self.lex('a2>c'),
259                          ['a2', ('>',), 'c'])
260         self.assertEqual(self.lex('a 2>c'),
261                          ['a', ('>',2), 'c'])
262         
263     def test_quoting(self):
264         self.assertEqual(self.lex(""" 'a' """),
265                          ['a'])
266         self.assertEqual(self.lex(""" "hello\\"world" """),
267                          ['hello"world'])
268         self.assertEqual(self.lex(""" "hello\\'world" """),
269                          ["hello\\'world"])
270         self.assertEqual(self.lex(""" "hello\\\\world" """),
271                          ["hello\\world"])
272         self.assertEqual(self.lex(""" he"llo wo"rld """),
273                          ["hello world"])
274         self.assertEqual(self.lex(""" a\\ b a\\\\b """),
275                          ["a b", "a\\b"])
276         self.assertEqual(self.lex(""" "" "" """),
277                          ["", ""])
278         self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
279                          ['a\\', 'b'])
280
281 class TestShParse(unittest.TestCase):
282     def parse(self, str):
283         return ShParser(str).parse()
284
285     def test_basic(self):
286         self.assertEqual(self.parse('echo hello'),
287                          Pipeline([Command(['echo', 'hello'], [])], False))
288         self.assertEqual(self.parse('echo ""'),
289                          Pipeline([Command(['echo', ''], [])], False))
290
291     def test_redirection(self):
292         self.assertEqual(self.parse('echo hello > c'),
293                          Pipeline([Command(['echo', 'hello'], 
294                                            [((('>'),), 'c')])], False))
295         self.assertEqual(self.parse('echo hello > c >> d'),
296                          Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
297                                                      (('>>',), 'd')])], False))
298         self.assertEqual(self.parse('a 2>&1'),
299                          Pipeline([Command(['a'], [(('>&',2), '1')])], False))
300
301     def test_pipeline(self):
302         self.assertEqual(self.parse('a | b'),
303                          Pipeline([Command(['a'], []),
304                                    Command(['b'], [])],
305                                   False))
306
307         self.assertEqual(self.parse('a | b | c'),
308                          Pipeline([Command(['a'], []),
309                                    Command(['b'], []),
310                                    Command(['c'], [])],
311                                   False))
312
313         self.assertEqual(self.parse('! a'),
314                          Pipeline([Command(['a'], [])],
315                                   True))
316
317     def test_list(self):        
318         self.assertEqual(self.parse('a ; b'),
319                          Seq(Pipeline([Command(['a'], [])], False),
320                              ';',
321                              Pipeline([Command(['b'], [])], False)))
322
323         self.assertEqual(self.parse('a & b'),
324                          Seq(Pipeline([Command(['a'], [])], False),
325                              '&',
326                              Pipeline([Command(['b'], [])], False)))
327
328         self.assertEqual(self.parse('a && b'),
329                          Seq(Pipeline([Command(['a'], [])], False),
330                              '&&',
331                              Pipeline([Command(['b'], [])], False)))
332
333         self.assertEqual(self.parse('a || b'),
334                          Seq(Pipeline([Command(['a'], [])], False),
335                              '||',
336                              Pipeline([Command(['b'], [])], False)))
337
338         self.assertEqual(self.parse('a && b || c'),
339                          Seq(Seq(Pipeline([Command(['a'], [])], False),
340                                  '&&',
341                                  Pipeline([Command(['b'], [])], False)),
342                              '||',
343                              Pipeline([Command(['c'], [])], False)))
344
345 if __name__ == '__main__':
346     unittest.main()