4 from ShCommands import Command, Pipeline, Seq
7 def __init__(self, data, win32Escapes = False):
11 self.win32Escapes = win32Escapes
14 c = self.data[self.pos]
19 return self.data[self.pos]
21 def maybe_eat(self, c):
23 maybe_eat(c) - Consume the character c if it is the next character,
24 returning True if a character was consumed. """
25 if self.data[self.pos] == c:
30 def lex_arg_fast(self, c):
31 # Get the leading whitespace free section.
32 chunk = self.data[self.pos - 1:].split(None, 1)[0]
34 # If it has special characters, the fast path failed.
35 if ('|' in chunk or '&' in chunk or
36 '<' in chunk or '>' in chunk or
37 "'" in chunk or '"' in chunk or
38 ';' in chunk or '\\' in chunk):
41 self.pos = self.pos - 1 + len(chunk)
44 def lex_arg_slow(self, c):
46 str = self.lex_arg_quoted(c)
49 while self.pos != self.end:
51 if c.isspace() or c in "|&;":
54 # This is an annoying case; we treat '2>' as a single token so
55 # we don't have to track whitespace tokens.
57 # If the parse string isn't an integer, do the usual thing.
61 # Otherwise, lex the operator and convert to a redirection
64 tok = self.lex_one_token()
65 assert isinstance(tok, tuple) and len(tok) == 1
69 str += self.lex_arg_quoted('"')
72 str += self.lex_arg_quoted("'")
73 elif not self.win32Escapes and c == '\\':
74 # Outside of a string, '\\' escapes everything.
76 if self.pos == self.end:
77 Util.warning("escape at end of quoted argument in: %r" %
85 def lex_arg_quoted(self, delim):
87 while self.pos != self.end:
91 elif c == '\\' and delim == '"':
92 # Inside a '"' quoted string, '\\' only escapes the quote
93 # character and backslash, otherwise it is preserved.
94 if self.pos == self.end:
95 Util.warning("escape at end of quoted argument in: %r" %
107 Util.warning("missing quote character in %r" % self.data)
110 def lex_arg_checked(self, c):
112 res = self.lex_arg_fast(c)
116 reference = self.lex_arg_slow(c)
119 raise ValueError("Fast path failure: %r != %r" % (
122 raise ValueError("Fast path failure: %r != %r" % (
def lex_arg(self, c):
    """
    lex_arg - Lex a single argument whose first character is c.

    The fast path (lex_arg_fast) is attempted first; whenever it yields a
    falsy result the slow path (lex_arg_slow) is used instead. """
    fast_result = self.lex_arg_fast(c)
    if fast_result:
        return fast_result
    return self.lex_arg_slow(c)
129 def lex_one_token(self):
131 lex_one_token - Lex a single 'sh' token. """
137 if self.maybe_eat('|'):
141 if self.maybe_eat('&'):
143 if self.maybe_eat('>'):
147 if self.maybe_eat('&'):
149 if self.maybe_eat('>'):
153 if self.maybe_eat('&'):
155 if self.maybe_eat('>'):
159 return self.lex_arg(c)
162 while self.pos != self.end:
163 if self.look().isspace():
166 yield self.lex_one_token()
171 def __init__(self, data, win32Escapes = False, pipefail = False):
173 self.pipefail = pipefail
174 self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
177 for item in self.tokens:
183 if token is not None:
184 self.tokens = itertools.chain([token], self.tokens)
187 def parse_command(self):
190 raise ValueError("empty command!")
191 if isinstance(tok, tuple):
192 raise ValueError("syntax error near unexpected token %r" % tok[0])
203 # If this is an argument, just add it to the current command.
204 if isinstance(tok, str):
205 args.append(self.lex())
208 # Otherwise see if it is a terminator.
209 assert isinstance(tok, tuple)
210 if tok[0] in ('|',';','&','||','&&'):
213 # Otherwise it must be a redirection.
217 raise ValueError("syntax error near token %r" % op[0])
218 redirects.append((op, arg))
220 return Command(args, redirects)
222 def parse_pipeline(self):
225 commands = [self.parse_command()]
226 while self.look() == ('|',):
228 commands.append(self.parse_command())
229 return Pipeline(commands, negate, self.pipefail)
232 lhs = self.parse_pipeline()
235 operator = self.lex()
236 assert isinstance(operator, tuple) and len(operator) == 1
240 "missing argument to operator %r" % operator[0])
242 # FIXME: Operator precedence!!
243 lhs = Seq(lhs, operator[0], self.parse_pipeline())
251 class TestShLexer(unittest.TestCase):
def lex(self, str, *args, **kwargs):
    """Lex `str` with a fresh ShLexer and return every token as a list.

    Extra positional and keyword arguments are forwarded unchanged to the
    ShLexer constructor. """
    lexer = ShLexer(str, *args, **kwargs)
    tokens = lexer.lex()
    return list(tokens)
def test_basic(self):
    """Operators written without surrounding whitespace still split the
    input into separate argument and operator tokens. """
    actual = self.lex('a|b>c&d<e;f')
    expected = [
        'a', ('|',),
        'b', ('>',),
        'c', ('&',),
        'd', ('<',),
        'e', (';',),
        'f',
    ]
    self.assertEqual(actual, expected)
260 def test_redirection_tokens(self):
261 self.assertEqual(self.lex('a2>c'),
263 self.assertEqual(self.lex('a 2>c'),
266 def test_quoting(self):
267 self.assertEqual(self.lex(""" 'a' """),
269 self.assertEqual(self.lex(""" "hello\\"world" """),
271 self.assertEqual(self.lex(""" "hello\\'world" """),
273 self.assertEqual(self.lex(""" "hello\\\\world" """),
275 self.assertEqual(self.lex(""" he"llo wo"rld """),
277 self.assertEqual(self.lex(""" a\\ b a\\\\b """),
279 self.assertEqual(self.lex(""" "" "" """),
281 self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
284 class TestShParse(unittest.TestCase):
def parse(self, str):
    """Parse `str` with a default-configured ShParser and return the
    result of its parse() call. """
    parser = ShParser(str)
    return parser.parse()
def test_basic(self):
    """Plain commands parse to a one-command Pipeline, and quotes are
    stripped from (or collapse to an empty) argument. """
    # Two bare words become the command's argument list.
    parsed = self.parse('echo hello')
    wanted = Pipeline([Command(['echo', 'hello'], [])], False)
    self.assertEqual(parsed, wanted)

    # An empty quoted string is kept as an empty argument.
    parsed = self.parse('echo ""')
    wanted = Pipeline([Command(['echo', ''], [])], False)
    self.assertEqual(parsed, wanted)

    # Single quotes embedded in a word are removed.
    parsed = self.parse("""echo -DFOO='a'""")
    wanted = Pipeline([Command(['echo', '-DFOO=a'], [])], False)
    self.assertEqual(parsed, wanted)

    # Double quotes embedded in a word are removed as well.
    parsed = self.parse('echo -DFOO="a"')
    wanted = Pipeline([Command(['echo', '-DFOO=a'], [])], False)
    self.assertEqual(parsed, wanted)
def test_redirection(self):
    """Redirections are collected on the Command as (operator, target)
    pairs, in the order they appear in the input. """
    # A single output redirection.
    parsed = self.parse('echo hello > c')
    wanted = Pipeline(
        [Command(['echo', 'hello'], [(('>',), 'c')])],
        False)
    self.assertEqual(parsed, wanted)

    # Two redirections on the same command keep their source order.
    parsed = self.parse('echo hello > c >> d')
    wanted = Pipeline(
        [Command(['echo', 'hello'], [(('>',), 'c'), (('>>',), 'd')])],
        False)
    self.assertEqual(parsed, wanted)

    # A numeric prefix is carried as a second element of the operator
    # tuple.
    parsed = self.parse('a 2>&1')
    wanted = Pipeline(
        [Command(['a'], [(('>&', 2), '1')])],
        False)
    self.assertEqual(parsed, wanted)
308 def test_pipeline(self):
309 self.assertEqual(self.parse('a | b'),
310 Pipeline([Command(['a'], []),
314 self.assertEqual(self.parse('a | b | c'),
315 Pipeline([Command(['a'], []),
321 self.assertEqual(self.parse('a ; b'),
322 Seq(Pipeline([Command(['a'], [])], False),
324 Pipeline([Command(['b'], [])], False)))
326 self.assertEqual(self.parse('a & b'),
327 Seq(Pipeline([Command(['a'], [])], False),
329 Pipeline([Command(['b'], [])], False)))
331 self.assertEqual(self.parse('a && b'),
332 Seq(Pipeline([Command(['a'], [])], False),
334 Pipeline([Command(['b'], [])], False)))
336 self.assertEqual(self.parse('a || b'),
337 Seq(Pipeline([Command(['a'], [])], False),
339 Pipeline([Command(['b'], [])], False)))
341 self.assertEqual(self.parse('a && b || c'),
342 Seq(Seq(Pipeline([Command(['a'], [])], False),
344 Pipeline([Command(['b'], [])], False)),
346 Pipeline([Command(['c'], [])], False)))
348 self.assertEqual(self.parse('a; b'),
349 Seq(Pipeline([Command(['a'], [])], False),
351 Pipeline([Command(['b'], [])], False)))
353 if __name__ == '__main__':