"""parser.py --- A small scheme parser. This version was adapted by Tim Finin from the original by Danny Yoo (dyoo@hkn.eecs.berkeley.edu) to remove the trampolining. """ __license__ = "MIT License" from lex import tokenize from symbol import symbol from pair import cons, NIL, list import unittest class ParserError(Exception): """Our personalized exception class.""" pass def peek(tokens): """Take a quick glance at the first token in our tokens list.""" if len(tokens) == 0: raise ParserError, "While peeking: ran out of tokens." return tokens[0] def eat_safe(tokens, tokenType): """Digest the first token in our tokens list, making sure that we're biting on the right tokenType of thing.""" if len(tokens) == 0: raise ParserError, "While trying to eat %s: ran out of tokens." % \ (repr(tokenType),) if tokens[0][0] != tokenType: raise ParserError, "Seeking %s got %s" % (tokenType, tokens[0]) return tokens.pop(0) def eat(tokens, desired_type): """If the type of the next token is desired_type, pop it from the list and return it, else return False""" if len(tokens) == 0: raise ParserError, 'No tokens left, seeking ' + desired_type return tokens.pop(0) if tokens[0][0] == desired_type else False ###################################################################### """Below is our parser for symbol-lists. We'll use a recursive descent parsing technique, since it's well suited for Scheme's syntax""" def parseSingleExpression(tokens): """Returns a single Expression, given a sequence of tokens. Raises a ParserException if our tokens haven't been exhausted.""" exp = parseExpression(tokens) if eat(tokens, None): return exp else: raise ParserError, "extra stuff after expression: %s." % tokens def parseExpression(tokens): """Returns an Expression, given a sequence of tokens. An expression is made up of one of the following things: o A quoted expression o An atom (like a number or symbol or string) o A list. This procedure tries to take care of all these potentials.""" if eat(tokens, '\''): return cons(symbol('quote'), cons(parseExpression(tokens), NIL)) if eat(tokens, '`'): return cons(symbol('quasiquote'), cons(parseExpression(tokens), NIL)) elif eat(tokens, ','): return cons(symbol('unquote'), cons(parseExpression(tokens), NIL)) elif eat(tokens, '('): return parse_list_members(tokens) elif peek(tokens)[0] in ('number', 'symbol', 'string'): return parse_atom(tokens) else: raise ParserError, "While parsing Expression: no alternatives." def parse_atom(tokens): """Returns an Atom, given a sequence of tokens. An atom is either a number, a symbol, or a string.""" next_type = peek(tokens)[0] if next_type == 'number': return to_number(eat(tokens, 'number')[1]) elif next_type == 'symbol': return symbol(eat(tokens, 'symbol')[1]) elif next_type == 'string': return eat(tokens, 'string')[1] else: raise ParserError, "While parsing Atom: no alternatives." def to_number(s): """Tries to convert string 's' into a number.""" try: return int(s) except ValueError: return float(s) def parse_list_members(tokens): """Tries to eat as many expressions as it can see.""" START_EXPR_SET = ('\'', '`', ',', '(', 'number', 'symbol', 'string') # Dotted pairs if eat(tokens, 'dot'): final = parseExpression(tokens) eat_safe(tokens, ')') return final if peek(tokens)[0] in START_EXPR_SET: return cons(parseExpression(tokens), parse_list_members(tokens)) if eat(tokens, ')'): return NIL raise ParserError, "Can't finish list: %s." % tokens def parse(s): """Parse a single string. This is just a convenience function.""" return parseSingleExpression(tokenize(s)) ###################################################################### class ParserTests(unittest.TestCase): def testAtomParsing(self): self.assertEquals(42, parse("42")) self.assertEquals("particle-man", parse('"particle-man"')) def testBrokenNegativeExampleFromBaypiggies(self): ## embarassing case that didn't work during the Baypiggies meeting ## on September 9, 2004. Doh! self.assertEquals(-42, parse("-42")) def testLists(self): self.assertEquals(list(1, 2), parse("(1 2)")) self.assertEquals(list(1, 2, list(3), 4), parse("(1 2 (3) 4)")) self.assertEquals(list(list(list())), parse("((()))")) def testQuotation(self): self.assertEquals(list(symbol("quote"), symbol("atom-man")), parse("'atom-man")) self.assertEquals(list(symbol("quasiquote"), symbol("istanbul")), parse("`istanbul")) self.assertEquals(list(symbol("unquote"), symbol("constantinople")), parse(",constantinople")) def testDottedPair(self): self.assertEquals(cons(symbol("alpha"), symbol("beta")), parse("(alpha . beta)")) self.assertEquals(cons(symbol("a"), cons(symbol("b"), cons(cons(symbol("c"), symbol("d")), NIL))), parse("(a b (c . d))")) def testQuotedList(self): self.assertEquals(list(symbol("quote"), list(symbol("foo"), symbol("bar"))), parse("'(foo bar)")) def testEmptyList(self): self.assertEquals(NIL, parse("()")) def testStressWithSuperNesting(self): ## An evil test to see if this raises bad errors. N = 1000 bigNestedList = list() for ignored in xrange(N-1): bigNestedList = list(bigNestedList) try: self.assertEquals(bigNestedList, parse( "(" * N + ")" * N)) except RuntimeError, e: self.fail(e) if __name__ == '__main__': unittest.main()