#!/usr/bin/env python
# -*- coding: utf-8 -*-
# java2python.lang.base -> lexer and parser support classes.
#
# This module provides the following:
#
# * `Tokens`
#
# This class is used to create the single `tokens` instance in this
# module. It maps parser token names to their ids and vice-versa.
#
# * `TreeAdaptor`
#
# This class is used by `java2python.compiler.tool`, where the
# `buildAST` function associates an instance of it with a parser. The
# `TreeAdaptor` class creates `LocalTree` instances.
#
# * `LocalTree`
#
# This class provides a slew of extra utility methods that are useful
# when inspecting and printing tree nodes.
#
# ANTLR notes:
#
# recognizers: lexer, parser, treeparser
# streams: string, file name, file handle
#
# Parsers use TokenStreams (CommonTokenStream or TokenRewriteStream)
#
# Tree parsers use TreeNodeStream (CommonTreeNodeStream)
#
# Lexers emit Token objects (buffered in TokenStream objects)
#
# Parsers build trees if their output is AST.
#
# token types: CommonToken and ClassicToken. Our tree adaptor
# creates LocalTree instances instead.
#
# Tree (CommonTree) wraps Token objects. We provide extra functionality via
# the LocalTree class.
#
# TreeAdaptor (CommonTreeAdaptor) is used by the parser to create
# Tree objects. Our adaptor, TreeAdaptor, creates the LocalTree
# instances.
#
from cStringIO import StringIO
from antlr3 import ANTLRStringStream as StringStream, CommonTokenStream as TokenStream
from antlr3.tree import CommonTreeAdaptor, CommonTree
from java2python.lib import colors
class Tokens(object):
    """ Tokens -> two-way lookup between parser token names and token ids.

    Attributes not found on the instance are looked up on the lazily
    imported parser module, so `tokens.IDENT` evaluates to the parser
    module's `IDENT` value.
    """

    def __init__(self):
        # `cache` backs the `map` property; `parserModule` backs `module`.
        self.cache = {}
        self.parserModule = None

    def __getattr__(self, name):
        """ tokenname -> tokenvalue (delegated to the parser module) """
        parser = self.module
        return getattr(parser, name)

    @property
    def commentTypes(self):
        """ Token types of the well-known comment kinds. """
        parser = self.module
        return (parser.COMMENT, parser.LINE_COMMENT, parser.JAVADOC_COMMENT)

    @property
    def map(self):
        """ Dictionary of tokentype -> tokenname; built on first use. """
        lookup = self.cache
        parser = self.module
        if lookup:
            return lookup
        found = {}
        for tokenName in parser.tokenNames:
            # names without a matching module attribute are left out
            tokenType = getattr(parser, tokenName, None)
            if tokenType is not None:
                found[tokenType] = tokenName
        lookup.update(found)
        return lookup

    @property
    def methodTypes(self):
        """ Token types of the well-known method declarations. """
        parser = self.module
        return (parser.VOID_METHOD_DECL, parser.FUNCTION_METHOD_DECL)

    @property
    def primitiveTypeNames(self):
        """ Names of the well-known primitive types. """
        return ('bool', 'str', 'int', 'long', 'float')

    @property
    def module(self):
        """ The parser module; imported on first access, then remembered. """
        loaded = self.parserModule
        if not loaded:
            import java2python.lang.JavaParser as loaded
            self.parserModule = loaded
        return loaded

    @staticmethod
    def title(name):
        """ Turns a token type name like FOO_BAR into the title FooBar. """
        words = name.split('_')
        return ''.join([word.title() for word in words])
## sometimes you really do only need one: this is the module-level
## `Tokens` instance that the code below refers to as `tokens`.
tokens = Tokens()
class TreeAdaptor(CommonTreeAdaptor):
    """ TreeAdaptor -> deferred tree node creator (for parsers). """

    def __init__(self, lexer, parser):
        # The CommonTreeAdaptor initializer is intentionally not called;
        # it doesn't need to be __init__'ed.
        self.lexer = lexer
        self.parser = parser

    def createWithPayload(self, payload):
        """ Builds a LocalTree around the payload for the calling parser. """
        tree = LocalTree(payload, self.lexer, self.parser)
        return tree
class LocalTree(CommonTree):
    """ LocalTree -> a CommonTree with extra inspection and printing helpers. """

    # token type name -> color function; names not listed here are
    # rendered with colors.white (see colorType and colorText).
    colorTypeMap = {
        'CLASS': colors.green,
        'JAVA_SOURCE': colors.green,
        'VOID_METHOD_DECL': colors.green,
        'IDENT': colors.yellow,
        'TYPE': colors.magenta,
        'EXPR': colors.blue,
        'TRUE': colors.yellow,
        'FALSE': colors.yellow,
        'NULL': colors.yellow,
    }

    def __init__(self, payload, lexer=None, parser=None):
        super(LocalTree, self).__init__(payload)
        self.lexer = lexer
        self.parser = parser

    def childrenOfType(self, type):
        """ Returns a generator over the direct children having the given type. """
        return (child for child in self.children if child.type == type)

    def colorType(self, tokenType):
        """ Returns the token type name itself, colorized for that type. """
        paint = self.colorTypeMap.get(tokenType, colors.white)
        return paint(tokenType)

    def colorText(self, tokenType, tokenText):
        """ Returns the token text, colorized according to the token type. """
        paint = self.colorTypeMap.get(tokenType, colors.white)
        return paint(tokenText)

    def colorComments(self, token):
        """ Yields one formatted, colorized line describing the comment token. """
        typeName = tokens.map.get(token.type)
        escaped = token.text
        # keep the comment on one output line by escaping control characters
        for raw, shown in (('\n', '\\n'), ('\r', '\\r'), ('\t', '\\t')):
            escaped = escaped.replace(raw, shown)
        item = '{0} [{1}:{2}] {3}'.format(typeName, token.start, token.stop, escaped)
        yield colors.black(item)

    def dump(self, fd, level=0):
        """ Writes a debug representation of this tree to the given file.

        Each node is printed on its own line, indented by its depth
        (starting at `level`).  Comment tokens found before a node's
        start index, and before its stop index once its children have
        been written, are printed too; each comment is written once.
        """
        emitted = set()

        def writeComments(stopIndex, indent):
            # comments are always selected through the tree being dumped
            for comment in self.selectComments(stopIndex, emitted):
                for text in self.colorComments(comment):
                    print >> fd, '{0}{1}'.format(indent, text)

        def writeNode(node, depth):
            token = node.token
            indent = ' ' * depth
            start, stop = node.tokenStartIndex, node.tokenStopIndex
            typeName = tokens.map.get(token.type, '?')
            lineNo = token.line

            # position summary: "line=..., start=..., stop=..."
            where = ''
            if start and stop:
                if start == stop:
                    where = 'start={}'.format(start)
                else:
                    where = 'start={}, stop={}'.format(start, stop)
            if lineNo:
                separator = ', ' if where else ''
                where = 'line={}{}{}'.format(lineNo, separator, where)
            if where:
                where = ' [{}]'.format(where)
            where = colors.black(where)

            label = self.colorType(typeName)
            # only show the token text when it adds to the type name
            detail = ''
            text = token.text
            if text and (text != typeName):
                detail = ' ' + self.colorText(typeName, text)

            writeComments(start, indent)
            print >> fd, '{0}{1}{2}{3}'.format(indent, label, detail, where)
            for child in node.getChildren():
                writeNode(child, depth + 1)
            writeComments(node.tokenStopIndex, indent)

        writeNode(self, level)

    def dumps(self, level=0):
        """ Returns the output of dump() as a string. """
        buf = StringIO()
        self.dump(buf, level)
        return buf.getvalue()

    def dupNode(self):
        """ Called by the parser to create a duplicate of this tree. """
        lexer = getattr(self, 'lexer', None)
        parser = getattr(self, 'parser', None)
        return LocalTree(self, lexer, parser)

    def findChildren(self, pred=lambda c:True):
        """ Depth-first walk yielding every descendant accepted by pred. """
        for node in self.children:
            if pred(node):
                yield node
            for descendant in node.findChildren(pred):
                yield descendant

    def findChildrenOfType(self, type):
        """ Depth-first walk yielding every descendant of the given type. """
        def hasType(node):
            return node.type == type
        return self.findChildren(hasType)

    def firstChild(self, default=None):
        """ Returns the first child of this tree, or default if there is none. """
        try:
            first = self.children[0]
        except IndexError:
            return default
        return first

    def firstChildOfType(self, type, default=None):
        """ Returns the first direct child of the given type, or default. """
        matches = (child for child in self.children if child.type == type)
        return next(matches, default)

    @property
    def isJavaSource(self):
        """ True if this tree is the outer most type. """
        ownType = self.token.type
        return ownType == tokens.JAVA_SOURCE

    @property
    def parentType(self):
        """ The type of this tree's parent. """
        parent = self.parent
        return parent.type

    def parents(self, pred=lambda v:True):
        """ Yields this tree and then each ancestor accepted by pred. """
        node = self
        while node:
            if pred(node):
                yield node
            node = node.parent

    @property
    def parserTokens(self):
        """ The parser's tokens from this tree's start index up to, but not
        including, its stop index.
        """
        stream = self.parser.input.tokens
        return stream[self.tokenStartIndex:self.tokenStopIndex]

    def selectComments(self, stop, memo):
        """ Returns the not-yet-seen comment tokens located before index stop.

        The indexes of the returned tokens are added to memo, so a later
        call with the same memo will not return them again.
        """
        fresh = []
        for tok in self.parser.input.tokens[0:stop]:
            if tok.type in tokens.commentTypes and tok.index not in memo:
                fresh.append(tok)
        memo.update([tok.index for tok in fresh])
        return fresh

    @property
    def withinExpr(self):
        """ True if this tree is contained within an expression.

        When no enclosing expression is found the loop simply ends, so
        the value is None rather than False.
        """
        node = getattr(self.parent, 'parent', None)  # skip first expr
        while node:
            if node.type == tokens.EXPR:
                return True
            node = node.parent