summary | refs | log | tree | commit | diff
path: root/utils/ipc/mojo/public/tools/mojom/mojom/parse/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils/ipc/mojo/public/tools/mojom/mojom/parse/parser.py')
-rw-r--r-- utils/ipc/mojo/public/tools/mojom/mojom/parse/parser.py 488
1 files changed, 488 insertions, 0 deletions
diff --git a/utils/ipc/mojo/public/tools/mojom/mojom/parse/parser.py b/utils/ipc/mojo/public/tools/mojom/mojom/parse/parser.py
new file mode 100644
index 00000000..b3b803d6
--- /dev/null
+++ b/utils/ipc/mojo/public/tools/mojom/mojom/parse/parser.py
@@ -0,0 +1,488 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Generates a syntax tree from a Mojo IDL file."""
+
+import os.path
+import sys
+
+from mojom import fileutil
+from mojom.error import Error
+from mojom.parse import ast
+from mojom.parse.lexer import Lexer
+
+# NOTE(review): presumably this call makes the repo-local third_party
+# directory importable so that the |ply| imports below resolve; it is called
+# before them, so keep that order -- confirm against mojom.fileutil.
+fileutil.AddLocalRepoThirdPartyDirToModulePath()
+from ply import lex
+from ply import yacc
+
+# Inclusive upper bounds enforced by the parser: an ordinal (see
+# |Parser.p_ordinal_2|) or a fixed array size (see |Parser.p_fixed_array|)
+# greater than its limit is rejected with a ParseError.
+_MAX_ORDINAL_VALUE = 0xffffffff
+_MAX_ARRAY_SIZE = 0xffffffff
+
+
+class ParseError(Error):
+ """Class for errors from the parser."""
+
+ # |snippet| is an optional piece of text to attach to the error (callers in
+ # this file pass the offending source line). A truthy snippet is forwarded to
+ # Error as a one-element |addenda| list; a falsy one (None or "") is forwarded
+ # as addenda=None.
+ def __init__(self, filename, message, lineno=None, snippet=None):
+ Error.__init__(
+ self,
+ filename,
+ message,
+ lineno=lineno,
+ addenda=([snippet] if snippet else None))
+
+
+# We have methods which look like they could be functions:
+# pylint: disable=R0201
+class Parser(object):
+ # Grammar-rule container handed to |yacc.yacc(module=...)| (see |Parse|).
+ # IMPORTANT: the docstring of every |p_*| method below is the grammar
+ # production that PLY reads, not documentation. Do not reword those
+ # docstrings; explanatory notes belong in |#| comments.
+ #
+ # |lexer.tokens| is re-exported as |self.tokens| (PLY's yacc looks the token
+ # list up on the module object it is given -- see the PLY docs). |source| is
+ # kept for |_GetSnippet| and |filename| for error messages and AST nodes.
+ def __init__(self, lexer, source, filename):
+ self.tokens = lexer.tokens
+ self.source = source
+ self.filename = filename
+
+ # Names of functions
+ #
+ # In general, we name functions after the left-hand-side of the rule(s) that
+ # they handle. E.g., |p_foo_bar| for a rule |foo_bar : ...|.
+ #
+ # There may be multiple functions handling rules for the same left-hand-side;
+ # then we name the functions |p_foo_bar_N| (for left-hand-side |foo_bar|),
+ # where N is a number (numbered starting from 1). Note that using multiple
+ # functions is actually more efficient than having single functions handle
+ # multiple rules (and, e.g., distinguishing them by examining |len(p)|).
+ #
+ # It's also possible to have a function handling multiple rules with different
+ # left-hand-sides. We do not do this.
+ #
+ # See http://www.dabeaz.com/ply/ply.html#ply_nn25 for more details.
+
+ # TODO(vtl): Get rid of the braces in the module "statement". (Consider
+ # renaming "module" -> "package".) Then we'll be able to have a single rule
+ # for root (by making module "optional").
+ # Empty input: a Mojom node with no module, an empty import list and an empty
+ # definition list. The other |root| rules extend this node in place.
+ def p_root_1(self, p):
+ """root : """
+ p[0] = ast.Mojom(None, ast.ImportList(), [])
+
+ # Attaches the module statement. Rejects a second module statement, and a
+ # module statement that follows any import or definition.
+ def p_root_2(self, p):
+ """root : root module"""
+ if p[1].module is not None:
+ raise ParseError(
+ self.filename,
+ "Multiple \"module\" statements not allowed:",
+ p[2].lineno,
+ snippet=self._GetSnippet(p[2].lineno))
+ if p[1].import_list.items or p[1].definition_list:
+ raise ParseError(
+ self.filename,
+ "\"module\" statements must precede imports and definitions:",
+ p[2].lineno,
+ snippet=self._GetSnippet(p[2].lineno))
+ p[0] = p[1]
+ p[0].module = p[2]
+
+ # Appends an import. Rejects an import that follows any definition.
+ def p_root_3(self, p):
+ """root : root import"""
+ if p[1].definition_list:
+ raise ParseError(
+ self.filename,
+ "\"import\" statements must precede definitions:",
+ p[2].lineno,
+ snippet=self._GetSnippet(p[2].lineno))
+ p[0] = p[1]
+ p[0].import_list.Append(p[2])
+
+ # Appends a definition; |definition_list| is the plain list created in
+ # |p_root_1|, hence the lower-case |append|.
+ def p_root_4(self, p):
+ """root : root definition"""
+ p[0] = p[1]
+ p[0].definition_list.append(p[2])
+
+ # NOTE(review): |eval| runs on text taken from the parsed input. That is only
+ # safe if the lexer's STRING_LITERAL pattern admits nothing but a quoted
+ # string -- confirm against the lexer (see also the TODO below).
+ def p_import(self, p):
+ """import : attribute_section IMPORT STRING_LITERAL SEMI"""
+ # 'eval' the literal to strip the quotes.
+ # TODO(vtl): This eval is dubious. We should unquote/unescape ourselves.
+ p[0] = ast.Import(
+ p[1], eval(p[3]), filename=self.filename, lineno=p.lineno(2))
+
+ # The module name is the wrapped identifier tuple built by
+ # |p_identifier_wrapped|; the line number is that of the MODULE keyword.
+ def p_module(self, p):
+ """module : attribute_section MODULE identifier_wrapped SEMI"""
+ p[0] = ast.Module(p[3], p[1], filename=self.filename, lineno=p.lineno(2))
+
+ # Pass-through: a definition is whatever node the matched alternative built.
+ def p_definition(self, p):
+ """definition : struct
+ | union
+ | interface
+ | enum
+ | const"""
+ p[0] = p[1]
+
+ # No attribute section at all yields None (not an empty AttributeList).
+ def p_attribute_section_1(self, p):
+ """attribute_section : """
+ p[0] = None
+
+ # A bracketed section yields its attribute list; the brackets are dropped.
+ def p_attribute_section_2(self, p):
+ """attribute_section : LBRACKET attribute_list RBRACKET"""
+ p[0] = p[2]
+
+ # Empty brackets ("[]") yield an empty AttributeList.
+ def p_attribute_list_1(self, p):
+ """attribute_list : """
+ p[0] = ast.AttributeList()
+
+ def p_attribute_list_2(self, p):
+ """attribute_list : nonempty_attribute_list"""
+ p[0] = p[1]
+
+ # First attribute: start a new list containing it.
+ def p_nonempty_attribute_list_1(self, p):
+ """nonempty_attribute_list : attribute"""
+ p[0] = ast.AttributeList(p[1])
+
+ # Subsequent attributes: extend the existing list in place.
+ def p_nonempty_attribute_list_2(self, p):
+ """nonempty_attribute_list : nonempty_attribute_list COMMA attribute"""
+ p[0] = p[1]
+ p[0].Append(p[3])
+
+ # key=value attribute. The value is either an already-evaluated literal (see
+ # |p_evaled_literal|) or the raw text of a NAME token.
+ def p_attribute_1(self, p):
+ """attribute : NAME EQUALS evaled_literal
+ | NAME EQUALS NAME"""
+ p[0] = ast.Attribute(p[1], p[3], filename=self.filename, lineno=p.lineno(1))
+
+ # A bare attribute name is recorded with the value True.
+ def p_attribute_2(self, p):
+ """attribute : NAME"""
+ p[0] = ast.Attribute(p[1], True, filename=self.filename, lineno=p.lineno(1))
+
+ # Converts literal text into a Python value.
+ # NOTE(review): the |literal| rule also accepts DEFAULT, which is not
+ # special-cased here and would therefore be handed to |eval| -- confirm
+ # whether that is reachable from an attribute value and what it evaluates to.
+ # NOTE(review): as in |p_import|, |eval| runs on input-derived text.
+ def p_evaled_literal(self, p):
+ """evaled_literal : literal"""
+ # 'eval' the literal to strip the quotes. Handle keywords "true" and "false"
+ # specially since they cannot directly be evaluated to python boolean
+ # values.
+ if p[1] == "true":
+ p[0] = True
+ elif p[1] == "false":
+ p[0] = False
+ else:
+ p[0] = eval(p[1])
+
+ # Struct with a body: ast.Struct(NAME, attribute_section, struct_body).
+ def p_struct_1(self, p):
+ """struct : attribute_section STRUCT NAME LBRACE struct_body RBRACE SEMI"""
+ p[0] = ast.Struct(p[3], p[1], p[5])
+
+ # Struct declared without a body: the body argument is None.
+ def p_struct_2(self, p):
+ """struct : attribute_section STRUCT NAME SEMI"""
+ p[0] = ast.Struct(p[3], p[1], None)
+
+ def p_struct_body_1(self, p):
+ """struct_body : """
+ p[0] = ast.StructBody()
+
+ # A struct body accumulates consts, enums and fields in source order.
+ def p_struct_body_2(self, p):
+ """struct_body : struct_body const
+ | struct_body enum
+ | struct_body struct_field"""
+ p[0] = p[1]
+ p[0].Append(p[2])
+
+ # Note the argument order differs from the grammar order:
+ # (NAME, attribute_section, ordinal, typename, default).
+ def p_struct_field(self, p):
+ """struct_field : attribute_section typename NAME ordinal default SEMI"""
+ p[0] = ast.StructField(p[3], p[1], p[4], p[2], p[5])
+
+ # ast.Union(NAME, attribute_section, union_body).
+ def p_union(self, p):
+ """union : attribute_section UNION NAME LBRACE union_body RBRACE SEMI"""
+ p[0] = ast.Union(p[3], p[1], p[5])
+
+ def p_union_body_1(self, p):
+ """union_body : """
+ p[0] = ast.UnionBody()
+
+ # Appends through p[1]; p[0] was just bound to the same object.
+ def p_union_body_2(self, p):
+ """union_body : union_body union_field"""
+ p[0] = p[1]
+ p[1].Append(p[2])
+
+ # Argument order: (NAME, attribute_section, ordinal, typename).
+ def p_union_field(self, p):
+ """union_field : attribute_section typename NAME ordinal SEMI"""
+ p[0] = ast.UnionField(p[3], p[1], p[4], p[2])
+
+ # No default value given: None.
+ def p_default_1(self, p):
+ """default : """
+ p[0] = None
+
+ # Default value given: the constant after the equals sign.
+ def p_default_2(self, p):
+ """default : EQUALS constant"""
+ p[0] = p[2]
+
+ # ast.Interface(NAME, attribute_section, interface_body).
+ def p_interface(self, p):
+ """interface : attribute_section INTERFACE NAME LBRACE interface_body \
+ RBRACE SEMI"""
+ p[0] = ast.Interface(p[3], p[1], p[5])
+
+ def p_interface_body_1(self, p):
+ """interface_body : """
+ p[0] = ast.InterfaceBody()
+
+ # An interface body accumulates consts, enums and methods in source order.
+ def p_interface_body_2(self, p):
+ """interface_body : interface_body const
+ | interface_body enum
+ | interface_body method"""
+ p[0] = p[1]
+ p[0].Append(p[2])
+
+ # No response clause: None (distinct from a response with an empty
+ # parameter list, which yields an empty ParameterList via |p_response_2|).
+ def p_response_1(self, p):
+ """response : """
+ p[0] = None
+
+ def p_response_2(self, p):
+ """response : RESPONSE LPAREN parameter_list RPAREN"""
+ p[0] = p[3]
+
+ # Argument order: (NAME, attribute_section, ordinal, parameter_list,
+ # response).
+ def p_method(self, p):
+ """method : attribute_section NAME ordinal LPAREN parameter_list RPAREN \
+ response SEMI"""
+ p[0] = ast.Method(p[2], p[1], p[3], p[5], p[7])
+
+ def p_parameter_list_1(self, p):
+ """parameter_list : """
+ p[0] = ast.ParameterList()
+
+ def p_parameter_list_2(self, p):
+ """parameter_list : nonempty_parameter_list"""
+ p[0] = p[1]
+
+ # First parameter: start a new list containing it.
+ def p_nonempty_parameter_list_1(self, p):
+ """nonempty_parameter_list : parameter"""
+ p[0] = ast.ParameterList(p[1])
+
+ # Subsequent parameters: extend the existing list in place.
+ def p_nonempty_parameter_list_2(self, p):
+ """nonempty_parameter_list : nonempty_parameter_list COMMA parameter"""
+ p[0] = p[1]
+ p[0].Append(p[3])
+
+ # Argument order: (NAME, attribute_section, ordinal, typename); the line
+ # number recorded is that of the NAME token.
+ def p_parameter(self, p):
+ """parameter : attribute_section typename NAME ordinal"""
+ p[0] = ast.Parameter(
+ p[3], p[1], p[4], p[2], filename=self.filename, lineno=p.lineno(3))
+
+ # Type names are built up as plain strings. len(p) == 2 is the alternative
+ # without QSTN; otherwise "?" is appended to the type string.
+ def p_typename(self, p):
+ """typename : nonnullable_typename QSTN
+ | nonnullable_typename"""
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = p[1] + "?"
+
+ def p_nonnullable_typename(self, p):
+ """nonnullable_typename : basictypename
+ | array
+ | fixed_array
+ | associative_array
+ | interfacerequest"""
+ p[0] = p[1]
+
+ # Single-symbol alternatives pass through unchanged; the two-symbol
+ # alternative (ASSOCIATED identifier) is encoded as "asso<identifier>".
+ def p_basictypename(self, p):
+ """basictypename : remotetype
+ | receivertype
+ | associatedremotetype
+ | associatedreceivertype
+ | identifier
+ | ASSOCIATED identifier
+ | handletype"""
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = "asso<" + p[2] + ">"
+
+ # Encoded as "rmt<identifier>".
+ def p_remotetype(self, p):
+ """remotetype : PENDING_REMOTE LANGLE identifier RANGLE"""
+ p[0] = "rmt<%s>" % p[3]
+
+ # Encoded as "rcv<identifier>".
+ def p_receivertype(self, p):
+ """receivertype : PENDING_RECEIVER LANGLE identifier RANGLE"""
+ p[0] = "rcv<%s>" % p[3]
+
+ # Encoded as "rma<identifier>".
+ def p_associatedremotetype(self, p):
+ """associatedremotetype : PENDING_ASSOCIATED_REMOTE LANGLE identifier \
+ RANGLE"""
+ p[0] = "rma<%s>" % p[3]
+
+ # Encoded as "rca<identifier>".
+ def p_associatedreceivertype(self, p):
+ """associatedreceivertype : PENDING_ASSOCIATED_RECEIVER LANGLE identifier \
+ RANGLE"""
+ p[0] = "rca<%s>" % p[3]
+
+ # A bare HANDLE passes its token text through. A parameterized handle must
+ # name one of the five kinds listed below and is encoded as "handle<kind>".
+ def p_handletype(self, p):
+ """handletype : HANDLE
+ | HANDLE LANGLE NAME RANGLE"""
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ if p[3] not in ('data_pipe_consumer', 'data_pipe_producer',
+ 'message_pipe', 'shared_buffer', 'platform'):
+ # Note: We don't enable tracking of line numbers for everything, so we
+ # can't use |p.lineno(3)|.
+ raise ParseError(
+ self.filename,
+ "Invalid handle type %r:" % p[3],
+ lineno=p.lineno(1),
+ snippet=self._GetSnippet(p.lineno(1)))
+ p[0] = "handle<" + p[3] + ">"
+
+ # Encoded as "typename[]".
+ def p_array(self, p):
+ """array : ARRAY LANGLE typename RANGLE"""
+ p[0] = p[3] + "[]"
+
+ # Encoded as "typename[N]". N must satisfy 1 <= N <= _MAX_ARRAY_SIZE. The
+ # original token text (p[5]), not the parsed integer, goes into the string.
+ def p_fixed_array(self, p):
+ """fixed_array : ARRAY LANGLE typename COMMA INT_CONST_DEC RANGLE"""
+ value = int(p[5])
+ if value == 0 or value > _MAX_ARRAY_SIZE:
+ raise ParseError(
+ self.filename,
+ "Fixed array size %d invalid:" % value,
+ lineno=p.lineno(5),
+ snippet=self._GetSnippet(p.lineno(5)))
+ p[0] = p[3] + "[" + p[5] + "]"
+
+ # map<key, value> is encoded value-first, as "value{key}".
+ def p_associative_array(self, p):
+ """associative_array : MAP LANGLE identifier COMMA typename RANGLE"""
+ p[0] = p[5] + "{" + p[3] + "}"
+
+ # len(p) == 3 is the plain form, encoded "identifier&"; the ASSOCIATED form
+ # is encoded "asso<identifier&>".
+ def p_interfacerequest(self, p):
+ """interfacerequest : identifier AMP
+ | ASSOCIATED identifier AMP"""
+ if len(p) == 3:
+ p[0] = p[1] + "&"
+ else:
+ p[0] = "asso<" + p[2] + "&>"
+
+ # No explicit ordinal: None.
+ def p_ordinal_1(self, p):
+ """ordinal : """
+ p[0] = None
+
+ # The first character of the ORDINAL token text is dropped before the
+ # integer conversion (presumably the "@" prefix -- confirm against the
+ # lexer). Values above _MAX_ORDINAL_VALUE are rejected.
+ def p_ordinal_2(self, p):
+ """ordinal : ORDINAL"""
+ value = int(p[1][1:])
+ if value > _MAX_ORDINAL_VALUE:
+ raise ParseError(
+ self.filename,
+ "Ordinal value %d too large:" % value,
+ lineno=p.lineno(1),
+ snippet=self._GetSnippet(p.lineno(1)))
+ p[0] = ast.Ordinal(value, filename=self.filename, lineno=p.lineno(1))
+
+ # The second alternative permits a trailing comma after a non-empty value
+ # list. In both alternatives the value list is p[5].
+ def p_enum_1(self, p):
+ """enum : attribute_section ENUM NAME LBRACE enum_value_list \
+ RBRACE SEMI
+ | attribute_section ENUM NAME LBRACE nonempty_enum_value_list \
+ COMMA RBRACE SEMI"""
+ p[0] = ast.Enum(
+ p[3], p[1], p[5], filename=self.filename, lineno=p.lineno(2))
+
+ # Enum declared without a body: the value list argument is None.
+ def p_enum_2(self, p):
+ """enum : attribute_section ENUM NAME SEMI"""
+ p[0] = ast.Enum(
+ p[3], p[1], None, filename=self.filename, lineno=p.lineno(2))
+
+ def p_enum_value_list_1(self, p):
+ """enum_value_list : """
+ p[0] = ast.EnumValueList()
+
+ def p_enum_value_list_2(self, p):
+ """enum_value_list : nonempty_enum_value_list"""
+ p[0] = p[1]
+
+ # First enum value: start a new list containing it.
+ def p_nonempty_enum_value_list_1(self, p):
+ """nonempty_enum_value_list : enum_value"""
+ p[0] = ast.EnumValueList(p[1])
+
+ # Subsequent enum values: extend the existing list in place.
+ def p_nonempty_enum_value_list_2(self, p):
+ """nonempty_enum_value_list : nonempty_enum_value_list COMMA enum_value"""
+ p[0] = p[1]
+ p[0].Append(p[3])
+
+ # len(p) == 5 means an explicit "= value" was given (an int string or a
+ # wrapped identifier); otherwise the value argument is None.
+ def p_enum_value(self, p):
+ """enum_value : attribute_section NAME
+ | attribute_section NAME EQUALS int
+ | attribute_section NAME EQUALS identifier_wrapped"""
+ p[0] = ast.EnumValue(
+ p[2],
+ p[1],
+ p[4] if len(p) == 5 else None,
+ filename=self.filename,
+ lineno=p.lineno(2))
+
+ # Argument order: (NAME, attribute_section, typename, constant).
+ def p_const(self, p):
+ """const : attribute_section CONST typename NAME EQUALS constant SEMI"""
+ p[0] = ast.Const(p[4], p[1], p[3], p[6])
+
+ # A constant is either literal text (a string) or a wrapped identifier
+ # (a tuple, see |p_identifier_wrapped|).
+ def p_constant(self, p):
+ """constant : literal
+ | identifier_wrapped"""
+ p[0] = p[1]
+
+ # Tags the identifier string so that it can be told apart from literal text.
+ def p_identifier_wrapped(self, p):
+ """identifier_wrapped : identifier"""
+ p[0] = ('IDENTIFIER', p[1])
+
+ # TODO(vtl): Make this produce a "wrapped" identifier (probably as an
+ # |ast.Identifier|, to be added) and get rid of identifier_wrapped.
+ # Joins every matched symbol's value (NAME, DOT and the nested identifier)
+ # into one string.
+ def p_identifier(self, p):
+ """identifier : NAME
+ | NAME DOT identifier"""
+ p[0] = ''.join(p[1:])
+
+ # Literals are passed along as their source text; nothing is converted here.
+ def p_literal(self, p):
+ """literal : int
+ | float
+ | TRUE
+ | FALSE
+ | DEFAULT
+ | STRING_LITERAL"""
+ p[0] = p[1]
+
+ # An optional sign is concatenated with the constant's text; the result
+ # stays a string.
+ def p_int(self, p):
+ """int : int_const
+ | PLUS int_const
+ | MINUS int_const"""
+ p[0] = ''.join(p[1:])
+
+ def p_int_const(self, p):
+ """int_const : INT_CONST_DEC
+ | INT_CONST_HEX"""
+ p[0] = p[1]
+
+ # Same sign handling as |p_int|; the result stays a string.
+ def p_float(self, p):
+ """float : FLOAT_CONST
+ | PLUS FLOAT_CONST
+ | MINUS FLOAT_CONST"""
+ p[0] = ''.join(p[1:])
+
+ # Syntax-error hook. |e| is None at unexpected end of input; otherwise it is
+ # the offending token, whose |value| and |lineno| are reported. Always raises
+ # ParseError.
+ def p_error(self, e):
+ if e is None:
+ # Unexpected EOF.
+ # TODO(vtl): Can we figure out what's missing?
+ raise ParseError(self.filename, "Unexpected end of file")
+
+ raise ParseError(
+ self.filename,
+ "Unexpected %r:" % e.value,
+ lineno=e.lineno,
+ snippet=self._GetSnippet(e.lineno))
+
+ # Returns line |lineno| (1-based) of the source text, split on '\n'.
+ # NOTE(review): no bounds check -- a |lineno| past the last line raises
+ # IndexError, and 0 silently returns the last line.
+ def _GetSnippet(self, lineno):
+ return self.source.split('\n')[lineno - 1]
+
+
+def Parse(source, filename):
+ """Parse source file to AST.
+
+ Args:
+ source: The source text as a str (Python 2 or 3) or unicode (Python 2).
+ filename: The filename that |source| originates from.
+
+ Returns:
+ The AST as a mojom.parse.ast.Mojom object.
+
+ Raises:
+ ParseError: if the parser rejects |source| (raised by Parser.p_error and by
+ the rule handlers that validate values).
+ """
+ lexer = Lexer(filename)
+ parser = Parser(lexer, source, filename)
+
+ # The objects returned by lex.lex() and yacc.yacc() are discarded, and the
+ # parse below goes through the module-level yacc.parse().
+ # NOTE(review): this presumably relies on PLY remembering the most recently
+ # built lexer and parser at module level, which would make concurrent calls
+ # unsafe -- confirm against the PLY documentation.
+ lex.lex(object=lexer)
+ # debug=0 and write_tables=0 presumably turn off PLY's debug output and its
+ # generated table file -- see the yacc.yacc() documentation.
+ yacc.yacc(module=parser, debug=0, write_tables=0)
+
+ tree = yacc.parse(source)
+ return tree