summaryrefslogtreecommitdiff
path: root/utils/ipc/mojo/public/tools/mojom/mojom/parse/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils/ipc/mojo/public/tools/mojom/mojom/parse/lexer.py')
-rw-r--r--utils/ipc/mojo/public/tools/mojom/mojom/parse/lexer.py251
1 files changed, 251 insertions, 0 deletions
diff --git a/utils/ipc/mojo/public/tools/mojom/mojom/parse/lexer.py b/utils/ipc/mojo/public/tools/mojom/mojom/parse/lexer.py
new file mode 100644
index 00000000..3e084bbf
--- /dev/null
+++ b/utils/ipc/mojo/public/tools/mojom/mojom/parse/lexer.py
@@ -0,0 +1,251 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import imp
+import os.path
+import sys
+
+from mojom import fileutil
+from mojom.error import Error
+
+fileutil.AddLocalRepoThirdPartyDirToModulePath()
+from ply.lex import TOKEN
+
+
+class LexError(Error):
+ """Class for errors from the lexer."""
+
+ def __init__(self, filename, message, lineno):
+ Error.__init__(self, filename, message, lineno=lineno)
+
+
+# We have methods which look like they could be functions:
+# pylint: disable=R0201
+class Lexer(object):
+ def __init__(self, filename):
+ self.filename = filename
+
+ ######################-- PRIVATE --######################
+
+ ##
+ ## Internal auxiliary methods
+ ##
+ def _error(self, msg, token):
+ raise LexError(self.filename, msg, token.lineno)
+
+ ##
+ ## Reserved keywords
+ ##
+ keywords = (
+ 'HANDLE',
+ 'IMPORT',
+ 'MODULE',
+ 'STRUCT',
+ 'UNION',
+ 'INTERFACE',
+ 'ENUM',
+ 'CONST',
+ 'TRUE',
+ 'FALSE',
+ 'DEFAULT',
+ 'ARRAY',
+ 'MAP',
+ 'ASSOCIATED',
+ 'PENDING_REMOTE',
+ 'PENDING_RECEIVER',
+ 'PENDING_ASSOCIATED_REMOTE',
+ 'PENDING_ASSOCIATED_RECEIVER',
+ )
+
+ keyword_map = {}
+ for keyword in keywords:
+ keyword_map[keyword.lower()] = keyword
+
+ ##
+ ## All the tokens recognized by the lexer
+ ##
+ tokens = keywords + (
+ # Identifiers
+ 'NAME',
+
+ # Constants
+ 'ORDINAL',
+ 'INT_CONST_DEC',
+ 'INT_CONST_HEX',
+ 'FLOAT_CONST',
+
+ # String literals
+ 'STRING_LITERAL',
+
+ # Operators
+ 'MINUS',
+ 'PLUS',
+ 'AMP',
+ 'QSTN',
+
+ # Assignment
+ 'EQUALS',
+
+ # Request / response
+ 'RESPONSE',
+
+ # Delimiters
+ 'LPAREN',
+ 'RPAREN', # ( )
+ 'LBRACKET',
+ 'RBRACKET', # [ ]
+ 'LBRACE',
+ 'RBRACE', # { }
+ 'LANGLE',
+ 'RANGLE', # < >
+ 'SEMI', # ;
+ 'COMMA',
+ 'DOT' # , .
+ )
+
+ ##
+ ## Regexes for use in tokens
+ ##
+
+ # valid C identifiers (K&R2: A.2.3)
+ identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'
+
+ hex_prefix = '0[xX]'
+ hex_digits = '[0-9a-fA-F]+'
+
+ # integer constants (K&R2: A.2.5.1)
+ decimal_constant = '0|([1-9][0-9]*)'
+ hex_constant = hex_prefix + hex_digits
+ # Don't allow octal constants (even invalid octal).
+ octal_constant_disallowed = '0[0-9]+'
+
+ # character constants (K&R2: A.2.5.2)
+ # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
+ # directives with Windows paths as filenames (..\..\dir\file)
+ # For the same reason, decimal_escape allows all digit sequences. We want to
+ # parse all correct code, even if it means to sometimes parse incorrect
+ # code.
+ #
+ simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
+ decimal_escape = r"""(\d+)"""
+ hex_escape = r"""(x[0-9a-fA-F]+)"""
+ bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
+
+ escape_sequence = \
+ r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
+
+ # string literals (K&R2: A.2.6)
+ string_char = r"""([^"\\\n]|""" + escape_sequence + ')'
+ string_literal = '"' + string_char + '*"'
+ bad_string_literal = '"' + string_char + '*' + bad_escape + string_char + '*"'
+
+ # floating constants (K&R2: A.2.5.3)
+ exponent_part = r"""([eE][-+]?[0-9]+)"""
+ fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
+ floating_constant = \
+ '(((('+fractional_constant+')'+ \
+ exponent_part+'?)|([0-9]+'+exponent_part+')))'
+
+ # Ordinals
+ ordinal = r'@[0-9]+'
+ missing_ordinal_value = r'@'
+ # Don't allow ordinal values in octal (even invalid octal, like 09) or
+ # hexadecimal.
+ octal_or_hex_ordinal_disallowed = (
+ r'@((0[0-9]+)|(' + hex_prefix + hex_digits + '))')
+
+ ##
+ ## Rules for the normal state
+ ##
+ t_ignore = ' \t\r'
+
+ # Newlines
+ def t_NEWLINE(self, t):
+ r'\n+'
+ t.lexer.lineno += len(t.value)
+
+ # Operators
+ t_MINUS = r'-'
+ t_PLUS = r'\+'
+ t_AMP = r'&'
+ t_QSTN = r'\?'
+
+ # =
+ t_EQUALS = r'='
+
+ # =>
+ t_RESPONSE = r'=>'
+
+ # Delimiters
+ t_LPAREN = r'\('
+ t_RPAREN = r'\)'
+ t_LBRACKET = r'\['
+ t_RBRACKET = r'\]'
+ t_LBRACE = r'\{'
+ t_RBRACE = r'\}'
+ t_LANGLE = r'<'
+ t_RANGLE = r'>'
+ t_COMMA = r','
+ t_DOT = r'\.'
+ t_SEMI = r';'
+
+ t_STRING_LITERAL = string_literal
+
+ # The following floating and integer constants are defined as
+ # functions to impose a strict order (otherwise, decimal
+ # is placed before the others because its regex is longer,
+ # and this is bad)
+ #
+ @TOKEN(floating_constant)
+ def t_FLOAT_CONST(self, t):
+ return t
+
+ @TOKEN(hex_constant)
+ def t_INT_CONST_HEX(self, t):
+ return t
+
+ @TOKEN(octal_constant_disallowed)
+ def t_OCTAL_CONSTANT_DISALLOWED(self, t):
+ msg = "Octal values not allowed"
+ self._error(msg, t)
+
+ @TOKEN(decimal_constant)
+ def t_INT_CONST_DEC(self, t):
+ return t
+
+ # unmatched string literals are caught by the preprocessor
+
+ @TOKEN(bad_string_literal)
+ def t_BAD_STRING_LITERAL(self, t):
+ msg = "String contains invalid escape code"
+ self._error(msg, t)
+
+ # Handle ordinal-related tokens in the right order:
+ @TOKEN(octal_or_hex_ordinal_disallowed)
+ def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
+ msg = "Octal and hexadecimal ordinal values not allowed"
+ self._error(msg, t)
+
+ @TOKEN(ordinal)
+ def t_ORDINAL(self, t):
+ return t
+
+ @TOKEN(missing_ordinal_value)
+ def t_BAD_ORDINAL(self, t):
+ msg = "Missing ordinal value"
+ self._error(msg, t)
+
+ @TOKEN(identifier)
+ def t_NAME(self, t):
+ t.type = self.keyword_map.get(t.value, "NAME")
+ return t
+
+ # Ignore C and C++ style comments
+ def t_COMMENT(self, t):
+ r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
+ t.lexer.lineno += t.value.count("\n")
+
+ def t_error(self, t):
+ msg = "Illegal character %s" % repr(t.value[0])
+ self._error(msg, t)