Move Python code to its own directory.

2025-11-22 12:05:53 +00:00 · 2015-04-16 17:51:06 -04:00
parent 0dd6b95ac2
commit adf4860988
52 changed files with 1 additions and 1 deletions
--- a/bin/python/jsonpath_rw/lexer.py
+++ b/bin/python/jsonpath_rw/lexer.py
@@ -0,0 +1,171 @@
+from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes
+import sys
+import logging
+
+import ply.lex
+
+logger = logging.getLogger(__name__)
+
+class JsonPathLexerError(Exception):
+    pass
+
+class JsonPathLexer(object):
+    '''
+    A Lexical analyzer for JsonPath.
+    '''
+
+    def __init__(self, debug=False):
+        self.debug = debug
+        if self.__doc__ == None:
+            raise JsonPathLexerError('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')
+
+    def tokenize(self, string):
+        '''
+        Maps a string to an iterator over tokens. In other words: [char] -> [token]
+        '''
+
+        new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
+        new_lexer.latest_newline = 0
+        new_lexer.string_value = None
+        new_lexer.input(string)
+
+        while True:
+            t = new_lexer.token()
+            if t is None: break
+            t.col = t.lexpos - new_lexer.latest_newline
+            yield t
+
+        if new_lexer.string_value is not None:
+            raise JsonPathLexerError('Unexpected EOF in string literal or identifier')
+
+    # ============== PLY Lexer specification ==================
+    #
+    # This probably should be private but:
+    #   - the parser requires access to `tokens` (perhaps they should be defined in a third, shared dependency)
+    #   - things like `literals` might be a legitimate part of the public interface.
+    #
+    # Anyhow, it is pythonic to give some rope to hang oneself with :-)
+
+    literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '|', '&']
+
+    reserved_words = { 'where': 'WHERE' }
+
+    tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR'] + list(reserved_words.values())
+
+    states = [ ('singlequote', 'exclusive'),
+               ('doublequote', 'exclusive'),
+               ('backquote', 'exclusive') ]
+
+    # Normal lexing, rather easy
+    t_DOUBLEDOT = r'\.\.'
+    t_ignore = ' \t'
+
+    def t_ID(self, t):
+        r'[a-zA-Z_@][a-zA-Z0-9_@\-]*'
+        t.type = self.reserved_words.get(t.value, 'ID')
+        return t
+
+    def t_NUMBER(self, t):
+        r'-?\d+'
+        t.value = int(t.value)
+        return t
+
+
+    # Single-quoted strings
+    t_singlequote_ignore = ''
+    def t_singlequote(self, t):
+        r"'"
+        t.lexer.string_start = t.lexer.lexpos
+        t.lexer.string_value = ''
+        t.lexer.push_state('singlequote')
+
+    def t_singlequote_content(self, t):
+        r"[^'\\]+"
+        t.lexer.string_value += t.value
+
+    def t_singlequote_escape(self, t):
+        r'\\.'
+        t.lexer.string_value += t.value[1]
+
+    def t_singlequote_end(self, t):
+        r"'"
+        t.value = t.lexer.string_value
+        t.type = 'ID'
+        t.lexer.string_value = None
+        t.lexer.pop_state()
+        return t
+
+    def t_singlequote_error(self, t):
+        raise JsonPathLexerError('Error on line %s, col %s while lexing singlequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
+
+
+    # Double-quoted strings
+    t_doublequote_ignore = ''
+    def t_doublequote(self, t):
+        r'"'
+        t.lexer.string_start = t.lexer.lexpos
+        t.lexer.string_value = ''
+        t.lexer.push_state('doublequote')
+
+    def t_doublequote_content(self, t):
+        r'[^"\\]+'
+        t.lexer.string_value += t.value
+
+    def t_doublequote_escape(self, t):
+        r'\\.'
+        t.lexer.string_value += t.value[1]
+
+    def t_doublequote_end(self, t):
+        r'"'
+        t.value = t.lexer.string_value
+        t.type = 'ID'
+        t.lexer.string_value = None
+        t.lexer.pop_state()
+        return t
+
+    def t_doublequote_error(self, t):
+        raise JsonPathLexerError('Error on line %s, col %s while lexing doublequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
+
+
+    # Back-quoted "magic" operators
+    t_backquote_ignore = ''
+    def t_backquote(self, t):
+        r'`'
+        t.lexer.string_start = t.lexer.lexpos
+        t.lexer.string_value = ''
+        t.lexer.push_state('backquote')
+
+    def t_backquote_escape(self, t):
+        r'\\.'
+        t.lexer.string_value += t.value[1]
+
+    def t_backquote_content(self, t):
+        r"[^`\\]+"
+        t.lexer.string_value += t.value
+
+    def t_backquote_end(self, t):
+        r'`'
+        t.value = t.lexer.string_value
+        t.type = 'NAMED_OPERATOR'
+        t.lexer.string_value = None
+        t.lexer.pop_state()
+        return t
+
+    def t_backquote_error(self, t):
+        raise JsonPathLexerError('Error on line %s, col %s while lexing backquoted operator: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
+
+
+    # Counting lines, handling errors
+    def t_newline(self, t):
+        r'\n'
+        t.lexer.lineno += 1
+        t.lexer.latest_newline = t.lexpos
+
+    def t_error(self, t):
+        raise JsonPathLexerError('Error on line %s, col %s: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
+
+if __name__ == '__main__':
+    logging.basicConfig()
+    lexer = JsonPathLexer(debug=True)
+    for token in lexer.tokenize(sys.stdin.read()):
+        print('%-20s%s' % (token.value, token.type))