Move Python code to its own directory.

2025-11-21 03:26:01 +00:00 · 2015-04-16 17:51:06 -04:00
parent 0dd6b95ac2
commit adf4860988
52 changed files with 1 additions and 1 deletions
--- a/bin/python/jsonpath_rw/init.py
+++ b/bin/python/jsonpath_rw/init.py
@@ -0,0 +1,4 @@
+from .jsonpath import *
+from .parser import parse
+
+__version__ = '1.3.0'
--- a/bin/python/jsonpath_rw/jsonpath.py
+++ b/bin/python/jsonpath_rw/jsonpath.py
@@ -0,0 +1,510 @@
+from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes
+import logging
+import six
+from six.moves import xrange
+from itertools import *
+
+logger = logging.getLogger(__name__)
+
+# Module-level switch for automatic id attributes: while this is None the
+# feature is off; set it to a field name to turn it on. `Fields` compares
+# requested field names against this value and `AutoIdForDatum` uses it as its
+# default id field.
+# ... could be a kwarg pervasively but uses are rare and simple today
+auto_id_field = None
+
+class JSONPath(object):
+    """
+    Abstract base for every node of the JSONPath syntax tree.
+
+    The methods stubbed out here make up the interface that concrete
+    JSONPath expressions provide.
+    """
+
+    def find(self, data):
+        """
+        Return an iterable of `DatumInContext` objects matched by this path in `data`.
+
+        Each result keeps track of the path followed to reach it. `data` may be
+        a plain value or an existing `DatumInContext` used as the starting point.
+        The base implementation always raises `NotImplementedError`.
+        """
+        raise NotImplementedError()
+
+    def update(self, data, val):
+        """
+        Return `data` with the location named by this path replaced by `val`.
+
+        The base implementation always raises `NotImplementedError`.
+        """
+        raise NotImplementedError()
+
+    def child(self, child):
+        """
+        Compose this path with `child`, like `Child(self, child)` but canonicalized:
+
+        * if `self` is `This` or `Root`, the result is just `child`;
+        * if `child` is `This`, the result is `self`;
+        * if `child` is `Root`, the result is `child`;
+        * otherwise a `Child(self, child)` node is built.
+        """
+        if isinstance(self, (This, Root)):
+            return child
+        if isinstance(child, This):
+            return self
+        if isinstance(child, Root):
+            return child
+        return Child(self, child)
+
+    def make_datum(self, value):
+        """
+        Return `value` as a `DatumInContext`; plain values are wrapped with a
+        `Root()` path and no context, existing datums are returned unchanged.
+        """
+        if not isinstance(value, DatumInContext):
+            return DatumInContext(value, path=Root(), context=None)
+        return value
+
+class DatumInContext(object):
+    """
+    Represents a datum along a path from a context.
+
+    Essentially a zipper but with a structure represented by JsonPath,
+    and where the context is more of a parent pointer than a proper
+    representation of the context.
+
+    Attributes:
+      value   -- the underlying datum
+      path    -- the path from `context` to this datum (defaults to `This()`)
+      context -- the enclosing `DatumInContext`, or None at the top
+
+    To place `datum` within another, use `datum.in_context(context=..., path=...)`
+    which extends the path. If the datum already has a context, it places the entire
+    context within that passed in, so an object can be built from the inside
+    out.
+    """
+    @classmethod
+    def wrap(cls, data):
+        "Returns `data` unchanged if it is already an instance of `cls`, otherwise `cls(data)`"
+        if isinstance(data, cls):
+            return data
+        else:
+            return cls(data)
+
+    def __init__(self, value, path=None, context=None):
+        self.value = value
+        self.path = path or This()
+        self.context = None if context is None else DatumInContext.wrap(context)
+
+    def in_context(self, context, path):
+        """
+        Returns a new datum equal to this one but located at `path` inside `context`.
+
+        A datum without a context is simply attached to `context` at `path`.
+        A datum that already has a context keeps its own path, and its existing
+        context chain is (recursively) placed inside `context` at `path`.
+        """
+        context = DatumInContext.wrap(context)
+
+        if self.context is not None:
+            # Nest the datum's *existing* context inside the new one. Nesting the
+            # new context inside itself would discard the existing chain.
+            return DatumInContext(value=self.value, path=self.path, context=self.context.in_context(context=context, path=path))
+        else:
+            return DatumInContext(value=self.value, path=path, context=context)
+
+    @property
+    def full_path(self):
+        "The path from the outermost context down to this datum"
+        return self.path if self.context is None else self.context.full_path.child(self.path)
+
+    @property
+    def id_pseudopath(self):
+        """
+        Looks like a path, but with ids stuck in when available
+        """
+        try:
+            pseudopath = Fields(str(self.value[auto_id_field]))
+        except (TypeError, AttributeError, KeyError): # This may not be all the interesting exceptions
+            # No usable id on this value: fall back to the ordinary path segment
+            pseudopath = self.path
+
+        if self.context:
+            return self.context.id_pseudopath.child(pseudopath)
+        else:
+            return pseudopath
+
+    def __repr__(self):
+        return '%s(value=%r, path=%r, context=%r)' % (self.__class__.__name__, self.value, self.path, self.context)
+
+    def __eq__(self, other):
+        return isinstance(other, DatumInContext) and other.value == self.value and other.path == self.path and self.context == other.context
+
+class AutoIdForDatum(DatumInContext):
+    """
+    This behaves like a DatumInContext, but the value is
+    always the path leading up to it, not including the "id",
+    and with any "id" fields along the way replacing the prior
+    segment of the path
+
+    For example, it will make "foo.bar.id" return a datum
+    that behaves like DatumInContext(value="foo.bar", path="foo.bar.id").
+
+    This is disabled by default; it can be turned on by
+    setting the `auto_id_field` global to a value other
+    than `None`.
+    """
+
+    def __init__(self, datum, id_field=None):
+        """
+        Invariant is that datum.path is the path from context to datum. The auto id
+        will either be the id in the datum (if present) or the id of the context
+        followed by the path to the datum.
+
+        The path to this datum is always the path to the context, the path to the
+        datum, and then the auto id field.
+
+        `id_field` defaults to the module-level `auto_id_field`.
+        """
+        # The base __init__ is deliberately not called: `value`, `path` and
+        # `context` are read-only properties here, not plain attributes.
+        self.datum = datum
+        self.id_field = id_field or auto_id_field
+
+    @property
+    def value(self):
+        # The "value" of an auto id is the stringified id-pseudopath of the datum
+        return str(self.datum.id_pseudopath)
+
+    @property
+    def path(self):
+        return self.id_field
+
+    @property
+    def context(self):
+        return self.datum
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__class__.__name__, self.datum)
+
+    def in_context(self, context, path):
+        "Returns the auto id of the wrapped datum after placing that datum at `path` inside `context`"
+        # Carry the id field along so an explicitly chosen one is not silently
+        # replaced by the module default.
+        return AutoIdForDatum(self.datum.in_context(context=context, path=path), id_field=self.id_field)
+
+    def __eq__(self, other):
+        return isinstance(other, AutoIdForDatum) and other.datum == self.datum and self.id_field == other.id_field
+
+
+class Root(JSONPath):
+    """
+    JSONPath for the "root" object, written '$' in concrete syntax.
+    The root is the outermost datum, i.e. the one with no context attached.
+    """
+
+    def find(self, data):
+        """
+        Return a one-element list holding the root datum for `data`.
+
+        Plain values are their own root. For a `DatumInContext`, the context
+        chain is followed upwards until a datum without context is reached.
+        """
+        if isinstance(data, DatumInContext):
+            if data.context is not None:
+                return Root().find(data.context)
+            return [DatumInContext(data.value, context=None, path=Root())]
+        return [DatumInContext(data, path=Root(), context=None)]
+
+    def update(self, data, val):
+        # Replacing the root replaces everything
+        return val
+
+    def __str__(self):
+        return '$'
+
+    def __repr__(self):
+        return 'Root()'
+
+    def __eq__(self, other):
+        return isinstance(other, Root)
+
+class This(JSONPath):
+    """
+    JSONPath for the current datum itself. Concrete syntax is '@'.
+    """
+
+    def find(self, datum):
+        "Returns a one-element list holding `datum`, wrapped as a `DatumInContext` if needed"
+        current = DatumInContext.wrap(datum)
+        return [current]
+
+    def update(self, data, val):
+        # Replacing the current datum yields the new value
+        return val
+
+    def __str__(self):
+        return '`this`'
+
+    def __repr__(self):
+        return 'This()'
+
+    def __eq__(self, other):
+        return isinstance(other, This)
+
+class Child(JSONPath):
+    """
+    JSONPath that matches the left expression first and then, within each
+    of those matches, the right expression.
+    Concrete syntax is `left.right`
+    """
+
+    def __init__(self, left, right):
+        self.left = left
+        self.right = right
+
+    def find(self, datum):
+        """
+        Return every right-hand match found inside each left-hand match.
+
+        Extra special case: auto ids do not have children, so left matches
+        that are `AutoIdForDatum` are skipped rather than auto id-ing the auto id.
+        """
+        matches = []
+        for subdata in self.left.find(datum):
+            if isinstance(subdata, AutoIdForDatum):
+                continue
+            matches.extend(self.right.find(subdata))
+        return matches
+
+    def __eq__(self, other):
+        if not isinstance(other, Child):
+            return False
+        return self.left == other.left and self.right == other.right
+
+    def __str__(self):
+        return '%s.%s' % (self.left, self.right)
+
+    def __repr__(self):
+        return '%s(%r, %r)' % (self.__class__.__name__, self.left, self.right)
+
+class Parent(JSONPath):
+    """
+    JSONPath that matches the parent (context) of the current match.
+    Available via named operator `parent`.
+
+    NOTE: `find` does not check that a parent exists; for a datum without
+    context the returned list contains `None`.
+    """
+
+    def find(self, datum):
+        "Returns a one-element list holding the context of `datum`"
+        wrapped = DatumInContext.wrap(datum)
+        parent = wrapped.context
+        return [parent]
+
+    def __eq__(self, other):
+        return isinstance(other, Parent)
+
+    def __str__(self):
+        return '`parent`'
+
+    def __repr__(self):
+        return 'Parent()'
+        
+
+class Where(JSONPath):
+    """
+    JSONPath that first matches the left, and then
+    filters for only those nodes that have
+    a match on the right.
+
+    WARNING: Subject to change. May want to have "contains"
+    or some other better word for it.
+    """
+
+    def __init__(self, left, right):
+        self.left = left
+        self.right = right
+
+    def find(self, data):
+        """
+        Returns the left-hand matches in `data` for which the right-hand
+        expression matches at least once.
+        """
+        # The right-hand expression is evaluated against each left match.
+        # Evaluating it against the original `data` would keep or drop every
+        # left match together.
+        return [subdata for subdata in self.left.find(data) if self.right.find(subdata)]
+
+    def __str__(self):
+        return '%s where %s' % (self.left, self.right)
+
+    def __eq__(self, other):
+        return isinstance(other, Where) and other.left == self.left and other.right == self.right
+
+class Descendants(JSONPath):
+    """
+    JSONPath that matches first the left expression then any descendant
+    of it which matches the right expression.
+    Concrete syntax is `left..right`
+    """
+
+    def __init__(self, left, right):
+        self.left = left
+        self.right = right
+
+    def find(self, datum):
+        """
+        Returns the right-hand matches found at every left-hand match and at
+        every value nested (through lists and dicts) below one.
+        """
+        # left..right ==> left.(right | *..right | [*]..right)
+        #
+        # With a wonky caveat that since Slice() has funky coercions
+        # we cannot just delegate to that equivalence or we'll hit an
+        # infinite loop. So right here we implement the coercion-free version.
+
+        # Get all left matches into a list
+        left_matches = self.left.find(datum)
+        if not isinstance(left_matches, list):
+            left_matches = [left_matches]
+
+        def match_recursively(datum):
+            right_matches = self.right.find(datum)
+
+            # Manually do the * or [*] to avoid coercion and recurse just the right-hand pattern
+            if isinstance(datum.value, list):
+                recursive_matches = [submatch
+                                     for i, item in enumerate(datum.value)
+                                     for submatch in match_recursively(DatumInContext(item, context=datum, path=Index(i)))]
+
+            elif isinstance(datum.value, dict):
+                recursive_matches = [submatch
+                                     for field in datum.value
+                                     for submatch in match_recursively(DatumInContext(datum.value[field], context=datum, path=Fields(field)))]
+
+            else:
+                # Scalars have no descendants
+                recursive_matches = []
+
+            return right_matches + recursive_matches
+
+        # TODO: repeatable iterator instead of list?
+        return [submatch
+                for left_match in left_matches
+                for submatch in match_recursively(left_match)]
+
+    def is_singular(self):
+        # `self` was missing from the signature, so calling this on an
+        # instance raised TypeError.
+        return False
+
+    def __str__(self):
+        return '%s..%s' % (self.left, self.right)
+
+    def __eq__(self, other):
+        return isinstance(other, Descendants) and self.left == other.left and self.right == other.right
+
+class Union(JSONPath):
+    """
+    JSONPath whose matches are those of the left expression together with
+    those of the right expression.
+
+    This is pretty shoddily implemented for now. The nicest semantics
+    in case of mismatched bits (list vs atomic) is to put
+    them all in a list, but that has not been done yet.
+
+    WARNING: Any appearance of this being the _concatenation_ is
+    coincidence. It may even be a bug! (or laziness)
+    """
+    def __init__(self, left, right):
+        self.left = left
+        self.right = right
+
+    def is_singular(self):
+        return False
+
+    def find(self, data):
+        "Returns the left-hand matches followed by the right-hand matches"
+        left_matches = self.left.find(data)
+        right_matches = self.right.find(data)
+        return left_matches + right_matches
+
+class Intersect(JSONPath):
+    """
+    JSONPath for bits that match *both* patterns. Matching is not
+    implemented yet: `find` always raises `NotImplementedError`.
+
+    This can be accomplished a couple of ways. The most
+    efficient is to actually build the intersected
+    AST as in building a state machine for matching the
+    intersection of regular languages. The next
+    idea is to build a filtered data and match against
+    that.
+    """
+    def __init__(self, left, right):
+        self.left, self.right = left, right
+
+    def is_singular(self):
+        return False
+
+    def find(self, data):
+        "Not implemented; always raises `NotImplementedError`"
+        raise NotImplementedError()
+
+class Fields(JSONPath):
+    """
+    JSONPath referring to one or more fields of the current object.
+    Concrete syntax is comma-separated field names.
+
+    WARNING: If '*' is any of the field names, then all fields of the
+    object are returned.
+    """
+
+    def __init__(self, *fields):
+        self.fields = fields
+
+    def get_field_datum(self, datum, field):
+        """
+        Return the datum for `field` of `datum`, or None when the value has no
+        such field. The auto id field yields an `AutoIdForDatum` instead.
+        """
+        if field == auto_id_field:
+            return AutoIdForDatum(datum)
+
+        try:
+            # Do NOT use `.get(field)` here: it cannot tell a stored None
+            # apart from a missing field
+            field_value = datum.value[field]
+        except (TypeError, KeyError, AttributeError):
+            return None
+        return DatumInContext(value=field_value, path=Fields(field), context=datum)
+
+    def reified_fields(self, datum):
+        """
+        Return the concrete field names to look up on `datum`: the configured
+        names, or - when '*' is among them - every key of the value (plus the
+        auto id field if enabled). Values without `keys()` yield no fields.
+        """
+        if '*' not in self.fields:
+            return self.fields
+
+        try:
+            every_field = tuple(datum.value.keys())
+        except AttributeError:
+            return ()
+
+        if auto_id_field is None:
+            return every_field
+        return every_field + (auto_id_field,)
+
+    def find(self, datum):
+        "Returns the datums of all requested fields that exist on `datum`"
+        datum = DatumInContext.wrap(datum)
+
+        candidates = (self.get_field_datum(datum, field)
+                      for field in self.reified_fields(datum))
+        return [found for found in candidates if found is not None]
+
+    def __str__(self):
+        return ','.join(self.fields)
+
+    def __repr__(self):
+        field_reprs = ','.join(repr(field) for field in self.fields)
+        return '%s(%s)' % (self.__class__.__name__, field_reprs)
+
+    def __eq__(self, other):
+        if not isinstance(other, Fields):
+            return False
+        return tuple(self.fields) == tuple(other.fields)
+
+
+class Index(JSONPath):
+    """
+    JSONPath that matches indices of the current datum, or none if not large enough.
+    Concrete syntax is brackets.
+
+    WARNING: If the datum is not long enough, or cannot be indexed at all,
+    it will not crash but will not match anything.
+    NOTE: For the concrete syntax of `[*]`, the abstract syntax is a Slice() with no parameters (equiv to `[:]`)
+    """
+
+    def __init__(self, index):
+        self.index = index
+
+    def find(self, datum):
+        """
+        Returns a one-element list with the datum at `self.index`, or an empty
+        list when there is no such element.
+        """
+        datum = DatumInContext.wrap(datum)
+
+        try:
+            if len(datum.value) > self.index:
+                return [DatumInContext(datum.value[self.index], path=self, context=datum)]
+        except (TypeError, IndexError, KeyError):
+            # Unsized values (numbers, None), negative indices beyond the start
+            # and dicts without that key are "no match", not an error.
+            return []
+        return []
+
+    def __eq__(self, other):
+        return isinstance(other, Index) and self.index == other.index
+
+    def __str__(self):
+        return '[%i]' % self.index
+
+    def __repr__(self):
+        return '%s(index=%r)' % (self.__class__.__name__, self.index)
+
+class Slice(JSONPath):
+    """
+    JSONPath matching a slice of an array.
+
+    Because of a mismatch between JSON and XML when schema-unaware,
+    this always returns an iterable; if the incoming data
+    is a dictionary, an integer or a string, then it returns a one
+    element list _containing_ that data.
+
+    Consider these two docs, and their schema-unaware translation to JSON:
+
+    <a><b>hello</b></a> ==> {"a": {"b": "hello"}}
+    <a><b>hello</b><b>goodbye</b></a> ==> {"a": {"b": ["hello", "goodbye"]}}
+
+    If there were a schema, it would be known that "b" should always be an
+    array (unless the schema were wonky, but that is too much to fix here)
+    so when querying with JSON if the one writing the JSON knows that it
+    should be an array, they can write a slice operator and it will coerce
+    a non-array value to an array.
+
+    This may be a bit unfortunate because it would be nice to always have
+    an iterator, but dictionaries and other objects may also be iterable,
+    so this is the compromise.
+    """
+    def __init__(self, start=None, end=None, step=None):
+        self.start = start
+        self.end = end
+        self.step = step
+
+    def find(self, datum):
+        "Returns the datums of the elements selected by this slice"
+        datum = DatumInContext.wrap(datum)
+
+        # Here's the hack. If it is a dictionary or some kind of constant,
+        # put it in a single-element list
+        if isinstance(datum.value, (dict,) + tuple(six.integer_types) + tuple(six.string_types)):
+            return self.find(DatumInContext([datum.value], path=datum.path, context=datum.context))
+
+        # Some iterators do not support slicing but we can still
+        # at least work for '*'
+        if self.start is None and self.end is None and self.step is None:
+            return [DatumInContext(datum.value[i], path=Index(i), context=datum) for i in xrange(0, len(datum.value))]
+        else:
+            # Slice the index range rather than the value so each match keeps its real index
+            return [DatumInContext(datum.value[i], path=Index(i), context=datum) for i in range(0, len(datum.value))[self.start:self.end:self.step]]
+
+    def __str__(self):
+        if self.start is None and self.end is None and self.step is None:
+            return '[*]'
+
+        # Always emit the first ':' and test bounds against None, not
+        # truthiness. Otherwise Slice(step=2) prints like Slice(end=2),
+        # Slice(start=1) prints like Index(1), and a bound of 0 disappears.
+        text = '[%s:%s' % ('' if self.start is None else self.start,
+                           '' if self.end is None else self.end)
+        if self.step is not None:
+            text += ':%d' % self.step
+        return text + ']'
+
+    def __repr__(self):
+        return '%s(start=%r,end=%r,step=%r)' % (self.__class__.__name__, self.start, self.end, self.step)
+
+    def __eq__(self, other):
+        return isinstance(other, Slice) and other.start == self.start and self.end == other.end and other.step == self.step
--- a/bin/python/jsonpath_rw/lexer.py
+++ b/bin/python/jsonpath_rw/lexer.py
@@ -0,0 +1,171 @@
+from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes
+import sys
+import logging
+
+import ply.lex
+
+logger = logging.getLogger(__name__)
+
+class JsonPathLexerError(Exception):
+    "Raised by `JsonPathLexer` for unexpected characters, unterminated quotes or stripped docstrings"
+    pass
+
+class JsonPathLexer(object):
+    '''
+    A Lexical analyzer for JsonPath, specified as a PLY lexer module.
+
+    NOTE: the docstrings of the `t_*` methods below are the token regular
+    expressions handed to PLY; they are part of the program, not documentation.
+    '''
+
+    def __init__(self, debug=False):
+        self.debug = debug
+        # A missing class docstring means docstrings were stripped (python -OO),
+        # in which case the `t_*` rules below have lost their regexes too.
+        if self.__doc__ == None:
+            raise JsonPathLexerError('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')
+
+    def tokenize(self, string):
+        '''
+        Maps a string to an iterator over tokens. In other words: [char] -> [token]
+
+        Each yielded token gets a `col` attribute (offset from the latest newline).
+        Raises `JsonPathLexerError` if the input ends inside a quoted section.
+        '''
+
+        # A fresh PLY lexer per call, so no state leaks between inputs
+        new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
+        new_lexer.latest_newline = 0
+        new_lexer.string_value = None
+        new_lexer.input(string)
+
+        while True:
+            t = new_lexer.token()
+            if t is None: break
+            t.col = t.lexpos - new_lexer.latest_newline
+            yield t
+
+        # `string_value` is only non-None between an opening quote and its closing quote
+        if new_lexer.string_value is not None:
+            raise JsonPathLexerError('Unexpected EOF in string literal or identifier')
+
+    # ============== PLY Lexer specification ==================
+    #
+    # This probably should be private but:
+    #   - the parser requires access to `tokens` (perhaps they should be defined in a third, shared dependency)
+    #   - things like `literals` might be a legitimate part of the public interface.
+    #
+    # Anyhow, it is pythonic to give some rope to hang oneself with :-)
+
+    literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '|', '&']
+
+    # Identifiers that are lexed as their own token type instead of ID
+    reserved_words = { 'where': 'WHERE' }
+
+    tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR'] + list(reserved_words.values())
+
+    # One exclusive lexer state per kind of quote
+    states = [ ('singlequote', 'exclusive'),
+               ('doublequote', 'exclusive'),
+               ('backquote', 'exclusive') ]
+
+    # Normal lexing, rather easy
+    t_DOUBLEDOT = r'\.\.'
+    t_ignore = ' \t'
+
+    def t_ID(self, t):
+        r'[a-zA-Z_@][a-zA-Z0-9_@\-]*'
+        # Reserved words get their own token type; everything else is a plain ID
+        t.type = self.reserved_words.get(t.value, 'ID')
+        return t
+
+    def t_NUMBER(self, t):
+        r'-?\d+'
+        t.value = int(t.value)
+        return t
+
+
+    # Single-quoted strings
+    t_singlequote_ignore = ''
+    def t_singlequote(self, t):
+        r"'"
+        # Opening quote: start accumulating the string and switch state
+        t.lexer.string_start = t.lexer.lexpos
+        t.lexer.string_value = ''
+        t.lexer.push_state('singlequote')
+
+    def t_singlequote_content(self, t):
+        r"[^'\\]+"
+        t.lexer.string_value += t.value
+
+    def t_singlequote_escape(self, t):
+        r'\\.'
+        # Keep only the escaped character, dropping the backslash
+        t.lexer.string_value += t.value[1]
+
+    def t_singlequote_end(self, t):
+        r"'"
+        # Closing quote: emit the accumulated text as an ID token
+        t.value = t.lexer.string_value
+        t.type = 'ID'
+        t.lexer.string_value = None
+        t.lexer.pop_state()
+        return t
+
+    def t_singlequote_error(self, t):
+        raise JsonPathLexerError('Error on line %s, col %s while lexing singlequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
+
+
+    # Double-quoted strings
+    t_doublequote_ignore = ''
+    def t_doublequote(self, t):
+        r'"'
+        # Opening quote: start accumulating the string and switch state
+        t.lexer.string_start = t.lexer.lexpos
+        t.lexer.string_value = ''
+        t.lexer.push_state('doublequote')
+
+    def t_doublequote_content(self, t):
+        r'[^"\\]+'
+        t.lexer.string_value += t.value
+
+    def t_doublequote_escape(self, t):
+        r'\\.'
+        # Keep only the escaped character, dropping the backslash
+        t.lexer.string_value += t.value[1]
+
+    def t_doublequote_end(self, t):
+        r'"'
+        # Closing quote: emit the accumulated text as an ID token
+        t.value = t.lexer.string_value
+        t.type = 'ID'
+        t.lexer.string_value = None
+        t.lexer.pop_state()
+        return t
+
+    def t_doublequote_error(self, t):
+        raise JsonPathLexerError('Error on line %s, col %s while lexing doublequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
+
+
+    # Back-quoted "magic" operators
+    t_backquote_ignore = ''
+    def t_backquote(self, t):
+        r'`'
+        # Opening backquote: start accumulating the operator name and switch state
+        t.lexer.string_start = t.lexer.lexpos
+        t.lexer.string_value = ''
+        t.lexer.push_state('backquote')
+
+    def t_backquote_escape(self, t):
+        r'\\.'
+        # Keep only the escaped character, dropping the backslash
+        t.lexer.string_value += t.value[1]
+
+    def t_backquote_content(self, t):
+        r"[^`\\]+"
+        t.lexer.string_value += t.value
+
+    def t_backquote_end(self, t):
+        r'`'
+        # Closing backquote: emit the accumulated text as a NAMED_OPERATOR token
+        t.value = t.lexer.string_value
+        t.type = 'NAMED_OPERATOR'
+        t.lexer.string_value = None
+        t.lexer.pop_state()
+        return t
+
+    def t_backquote_error(self, t):
+        raise JsonPathLexerError('Error on line %s, col %s while lexing backquoted operator: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
+
+
+    # Counting lines, handling errors
+    def t_newline(self, t):
+        r'\n'
+        t.lexer.lineno += 1
+        # Remembered so that column numbers can be computed relative to this position
+        t.lexer.latest_newline = t.lexpos
+
+    def t_error(self, t):
+        raise JsonPathLexerError('Error on line %s, col %s: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
+
+# Debugging aid: when run as a script, tokenize standard input and print
+# each token's value next to its type.
+if __name__ == '__main__':
+    logging.basicConfig()
+    lexer = JsonPathLexer(debug=True)
+    for token in lexer.tokenize(sys.stdin.read()):
+        print('%-20s%s' % (token.value, token.type))
--- a/bin/python/jsonpath_rw/parser.py
+++ b/bin/python/jsonpath_rw/parser.py
@@ -0,0 +1,187 @@
+from __future__ import print_function, absolute_import, division, generators, nested_scopes
+import sys
+import os.path
+import logging
+
+import ply.yacc
+
+from jsonpath_rw.jsonpath import *
+from jsonpath_rw.lexer import JsonPathLexer
+
+logger = logging.getLogger(__name__)
+
+def parse(string):
+    "Parses the JSONPath expression `string` with a fresh `JsonPathParser` and returns the result"
+    parser = JsonPathParser()
+    return parser.parse(string)
+
+class JsonPathParser(object):
+    '''
+    An LALR-parser for JsonPath, specified as a PLY parser module.
+
+    NOTE: the docstrings of the `p_*` methods below are the grammar rules
+    handed to PLY; they are part of the program, not documentation.
+    '''
+
+    tokens = JsonPathLexer.tokens
+
+    def __init__(self, debug=False, lexer_class=None):
+        # A missing class docstring means docstrings were stripped (python -OO),
+        # in which case the `p_*` rules below have lost their grammar too.
+        if self.__doc__ is None:
+            raise Exception('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')
+
+        self.debug = debug
+        self.lexer_class = lexer_class or JsonPathLexer # Crufty but works around statefulness in PLY
+
+    def parse(self, string, lexer = None):
+        '''
+        Parses `string` and returns the resulting JSONPath syntax tree.
+        A `lexer` instance may be supplied; otherwise one is created from `lexer_class`.
+        '''
+        lexer = lexer or self.lexer_class()
+        return self.parse_token_stream(lexer.tokenize(string))
+
+    def parse_token_stream(self, token_iterator, start_symbol='jsonpath'):
+        '''
+        Parses an iterator of tokens, starting from grammar symbol `start_symbol`.
+        '''
+
+        # Since PLY has some crufty aspects and dumps files, we try to keep them local
+        # However, we need to derive the name of the output Python file :-/
+        output_directory = os.path.dirname(__file__)
+        try:
+            module_name = os.path.splitext(os.path.split(__file__)[1])[0]
+        except Exception:
+            # Fall back to the import name. A bare `except:` here would also
+            # swallow KeyboardInterrupt and SystemExit.
+            module_name = __name__
+
+        parsing_table_module = '_'.join([module_name, start_symbol, 'parsetab'])
+
+        # And we regenerate the parse table every time; it doesn't actually take that long!
+        new_parser = ply.yacc.yacc(module=self,
+                                   debug=self.debug,
+                                   tabmodule = parsing_table_module,
+                                   outputdir = output_directory,
+                                   write_tables=0,
+                                   start = start_symbol,
+                                   errorlog = logger)
+
+        return new_parser.parse(lexer = IteratorToTokenStream(token_iterator))
+
+    # ===================== PLY Parser specification =====================
+
+    precedence = [
+        ('left', ','),
+        ('left', 'DOUBLEDOT'),
+        ('left', '.'),
+        ('left', '|'),
+        ('left', '&'),
+        ('left', 'WHERE'),
+    ]
+
+    def p_error(self, t):
+        # PLY calls this with None when the error is at end of input (e.g. the
+        # expression stops after an operator); there is no token to describe.
+        if t is None:
+            raise Exception('Parse error near the end of string!')
+        raise Exception('Parse error at %s:%s near token %s (%s)' % (t.lineno, t.col, t.value, t.type))
+
+    def p_jsonpath_binop(self, p):
+        """jsonpath : jsonpath '.' jsonpath
+                    | jsonpath DOUBLEDOT jsonpath
+                    | jsonpath WHERE jsonpath
+                    | jsonpath '|' jsonpath
+                    | jsonpath '&' jsonpath"""
+        op = p[2]
+
+        if op == '.':
+            p[0] = Child(p[1], p[3])
+        elif op == '..':
+            p[0] = Descendants(p[1], p[3])
+        elif op == 'where':
+            p[0] = Where(p[1], p[3])
+        elif op == '|':
+            p[0] = Union(p[1], p[3])
+        elif op == '&':
+            p[0] = Intersect(p[1], p[3])
+
+    def p_jsonpath_fields(self, p):
+        "jsonpath : fields_or_any"
+        p[0] = Fields(*p[1])
+
+    def p_jsonpath_named_operator(self, p):
+        "jsonpath : NAMED_OPERATOR"
+        if p[1] == 'this':
+            p[0] = This()
+        elif p[1] == 'parent':
+            p[0] = Parent()
+        else:
+            raise Exception('Unknown named operator `%s` at %s:%s' % (p[1], p.lineno(1), p.lexpos(1)))
+
+    def p_jsonpath_root(self, p):
+        "jsonpath : '$'"
+        p[0] = Root()
+
+    def p_jsonpath_idx(self, p):
+        "jsonpath : '[' idx ']'"
+        p[0] = p[2]
+
+    def p_jsonpath_slice(self, p):
+        "jsonpath : '[' slice ']'"
+        p[0] = p[2]
+
+    def p_jsonpath_fieldbrackets(self, p):
+        "jsonpath : '[' fields ']'"
+        p[0] = Fields(*p[2])
+
+    def p_jsonpath_child_fieldbrackets(self, p):
+        "jsonpath : jsonpath '[' fields ']'"
+        p[0] = Child(p[1], Fields(*p[3]))
+
+    def p_jsonpath_child_idxbrackets(self, p):
+        "jsonpath : jsonpath '[' idx ']'"
+        p[0] = Child(p[1], p[3])
+
+    def p_jsonpath_child_slicebrackets(self, p):
+        "jsonpath : jsonpath '[' slice ']'"
+        p[0] = Child(p[1], p[3])
+
+    def p_jsonpath_parens(self, p):
+        "jsonpath : '(' jsonpath ')'"
+        p[0] = p[2]
+
+    # Because fields in brackets cannot be '*' - that is reserved for array indices
+    def p_fields_or_any(self, p):
+        """fields_or_any : fields
+                         | '*'    """
+        if p[1] == '*':
+            p[0] = ['*']
+        else:
+            p[0] = p[1]
+
+    def p_fields_id(self, p):
+        "fields : ID"
+        p[0] = [p[1]]
+
+    def p_fields_comma(self, p):
+        "fields : fields ',' fields"
+        p[0] = p[1] + p[3]
+
+    def p_idx(self, p):
+        "idx : NUMBER"
+        p[0] = Index(p[1])
+
+    def p_slice_any(self, p):
+        "slice : '*'"
+        p[0] = Slice()
+
+    def p_slice(self, p): # Currently does not support `step`
+        "slice : maybe_int ':' maybe_int"
+        p[0] = Slice(start=p[1], end=p[3])
+
+    def p_maybe_int(self, p):
+        """maybe_int : NUMBER
+                     | empty"""
+        p[0] = p[1]
+
+    def p_empty(self, p):
+        'empty :'
+        p[0] = None
+
+class IteratorToTokenStream(object):
+    """
+    Adapts a token iterator to the `token()` interface used by the parser:
+    each call returns the next token, and None once the iterator is exhausted.
+    """
+    def __init__(self, iterator):
+        self.iterator = iterator
+
+    def token(self):
+        "Returns the next token, or None when there are no more"
+        return next(self.iterator, None)
+
+
+# Debugging aid: when run as a script, parse standard input as a JSONPath
+# expression and print the result.
+if __name__ == '__main__':
+    logging.basicConfig()
+    parser = JsonPathParser(debug=True)
+    print(parser.parse(sys.stdin.read()))