mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-21 03:26:01 +00:00
Move Python code to its own directory.
This commit is contained in:
4
bin/python/jsonpath_rw/__init__.py
Normal file
4
bin/python/jsonpath_rw/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Re-export the AST node types and the `parse` convenience function as the
# package's public interface.
from .jsonpath import *
from .parser import parse

# Package version string.
__version__ = '1.3.0'
510
bin/python/jsonpath_rw/jsonpath.py
Normal file
510
bin/python/jsonpath_rw/jsonpath.py
Normal file
@@ -0,0 +1,510 @@
|
||||
from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes
|
||||
import logging
|
||||
import six
|
||||
from six.moves import xrange
|
||||
from itertools import *
|
||||
|
||||
# Module-level logger for the jsonpath AST implementation.
logger = logging.getLogger(__name__)

# Turn on/off the automatic creation of id attributes
# ... could be a kwarg pervasively but uses are rare and simple today
auto_id_field = None
|
||||
|
||||
class JSONPath(object):
    """
    Base class for JSONPath abstract-syntax nodes.

    The methods stubbed here define the interface that every concrete
    JSONPath semantics implementation supports.
    """

    def find(self, data):
        """
        Return an iterable of `DatumInContext` objects matching this path
        within `data`.

        Each datum keeps track of the path followed to reach it, so a
        caller with an opinion about the starting context can pass one in
        as `data`.
        """
        raise NotImplementedError()

    def update(self, data, val):
        """Return `data` with the locations matched by this path replaced by `val`."""
        raise NotImplementedError()

    def child(self, child):
        """
        Build `Child(self, child)`, canonicalizing trivial combinations.
        """
        # `$.x` and `@.x` are just `x`; `x.@` is just `x`; anything
        # followed by `$` restarts from the root.
        if isinstance(self, (This, Root)):
            return child
        if isinstance(child, This):
            return self
        if isinstance(child, Root):
            return child
        return Child(self, child)

    def make_datum(self, value):
        """Wrap `value` as a root-level `DatumInContext` unless it already is one."""
        if isinstance(value, DatumInContext):
            return value
        return DatumInContext(value, path=Root(), context=None)
|
||||
|
||||
class DatumInContext(object):
    """
    A datum along a path from a context — essentially a zipper, but with
    the structure represented by JSONPath nodes and the context acting
    more as a parent pointer than a full representation of the context.

    For quick-and-dirty work this proxies non-special attributes to the
    underlying datum, but the actual datum can (and usually should) be
    retrieved via the `value` attribute.

    To place a datum within another, use
    `datum.in_context(context=..., path=...)`, which extends the path.
    If the datum already has a context, the entire existing context is
    placed inside the one passed in, so objects can be built from the
    inside out.
    """

    @classmethod
    def wrap(cls, data):
        """Return `data` unchanged if already wrapped, else wrap it."""
        return data if isinstance(data, cls) else cls(data)

    def __init__(self, value, path=None, context=None):
        # `path` defaults to the identity path This(); `context` is
        # normalized to a DatumInContext when present.
        self.value = value
        self.path = path if path else This()
        self.context = DatumInContext.wrap(context) if context is not None else None

    def in_context(self, context, path):
        context = DatumInContext.wrap(context)
        if not self.context:
            return DatumInContext(value=self.value, path=path, context=context)
        # Already contextualized: nest our existing context inside the
        # one passed in, keeping our own path unchanged.
        return DatumInContext(
            value=self.value,
            path=self.path,
            context=context.in_context(path=path, context=context),
        )

    @property
    def full_path(self):
        """The path from the outermost context down to this datum."""
        if self.context is None:
            return self.path
        return self.context.full_path.child(self.path)

    @property
    def id_pseudopath(self):
        """
        Looks like a path, but with ids stuck in when available
        """
        try:
            pseudopath = Fields(str(self.value[auto_id_field]))
        except (TypeError, AttributeError, KeyError):  # This may not be all the interesting exceptions
            pseudopath = self.path

        if not self.context:
            return pseudopath
        return self.context.id_pseudopath.child(pseudopath)

    def __repr__(self):
        return '%s(value=%r, path=%r, context=%r)' % (self.__class__.__name__, self.value, self.path, self.context)

    def __eq__(self, other):
        return (isinstance(other, DatumInContext)
                and other.value == self.value
                and other.path == self.path
                and self.context == other.context)
|
||||
|
||||
class AutoIdForDatum(DatumInContext):
    """
    Behaves like a DatumInContext whose value is always the path leading
    up to it (not including the "id"), with any "id" fields along the
    way replacing the prior segment of the path.

    For example, "foo.bar.id" yields a datum that behaves like
    DatumInContext(value="foo.bar", path="foo.bar.id").

    Disabled by default; enable it by setting the `auto_id_field` global
    to a value other than `None`.
    """

    def __init__(self, datum, id_field=None):
        """
        Invariant: `datum.path` is the path from its context to `datum`.
        The auto id is either the id found in the datum (when present)
        or the id of the context followed by the path to the datum.

        The path to this datum is always the path to the context, then
        the path to the datum, then the auto id field.
        """
        self.datum = datum
        self.id_field = id_field if id_field else auto_id_field

    @property
    def value(self):
        # The "value" of an auto-id datum is the stringified pseudo-path.
        return str(self.datum.id_pseudopath)

    @property
    def path(self):
        return self.id_field

    @property
    def context(self):
        return self.datum

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.datum)

    def in_context(self, context, path):
        return AutoIdForDatum(self.datum.in_context(context=context, path=path))

    def __eq__(self, other):
        return (isinstance(other, AutoIdForDatum)
                and other.datum == self.datum
                and self.id_field == other.id_field)
|
||||
|
||||
|
||||
class Root(JSONPath):
    """
    The JSONPath referring to the "root" object; concrete syntax is '$'.
    The root is the topmost datum with no context attached.
    """

    def find(self, data):
        if not isinstance(data, DatumInContext):
            return [DatumInContext(data, path=Root(), context=None)]
        if data.context is None:
            # Already at the top: re-root the datum explicitly.
            return [DatumInContext(data.value, context=None, path=Root())]
        # Walk up through contexts until the topmost datum is reached.
        return Root().find(data.context)

    def update(self, data, val):
        return val

    def __str__(self):
        return '$'

    def __repr__(self):
        return 'Root()'

    def __eq__(self, other):
        return isinstance(other, Root)
|
||||
|
||||
class This(JSONPath):
    """
    The JSONPath referring to the current datum; concrete syntax is '@'.
    """

    def find(self, datum):
        # Identity: the current datum is the only match.
        return [DatumInContext.wrap(datum)]

    def update(self, data, val):
        return val

    def __repr__(self):
        return 'This()'

    def __str__(self):
        return '`this`'

    def __eq__(self, other):
        return isinstance(other, This)
|
||||
|
||||
class Child(JSONPath):
    """
    JSONPath that matches the left expression, then the right within
    each result.  Concrete syntax is <left> '.' <right>.
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def find(self, datum):
        """
        Extra special case: auto ids do not have children, so cut them
        off right now rather than auto-id the auto id.
        """
        matches = []
        for subdata in self.left.find(datum):
            if isinstance(subdata, AutoIdForDatum):
                continue
            matches.extend(self.right.find(subdata))
        return matches

    def __eq__(self, other):
        return isinstance(other, Child) and self.left == other.left and self.right == other.right

    def __str__(self):
        return '%s.%s' % (self.left, self.right)

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.left, self.right)
|
||||
|
||||
class Parent(JSONPath):
    """
    JSONPath that matches the parent node of the current match.
    Will crash if no such parent exists.
    Available via the named operator `parent`.
    """

    def find(self, datum):
        # The wrapped datum's context *is* its parent.
        return [DatumInContext.wrap(datum).context]

    def __eq__(self, other):
        return isinstance(other, Parent)

    def __str__(self):
        return '`parent`'

    def __repr__(self):
        return 'Parent()'
|
||||
|
||||
|
||||
class Where(JSONPath):
    """
    JSONPath that first matches the left, and then filters for only
    those nodes that have a match on the right.

    WARNING: Subject to change. May want to have "contains"
    or some other better word for it.
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def find(self, data):
        # BUG FIX: test the right-hand expression against each left
        # match (`subdata`), not against the unfiltered input `data`.
        # The previous condition was constant across all left matches,
        # so Where either kept everything or nothing.
        return [subdata for subdata in self.left.find(data) if self.right.find(subdata)]

    def __str__(self):
        return '%s where %s' % (self.left, self.right)

    def __eq__(self, other):
        return isinstance(other, Where) and other.left == self.left and other.right == self.right
|
||||
|
||||
class Descendants(JSONPath):
    """
    JSONPath that matches first the left expression then any descendant
    of it which matches the right expression.
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def find(self, datum):
        # <left> .. <right> ==> <left> . (<right> | *..<right> | [*]..<right>)
        #
        # With a wonky caveat: since Slice() has funky coercions
        # we cannot just delegate to that equivalence or we'll hit an
        # infinite loop. So right here we implement the coercion-free version.

        # Get all left matches into a list
        left_matches = self.left.find(datum)
        if not isinstance(left_matches, list):
            left_matches = [left_matches]

        def match_recursively(datum):
            right_matches = self.right.find(datum)

            # Manually do the * or [*] to avoid coercion and recurse just the right-hand pattern
            if isinstance(datum.value, list):
                recursive_matches = [submatch
                                     for i in range(0, len(datum.value))
                                     for submatch in match_recursively(DatumInContext(datum.value[i], context=datum, path=Index(i)))]

            elif isinstance(datum.value, dict):
                recursive_matches = [submatch
                                     for field in datum.value.keys()
                                     for submatch in match_recursively(DatumInContext(datum.value[field], context=datum, path=Fields(field)))]

            else:
                recursive_matches = []

            return right_matches + list(recursive_matches)

        # TODO: repeatable iterator instead of list?
        return [submatch
                for left_match in left_matches
                for submatch in match_recursively(left_match)]

    def is_singular(self):
        # BUG FIX: `self` was missing from the signature, so calling
        # this on an instance raised TypeError instead of returning False.
        return False

    def __str__(self):
        return '%s..%s' % (self.left, self.right)

    def __eq__(self, other):
        return isinstance(other, Descendants) and self.left == other.left and self.right == other.right
|
||||
|
||||
class Union(JSONPath):
    """
    JSONPath that returns the union of the results of each match.
    Pretty shoddily implemented for now: the nicest semantics for
    mismatched bits (list vs atomic) would be to put them all in a
    list, but that has not been done yet.

    WARNING: Any appearance of this being the _concatenation_ is
    coincidence. It may even be a bug! (or laziness)
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def is_singular(self):
        # A union can always yield multiple results.
        return False

    def find(self, data):
        matches = list(self.left.find(data))
        matches += self.right.find(data)
        return matches
|
||||
|
||||
class Intersect(JSONPath):
    """
    JSONPath for bits that match *both* patterns.

    This can be accomplished a couple of ways. The most efficient is to
    actually build the intersected AST, as in building a state machine
    for matching the intersection of regular languages. The next idea
    is to build a filtered data set and match against that.
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def is_singular(self):
        return False

    def find(self, data):
        # Not implemented yet -- see the class docstring for candidate
        # strategies.
        raise NotImplementedError()
|
||||
|
||||
class Fields(JSONPath):
    """
    JSONPath referring to some field of the current object.
    Concrete syntax is comma-separated field names.

    WARNING: If '*' is any of the field names, then they will
    all be returned.
    """

    def __init__(self, *fields):
        self.fields = fields

    def get_field_datum(self, datum, field):
        # The auto-id pseudo-field gets its own datum type.
        if field == auto_id_field:
            return AutoIdForDatum(datum)
        try:
            # Do NOT use `val.get(field)` since that confuses None as a
            # value with None due to `get`.
            field_value = datum.value[field]
            return DatumInContext(value=field_value, path=Fields(field), context=datum)
        except (TypeError, KeyError, AttributeError):
            return None

    def reified_fields(self, datum):
        # Expand '*' into the datum's actual keys (plus the auto-id
        # pseudo-field when enabled); non-mappings expand to nothing.
        if '*' not in self.fields:
            return self.fields
        try:
            fields = tuple(datum.value.keys())
        except AttributeError:
            return ()
        return fields if auto_id_field is None else fields + (auto_id_field,)

    def find(self, datum):
        datum = DatumInContext.wrap(datum)
        results = []
        for field in self.reified_fields(datum):
            field_datum = self.get_field_datum(datum, field)
            if field_datum is not None:
                results.append(field_datum)
        return results

    def __str__(self):
        return ','.join(self.fields)

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, ','.join(map(repr, self.fields)))

    def __eq__(self, other):
        return isinstance(other, Fields) and tuple(self.fields) == tuple(other.fields)
|
||||
|
||||
|
||||
class Index(JSONPath):
    """
    JSONPath that matches indices of the current datum, or none if not
    large enough.  Concrete syntax is brackets.

    WARNING: If the datum is not long enough, it will not crash but will not match anything.
    NOTE: For the concrete syntax of `[*]`, the abstract syntax is a Slice() with no parameters (equiv to `[:]`
    """

    def __init__(self, index):
        self.index = index

    def find(self, datum):
        datum = DatumInContext.wrap(datum)
        # Out-of-range indices silently match nothing.
        if self.index < len(datum.value):
            return [DatumInContext(datum.value[self.index], path=self, context=datum)]
        return []

    def __eq__(self, other):
        return isinstance(other, Index) and self.index == other.index

    def __str__(self):
        return '[%i]' % self.index
|
||||
|
||||
class Slice(JSONPath):
    """
    JSONPath matching a slice of an array.

    Because of a mismatch between JSON and XML when schema-unaware,
    this always returns an iterable; if the incoming data
    was not a list, then it returns a one element list _containing_ that
    data.

    Consider these two docs, and their schema-unaware translation to JSON:

    <a><b>hello</b></a> ==> {"a": {"b": "hello"}}
    <a><b>hello</b><b>goodbye</b></a> ==> {"a": {"b": ["hello", "goodbye"]}}

    If there were a schema, it would be known that "b" should always be an
    array (unless the schema were wonky, but that is too much to fix here)
    so when querying with JSON if the one writing the JSON knows that it
    should be an array, they can write a slice operator and it will coerce
    a non-array value to an array.

    This may be a bit unfortunate because it would be nice to always have
    an iterator, but dictionaries and other objects may also be iterable,
    so this is the compromise.
    """

    def __init__(self, start=None, end=None, step=None):
        self.start = start
        self.end = end
        self.step = step

    def find(self, datum):
        datum = DatumInContext.wrap(datum)

        # Here's the hack. If it is a dictionary or some kind of constant,
        # put it in a single-element list and retry.
        if (isinstance(datum.value, dict) or isinstance(datum.value, six.integer_types) or isinstance(datum.value, six.string_types)):
            return self.find(DatumInContext([datum.value], path=datum.path, context=datum.context))

        # Some iterators do not support slicing but we can still
        # at least work for '*'.
        # IDIOM FIX: compare against None with `is`, not `==` (PEP 8).
        if self.start is None and self.end is None and self.step is None:
            return [DatumInContext(datum.value[i], path=Index(i), context=datum) for i in xrange(0, len(datum.value))]
        else:
            return [DatumInContext(datum.value[i], path=Index(i), context=datum) for i in range(0, len(datum.value))[self.start:self.end:self.step]]

    def __str__(self):
        if self.start is None and self.end is None and self.step is None:
            return '[*]'
        else:
            return '[%s%s%s]' % (self.start or '',
                                 ':%d' % self.end if self.end else '',
                                 ':%d' % self.step if self.step else '')

    def __repr__(self):
        return '%s(start=%r,end=%r,step=%r)' % (self.__class__.__name__, self.start, self.end, self.step)

    def __eq__(self, other):
        return isinstance(other, Slice) and other.start == self.start and self.end == other.end and other.step == self.step
|
||||
171
bin/python/jsonpath_rw/lexer.py
Normal file
171
bin/python/jsonpath_rw/lexer.py
Normal file
@@ -0,0 +1,171 @@
|
||||
from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes
|
||||
import sys
|
||||
import logging
|
||||
|
||||
import ply.lex
|
||||
|
||||
# Module-level logger; handed to PLY as its error log in `tokenize`.
logger = logging.getLogger(__name__)
|
||||
|
||||
class JsonPathLexerError(Exception):
    """Raised when the lexer cannot tokenize its JsonPath input."""
|
||||
|
||||
class JsonPathLexer(object):
    '''
    A Lexical analyzer for JsonPath.
    '''

    def __init__(self, debug=False):
        self.debug = debug
        # PLY derives token regexes from method docstrings, so running with
        # docstrings stripped (python -OO / PYTHONOPTIMIZE=2) cannot work.
        if self.__doc__ == None:
            raise JsonPathLexerError('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')

    def tokenize(self, string):
        '''
        Maps a string to an iterator over tokens. In other words: [char] -> [token]
        '''

        # A fresh PLY lexer per call works around PLY's module-level state.
        new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
        new_lexer.latest_newline = 0
        new_lexer.string_value = None
        new_lexer.input(string)

        while True:
            t = new_lexer.token()
            if t is None: break
            # Column is measured from the most recent newline seen.
            t.col = t.lexpos - new_lexer.latest_newline
            yield t

        # A non-None string_value means a quoted literal was never closed.
        if new_lexer.string_value is not None:
            raise JsonPathLexerError('Unexpected EOF in string literal or identifier')

    # ============== PLY Lexer specification ==================
    #
    # This probably should be private but:
    #   - the parser requires access to `tokens` (perhaps they should be defined in a third, shared dependency)
    #   - things like `literals` might be a legitimate part of the public interface.
    #
    # Anyhow, it is pythonic to give some rope to hang oneself with :-)

    literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '|', '&']

    reserved_words = { 'where': 'WHERE' }

    tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR'] + list(reserved_words.values())

    # Exclusive lexer states for the three quoting syntaxes.
    states = [ ('singlequote', 'exclusive'),
               ('doublequote', 'exclusive'),
               ('backquote', 'exclusive') ]

    # Normal lexing, rather easy
    t_DOUBLEDOT = r'\.\.'
    t_ignore = ' \t'

    def t_ID(self, t):
        r'[a-zA-Z_@][a-zA-Z0-9_@\-]*'
        # Promote identifiers that are reserved words (e.g. `where`).
        t.type = self.reserved_words.get(t.value, 'ID')
        return t

    def t_NUMBER(self, t):
        r'-?\d+'
        t.value = int(t.value)
        return t


    # Single-quoted strings
    t_singlequote_ignore = ''
    def t_singlequote(self, t):
        r"'"
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('singlequote')

    def t_singlequote_content(self, t):
        r"[^'\\]+"
        t.lexer.string_value += t.value

    def t_singlequote_escape(self, t):
        r'\\.'
        # Backslash escapes: keep only the escaped character.
        t.lexer.string_value += t.value[1]

    def t_singlequote_end(self, t):
        r"'"
        # The whole quoted string is emitted as a single ID token.
        t.value = t.lexer.string_value
        t.type = 'ID'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_singlequote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing singlequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))


    # Double-quoted strings
    t_doublequote_ignore = ''
    def t_doublequote(self, t):
        r'"'
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('doublequote')

    def t_doublequote_content(self, t):
        r'[^"\\]+'
        t.lexer.string_value += t.value

    def t_doublequote_escape(self, t):
        r'\\.'
        t.lexer.string_value += t.value[1]

    def t_doublequote_end(self, t):
        r'"'
        t.value = t.lexer.string_value
        t.type = 'ID'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_doublequote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing doublequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))


    # Back-quoted "magic" operators
    t_backquote_ignore = ''
    def t_backquote(self, t):
        r'`'
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('backquote')

    def t_backquote_escape(self, t):
        r'\\.'
        t.lexer.string_value += t.value[1]

    def t_backquote_content(self, t):
        r"[^`\\]+"
        t.lexer.string_value += t.value

    def t_backquote_end(self, t):
        r'`'
        # Backquoted text becomes a NAMED_OPERATOR (e.g. `this`, `parent`).
        t.value = t.lexer.string_value
        t.type = 'NAMED_OPERATOR'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_backquote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing backquoted operator: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))


    # Counting lines, handling errors
    def t_newline(self, t):
        r'\n'
        t.lexer.lineno += 1
        # Remember where the line started so columns can be computed.
        t.lexer.latest_newline = t.lexpos

    def t_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))
|
||||
|
||||
if __name__ == '__main__':
    # Ad-hoc driver: lex stdin and print one "<value> <type>" pair per token.
    logging.basicConfig()
    debug_lexer = JsonPathLexer(debug=True)
    for tok in debug_lexer.tokenize(sys.stdin.read()):
        print('%-20s%s' % (tok.value, tok.type))
|
||||
187
bin/python/jsonpath_rw/parser.py
Normal file
187
bin/python/jsonpath_rw/parser.py
Normal file
@@ -0,0 +1,187 @@
|
||||
from __future__ import print_function, absolute_import, division, generators, nested_scopes
|
||||
import sys
|
||||
import os.path
|
||||
import logging
|
||||
|
||||
import ply.yacc
|
||||
|
||||
from jsonpath_rw.jsonpath import *
|
||||
from jsonpath_rw.lexer import JsonPathLexer
|
||||
|
||||
# Module-level logger; handed to PLY as its error log when building the parser.
logger = logging.getLogger(__name__)
|
||||
|
||||
def parse(string):
    """Convenience wrapper: parse `string` into a JSONPath AST."""
    parser = JsonPathParser()
    return parser.parse(string)
|
||||
|
||||
class JsonPathParser(object):
    '''
    An LALR-parser for JsonPath
    '''

    # Token names are shared with the lexer; PLY requires the parser to
    # declare the same token set.
    tokens = JsonPathLexer.tokens

    def __init__(self, debug=False, lexer_class=None):
        # PLY derives grammar rules from method docstrings, so running with
        # docstrings stripped (python -OO / PYTHONOPTIMIZE=2) cannot work.
        if self.__doc__ == None:
            raise Exception('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')

        self.debug = debug
        self.lexer_class = lexer_class or JsonPathLexer # Crufty but works around statefulness in PLY

    def parse(self, string, lexer = None):
        # Parse `string` into a JSONPath AST, creating a lexer on demand.
        lexer = lexer or self.lexer_class()
        return self.parse_token_stream(lexer.tokenize(string))

    def parse_token_stream(self, token_iterator, start_symbol='jsonpath'):
        # Build a fresh PLY parser over `token_iterator`, starting from
        # `start_symbol`, and return the resulting JSONPath AST.

        # Since PLY has some crufty aspects and dumps files, we try to keep them local
        # However, we need to derive the name of the output Python file :-/
        output_directory = os.path.dirname(__file__)
        try:
            module_name = os.path.splitext(os.path.split(__file__)[1])[0]
        except:
            module_name = __name__

        parsing_table_module = '_'.join([module_name, start_symbol, 'parsetab'])

        # And we regenerate the parse table every time; it doesn't actually take that long!
        new_parser = ply.yacc.yacc(module=self,
                                   debug=self.debug,
                                   tabmodule = parsing_table_module,
                                   outputdir = output_directory,
                                   write_tables=0,
                                   start = start_symbol,
                                   errorlog = logger)

        return new_parser.parse(lexer = IteratorToTokenStream(token_iterator))

    # ===================== PLY Parser specification =====================

    # Lowest-to-highest binding, so e.g. `a.b | c.d` groups as `(a.b) | (c.d)`.
    precedence = [
        ('left', ','),
        ('left', 'DOUBLEDOT'),
        ('left', '.'),
        ('left', '|'),
        ('left', '&'),
        ('left', 'WHERE'),
    ]

    def p_error(self, t):
        raise Exception('Parse error at %s:%s near token %s (%s)' % (t.lineno, t.col, t.value, t.type))

    def p_jsonpath_binop(self, p):
        """jsonpath : jsonpath '.' jsonpath
                    | jsonpath DOUBLEDOT jsonpath
                    | jsonpath WHERE jsonpath
                    | jsonpath '|' jsonpath
                    | jsonpath '&' jsonpath"""
        op = p[2]

        # Map each binary operator to its AST node type.
        if op == '.':
            p[0] = Child(p[1], p[3])
        elif op == '..':
            p[0] = Descendants(p[1], p[3])
        elif op == 'where':
            p[0] = Where(p[1], p[3])
        elif op == '|':
            p[0] = Union(p[1], p[3])
        elif op == '&':
            p[0] = Intersect(p[1], p[3])

    def p_jsonpath_fields(self, p):
        "jsonpath : fields_or_any"
        p[0] = Fields(*p[1])

    def p_jsonpath_named_operator(self, p):
        "jsonpath : NAMED_OPERATOR"
        # Backquoted operators: only `this` and `parent` are known.
        if p[1] == 'this':
            p[0] = This()
        elif p[1] == 'parent':
            p[0] = Parent()
        else:
            raise Exception('Unknown named operator `%s` at %s:%s' % (p[1], p.lineno(1), p.lexpos(1)))

    def p_jsonpath_root(self, p):
        "jsonpath : '$'"
        p[0] = Root()

    def p_jsonpath_idx(self, p):
        "jsonpath : '[' idx ']'"
        p[0] = p[2]

    def p_jsonpath_slice(self, p):
        "jsonpath : '[' slice ']'"
        p[0] = p[2]

    def p_jsonpath_fieldbrackets(self, p):
        "jsonpath : '[' fields ']'"
        p[0] = Fields(*p[2])

    def p_jsonpath_child_fieldbrackets(self, p):
        "jsonpath : jsonpath '[' fields ']'"
        p[0] = Child(p[1], Fields(*p[3]))

    def p_jsonpath_child_idxbrackets(self, p):
        "jsonpath : jsonpath '[' idx ']'"
        p[0] = Child(p[1], p[3])

    def p_jsonpath_child_slicebrackets(self, p):
        "jsonpath : jsonpath '[' slice ']'"
        p[0] = Child(p[1], p[3])

    def p_jsonpath_parens(self, p):
        "jsonpath : '(' jsonpath ')'"
        p[0] = p[2]

    # Because fields in brackets cannot be '*' - that is reserved for array indices
    def p_fields_or_any(self, p):
        """fields_or_any : fields
                         | '*' """
        if p[1] == '*':
            p[0] = ['*']
        else:
            p[0] = p[1]

    def p_fields_id(self, p):
        "fields : ID"
        p[0] = [p[1]]

    def p_fields_comma(self, p):
        "fields : fields ',' fields"
        p[0] = p[1] + p[3]

    def p_idx(self, p):
        "idx : NUMBER"
        p[0] = Index(p[1])

    def p_slice_any(self, p):
        "slice : '*'"
        p[0] = Slice()

    def p_slice(self, p): # Currently does not support `step`
        "slice : maybe_int ':' maybe_int"
        p[0] = Slice(start=p[1], end=p[3])

    def p_maybe_int(self, p):
        """maybe_int : NUMBER
                     | empty"""
        p[0] = p[1]

    def p_empty(self, p):
        'empty :'
        p[0] = None
|
||||
|
||||
class IteratorToTokenStream(object):
    """Adapt a plain token iterator to the `token()` interface PLY expects."""

    def __init__(self, iterator):
        self.iterator = iterator

    def token(self):
        # PLY signals end-of-input with None rather than StopIteration.
        return next(self.iterator, None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Ad-hoc driver: parse a JsonPath expression read from stdin and
    # print the resulting AST.
    logging.basicConfig()
    print(JsonPathParser(debug=True).parse(sys.stdin.read()))
|
||||
Reference in New Issue
Block a user