Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for server side NOT_IN filter. #957

Merged
merged 4 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions google/cloud/ndb/_datastore_query.py
Expand Up @@ -58,6 +58,7 @@
">=": query_pb2.PropertyFilter.Operator.GREATER_THAN_OR_EQUAL,
"!=": query_pb2.PropertyFilter.Operator.NOT_EQUAL,
"in": query_pb2.PropertyFilter.Operator.IN,
"not_in": query_pb2.PropertyFilter.Operator.NOT_IN,
}

_KEY_NOT_IN_CACHE = object()
Expand Down
24 changes: 16 additions & 8 deletions google/cloud/ndb/_gql.py
Expand Up @@ -30,8 +30,8 @@ class GQL(object):
[OFFSET <offset>]
[HINT (ORDER_FIRST | FILTER_FIRST | ANCESTOR_FIRST)]
[;]
<condition> := <property> {< | <= | > | >= | = | != | IN} <value>
<condition> := <property> {< | <= | > | >= | = | != | IN} CAST(<value>)
<condition> := <property> {< | <= | > | >= | = | != | IN | NOT_IN} <value>
<condition> := <property> {< | <= | > | >= | = | != | IN | NOT_IN} CAST(<value>)
sorced-jim marked this conversation as resolved.
Show resolved Hide resolved
<condition> := <property> IN (<value>, ...)
<condition> := ANCESTOR IS <entity or key>

Expand Down Expand Up @@ -186,7 +186,7 @@ def _entity(self):
_identifier_regex = re.compile(r"(\w+(?:\.\w+)*)$")

_quoted_identifier_regex = re.compile(r'((?:"[^"\s]+")+)$')
_conditions_regex = re.compile(r"(<=|>=|!=|=|<|>|is|in)$", re.IGNORECASE)
_conditions_regex = re.compile(r"(<=|>=|!=|=|<|>|is|in|not)$", re.IGNORECASE)
_number_regex = re.compile(r"(\d+)$")
_cast_regex = re.compile(r"(geopt|user|key|date|time|datetime)$", re.IGNORECASE)

Expand Down Expand Up @@ -325,6 +325,9 @@ def _FilterList(self):
condition = self._AcceptRegex(self._conditions_regex)
if not condition:
self._Error("Invalid WHERE Condition")
if condition.lower() == "not":
condition += "_" + self._AcceptRegex(self._conditions_regex)

self._CheckFilterSyntax(identifier, condition)

if not self._AddSimpleFilter(identifier, condition, self._Reference()):
Expand Down Expand Up @@ -366,22 +369,25 @@ def _GetValueList(self):

return params

def _CheckFilterSyntax(self, identifier, condition):
def _CheckFilterSyntax(self, identifier, raw_condition):
"""Check that filter conditions are valid and throw errors if not.

Args:
identifier (str): identifier being used in comparison.
condition (str): comparison operator used in the filter.
"""
condition = raw_condition.lower()
if identifier.lower() == "ancestor":
if condition.lower() == "is":
if condition == "is":

if self._has_ancestor:
self._Error('Only one ANCESTOR IS" clause allowed')
else:
self._Error('"IS" expected to follow "ANCESTOR"')
elif condition.lower() == "is":
elif condition == "is":
self._Error('"IS" can only be used when comparing against "ANCESTOR"')
elif condition.startswith("not") and condition != "not_in":
self._Error('"NOT " can only be used as "NOT IN"')

def _AddProcessedParameterFilter(self, identifier, condition, operator, parameters):
"""Add a filter with post-processing required.
Expand Down Expand Up @@ -409,8 +415,8 @@ def _AddProcessedParameterFilter(self, identifier, condition, operator, paramete
filter_rule = (self._ANCESTOR, "is")
assert condition.lower() == "is"

if operator == "list" and condition.lower() != "in":
self._Error("Only IN can process a list of values")
if operator == "list" and condition.lower() not in ["in", "not_in"]:
self._Error("Only IN can process a list of values, given '%s'" % condition)
sorced-jim marked this conversation as resolved.
Show resolved Hide resolved

self._filters.setdefault(filter_rule, []).append((operator, parameters))
return True
Expand Down Expand Up @@ -676,6 +682,8 @@ def query_filters(self, model_class, filters):
node = query_module.ParameterNode(prop, op, val)
elif op == "in":
node = prop._IN(val)
elif op == "not_in":
node = prop._NOT_IN(val)
else:
node = prop._comparison(op, val)
filters.append(node)
Expand Down
51 changes: 33 additions & 18 deletions google/cloud/ndb/model.py
Expand Up @@ -1258,6 +1258,36 @@ def __ge__(self, value):
"""FilterNode: Represents the ``>=`` comparison."""
return self._comparison(">=", value)

def _validate_and_canonicalize_values(self, value):
if not self._indexed:
raise exceptions.BadFilterError(
"Cannot query for unindexed property {}".format(self._name)
)

if not isinstance(value, (list, tuple, set, frozenset)):
raise exceptions.BadArgumentError(
"For field {}, expected list, tuple or set, got {!r}".format(
self._name, value
)
)

values = []
for sub_value in value:
if sub_value is not None:
sub_value = self._do_validate(sub_value)
sub_value = self._call_to_base_type(sub_value)
sub_value = self._datastore_type(sub_value)
values.append(sub_value)
return values

def _NOT_IN(self, value, server_op=False):
""".FilterNode: Represents the ``not_in`` filter."""
# Import late to avoid circular imports.
from google.cloud.ndb import query

values = self._validate_and_canonicalize_values(value)
return query.FilterNode(self._name, "not_in", values)

def _IN(self, value, server_op=False):
"""For the ``in`` comparison operator.

Expand Down Expand Up @@ -1297,27 +1327,12 @@ def _IN(self, value, server_op=False):
# Import late to avoid circular imports.
from google.cloud.ndb import query

if not self._indexed:
raise exceptions.BadFilterError(
"Cannot query for unindexed property {}".format(self._name)
)

if not isinstance(value, (list, tuple, set, frozenset)):
raise exceptions.BadArgumentError(
"Expected list, tuple or set, got {!r}".format(value)
)

values = []
for sub_value in value:
if sub_value is not None:
sub_value = self._do_validate(sub_value)
sub_value = self._call_to_base_type(sub_value)
sub_value = self._datastore_type(sub_value)
values.append(sub_value)

values = self._validate_and_canonicalize_values(value)
return query.FilterNode(self._name, "in", values, server_op=server_op)

IN = _IN
NOT_IN = _NOT_IN

"""Used to check if a property value is contained in a set of values.

For example:
Expand Down
5 changes: 4 additions & 1 deletion google/cloud/ndb/query.py
Expand Up @@ -172,9 +172,10 @@ def ranked(cls, rank):
_EQ_OP = "="
_NE_OP = "!="
_IN_OP = "in"
_NOT_IN_OP = "not_in"
_LT_OP = "<"
_GT_OP = ">"
_OPS = frozenset([_EQ_OP, _NE_OP, _LT_OP, "<=", _GT_OP, ">=", _IN_OP])
_OPS = frozenset([_EQ_OP, _NE_OP, _LT_OP, "<=", _GT_OP, ">=", _IN_OP, _NOT_IN_OP])

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -589,6 +590,8 @@ def resolve(self, bindings, used):
value = self._param.resolve(bindings, used)
if self._op == _IN_OP:
return self._prop._IN(value)
elif self._op == _NOT_IN_OP:
return self._prop._NOT_IN(value)
else:
return self._prop._comparison(self._op, value)

Expand Down
49 changes: 49 additions & 0 deletions tests/system/test_query.py
Expand Up @@ -1866,6 +1866,55 @@ class SomeKind(ndb.Model):
assert results[1].foo == 3


@pytest.mark.filterwarnings("ignore")
@pytest.mark.usefixtures("client_context")
def test_IN_timestamp(ds_entity):
for i in range(5):
entity_id = test_utils.system.unique_resource_id()
ds_entity(KIND, entity_id, foo=datetime.datetime.fromtimestamp(i))

class SomeKind(ndb.Model):
foo = ndb.DateTimeProperty()

eventually(SomeKind.query().fetch, length_equals(5))

t2 = datetime.datetime.fromtimestamp(2)
t3 = datetime.datetime.fromtimestamp(3)

query = SomeKind.query(SomeKind.foo.IN((t2, t3), server_op=True))
results = query.fetch()
assert len(results) == 2
assert results[0].foo == t2
assert results[1].foo == t3


@pytest.mark.filterwarnings("ignore")
@pytest.mark.usefixtures("client_context")
def test_NOT_IN(ds_entity):
for i in range(5):
entity_id = test_utils.system.unique_resource_id()
ds_entity(KIND, entity_id, foo=i, pt=ndb.GeoPt(i, i))

class SomeKind(ndb.Model):
foo = ndb.IntegerProperty()
pt = ndb.GeoPtProperty()

eventually(SomeKind.query().fetch, length_equals(5))

query = SomeKind.query(SomeKind.pt.NOT_IN([ndb.GeoPt(1, 1)]))
results = query.fetch()
assert len(results) == 4
assert results[0].foo == 0
assert results[1].foo == 2

query = SomeKind.gql("where foo not in :1", [2, 3])
results = query.fetch()
assert len(results) == 3
assert results[0].foo == 0
assert results[1].foo == 1
assert results[2].foo == 4


@pytest.mark.usefixtures("client_context")
def test_projection_with_json_property(dispose_of):
"""Regression test for #378
Expand Down
35 changes: 35 additions & 0 deletions tests/unit/test__gql.py
Expand Up @@ -198,11 +198,24 @@ def test_in_list():
("prop1", "IN"): [("list", [Literal(1), Literal(2), Literal(3)])]
}

@staticmethod
def test_not_in_list():
Literal = gql_module.Literal
gql = gql_module.GQL("SELECT * FROM SomeKind WHERE prop1 NOT IN (1, 2, 3)")
assert gql.filters() == {
("prop1", "NOT_IN"): [("list", [Literal(1), Literal(2), Literal(3)])]
}

@staticmethod
def test_cast_list_no_in():
with pytest.raises(exceptions.BadQueryError):
gql_module.GQL("SELECT * FROM SomeKind WHERE prop1=(1, 2, 3)")

@staticmethod
def test_not_without_in():
with pytest.raises(exceptions.BadQueryError):
gql_module.GQL("SELECT * FROM SomeKind WHERE prop1 NOT=1")

@staticmethod
def test_reference():
gql = gql_module.GQL("SELECT * FROM SomeKind WHERE prop1=:ref")
Expand Down Expand Up @@ -328,6 +341,16 @@ class SomeKind(model.Model):
query_module.FilterNode("prop1", "=", 3),
)

@staticmethod
@pytest.mark.usefixtures("in_context")
def test_get_query_not_in():
class SomeKind(model.Model):
prop1 = model.IntegerProperty()

gql = gql_module.GQL("SELECT prop1 FROM SomeKind WHERE prop1 NOT IN (1, 2)")
query = gql.get_query()
assert query.filters == query_module.FilterNode("prop1", "not_in", [1, 2])

@staticmethod
@pytest.mark.usefixtures("in_context")
def test_get_query_in_parameterized():
Expand All @@ -338,6 +361,18 @@ class SomeKind(model.Model):
query = gql.get_query()
assert "'in'," in str(query.filters)

@staticmethod
@pytest.mark.usefixtures("in_context")
def test_get_query_not_in_parameterized():
class SomeKind(model.Model):
prop1 = model.StringProperty()

gql = gql_module.GQL(
"SELECT prop1 FROM SomeKind WHERE prop1 NOT IN (:1, :2, :3)"
)
query = gql.get_query()
assert "'not_in'," in str(query.filters)

@staticmethod
@pytest.mark.usefixtures("in_context")
def test_get_query_keys_only():
Expand Down
11 changes: 10 additions & 1 deletion tests/unit/test_model.py
Expand Up @@ -575,7 +575,7 @@ def test__IN_client():
assert or_node == prop.IN(["a", None, "xy"])

@staticmethod
def test_server__IN():
def test__IN_server():
prop = model.Property("name", indexed=True)
in_node = prop._IN(["a", None, "xy"], server_op=True)
assert in_node == prop.IN(["a", None, "xy"], server_op=True)
Expand All @@ -588,6 +588,15 @@ def test_server__IN():
"name", "in", ["a", None, "xy"], server_op=True
)

@staticmethod
def test__NOT_IN_server():
prop = model.Property("name", indexed=True)
not_in_node = prop._NOT_IN(["a", None, "xy"])
assert not_in_node == prop.NOT_IN(["a", None, "xy"])
assert not_in_node == query_module.FilterNode(
"name", "not_in", ["a", None, "xy"]
)

@staticmethod
def test___neg__():
prop = model.Property("name")
Expand Down