From f0b0724d7e364cc3f3574e77076465657089b09c Mon Sep 17 00:00:00 2001 From: Jim Morrison Date: Thu, 29 Feb 2024 20:13:07 -0800 Subject: [PATCH] feat: Add support for server side NOT_IN filter. (#957) * tests: Add a test for IN queries with a more complex python object. * feat: Add support for server side NOT_IN filter. * Use NOT IN for GQL instead of NOT_IN. * Add missing test for GQL parameter resolving. --- google/cloud/ndb/_datastore_query.py | 1 + google/cloud/ndb/_gql.py | 26 +++++++++----- google/cloud/ndb/model.py | 51 ++++++++++++++++++---------- google/cloud/ndb/query.py | 5 ++- tests/system/test_query.py | 49 ++++++++++++++++++++++++++ tests/unit/test__gql.py | 35 +++++++++++++++++++ tests/unit/test_model.py | 11 +++++- tests/unit/test_query.py | 14 ++++++++ 8 files changed, 163 insertions(+), 29 deletions(-) diff --git a/google/cloud/ndb/_datastore_query.py b/google/cloud/ndb/_datastore_query.py index 480a2a68..7dd98a4c 100644 --- a/google/cloud/ndb/_datastore_query.py +++ b/google/cloud/ndb/_datastore_query.py @@ -58,6 +58,7 @@ ">=": query_pb2.PropertyFilter.Operator.GREATER_THAN_OR_EQUAL, "!=": query_pb2.PropertyFilter.Operator.NOT_EQUAL, "in": query_pb2.PropertyFilter.Operator.IN, + "not_in": query_pb2.PropertyFilter.Operator.NOT_IN, } _KEY_NOT_IN_CACHE = object() diff --git a/google/cloud/ndb/_gql.py b/google/cloud/ndb/_gql.py index 2d0a2745..60a17075 100644 --- a/google/cloud/ndb/_gql.py +++ b/google/cloud/ndb/_gql.py @@ -30,9 +30,9 @@ class GQL(object): [OFFSET ] [HINT (ORDER_FIRST | FILTER_FIRST | ANCESTOR_FIRST)] [;] - := {< | <= | > | >= | = | != | IN} - := {< | <= | > | >= | = | != | IN} CAST() - := IN (, ...) + := {< | <= | > | >= | = | != | IN | NOT IN} + := {< | <= | > | >= | = | != | IN | NOT IN} CAST() + := {IN | NOT IN} (, ...) := ANCESTOR IS The class is implemented using some basic regular expression tokenization @@ -186,7 +186,7 @@ def _entity(self): _identifier_regex = re.compile(r"(\w+(?:\.\w+)*)$") _quoted_identifier_regex = re.compile(r'((?:"[^"\s]+")+)$') - _conditions_regex = re.compile(r"(<=|>=|!=|=|<|>|is|in)$", re.IGNORECASE) + _conditions_regex = re.compile(r"(<=|>=|!=|=|<|>|is|in|not)$", re.IGNORECASE) _number_regex = re.compile(r"(\d+)$") _cast_regex = re.compile(r"(geopt|user|key|date|time|datetime)$", re.IGNORECASE) @@ -325,6 +325,9 @@ def _FilterList(self): condition = self._AcceptRegex(self._conditions_regex) if not condition: self._Error("Invalid WHERE Condition") + if condition.lower() == "not": + condition += "_" + self._AcceptRegex(self._conditions_regex) + self._CheckFilterSyntax(identifier, condition) if not self._AddSimpleFilter(identifier, condition, self._Reference()): @@ -366,22 +369,25 @@ def _GetValueList(self): return params - def _CheckFilterSyntax(self, identifier, condition): + def _CheckFilterSyntax(self, identifier, raw_condition): """Check that filter conditions are valid and throw errors if not. Args: identifier (str): identifier being used in comparison. condition (str): comparison operator used in the filter. """ + condition = raw_condition.lower() if identifier.lower() == "ancestor": - if condition.lower() == "is": + if condition == "is": if self._has_ancestor: self._Error('Only one ANCESTOR IS" clause allowed') else: self._Error('"IS" expected to follow "ANCESTOR"') - elif condition.lower() == "is": + elif condition == "is": self._Error('"IS" can only be used when comparing against "ANCESTOR"') + elif condition.startswith("not") and condition != "not_in": + self._Error('"NOT " can only be used as "NOT IN"') def _AddProcessedParameterFilter(self, identifier, condition, operator, parameters): """Add a filter with post-processing required. @@ -409,8 +415,8 @@ def _AddProcessedParameterFilter(self, identifier, condition, operator, paramete filter_rule = (self._ANCESTOR, "is") assert condition.lower() == "is" - if operator == "list" and condition.lower() != "in": - self._Error("Only IN can process a list of values") + if operator == "list" and condition.lower() not in ["in", "not_in"]: + self._Error("Only IN can process a list of values, given '%s'" % condition) self._filters.setdefault(filter_rule, []).append((operator, parameters)) return True @@ -676,6 +682,8 @@ def query_filters(self, model_class, filters): node = query_module.ParameterNode(prop, op, val) elif op == "in": node = prop._IN(val) + elif op == "not_in": + node = prop._NOT_IN(val) else: node = prop._comparison(op, val) filters.append(node) diff --git a/google/cloud/ndb/model.py b/google/cloud/ndb/model.py index 51d082f7..994daa42 100644 --- a/google/cloud/ndb/model.py +++ b/google/cloud/ndb/model.py @@ -1258,6 +1258,36 @@ def __ge__(self, value): """FilterNode: Represents the ``>=`` comparison.""" return self._comparison(">=", value) + def _validate_and_canonicalize_values(self, value): + if not self._indexed: + raise exceptions.BadFilterError( + "Cannot query for unindexed property {}".format(self._name) + ) + + if not isinstance(value, (list, tuple, set, frozenset)): + raise exceptions.BadArgumentError( + "For field {}, expected list, tuple or set, got {!r}".format( + self._name, value + ) + ) + + values = [] + for sub_value in value: + if sub_value is not None: + sub_value = self._do_validate(sub_value) + sub_value = self._call_to_base_type(sub_value) + sub_value = self._datastore_type(sub_value) + values.append(sub_value) + return values + + def _NOT_IN(self, value, server_op=False): + """.FilterNode: Represents the ``not_in`` filter.""" + # Import late to avoid circular imports. + from google.cloud.ndb import query + + values = self._validate_and_canonicalize_values(value) + return query.FilterNode(self._name, "not_in", values) + def _IN(self, value, server_op=False): """For the ``in`` comparison operator. @@ -1297,27 +1327,12 @@ def _IN(self, value, server_op=False): # Import late to avoid circular imports. from google.cloud.ndb import query - if not self._indexed: - raise exceptions.BadFilterError( - "Cannot query for unindexed property {}".format(self._name) - ) - - if not isinstance(value, (list, tuple, set, frozenset)): - raise exceptions.BadArgumentError( - "Expected list, tuple or set, got {!r}".format(value) - ) - - values = [] - for sub_value in value: - if sub_value is not None: - sub_value = self._do_validate(sub_value) - sub_value = self._call_to_base_type(sub_value) - sub_value = self._datastore_type(sub_value) - values.append(sub_value) - + values = self._validate_and_canonicalize_values(value) return query.FilterNode(self._name, "in", values, server_op=server_op) IN = _IN + NOT_IN = _NOT_IN + """Used to check if a property value is contained in a set of values. For example: diff --git a/google/cloud/ndb/query.py b/google/cloud/ndb/query.py index 6109fe11..bc2beadc 100644 --- a/google/cloud/ndb/query.py +++ b/google/cloud/ndb/query.py @@ -172,9 +172,10 @@ def ranked(cls, rank): _EQ_OP = "=" _NE_OP = "!=" _IN_OP = "in" +_NOT_IN_OP = "not_in" _LT_OP = "<" _GT_OP = ">" -_OPS = frozenset([_EQ_OP, _NE_OP, _LT_OP, "<=", _GT_OP, ">=", _IN_OP]) +_OPS = frozenset([_EQ_OP, _NE_OP, _LT_OP, "<=", _GT_OP, ">=", _IN_OP, _NOT_IN_OP]) _log = logging.getLogger(__name__) @@ -589,6 +590,8 @@ def resolve(self, bindings, used): value = self._param.resolve(bindings, used) if self._op == _IN_OP: return self._prop._IN(value) + elif self._op == _NOT_IN_OP: + return self._prop._NOT_IN(value) else: return self._prop._comparison(self._op, value) diff --git a/tests/system/test_query.py b/tests/system/test_query.py index fb2e9bbb..12bac380 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -1866,6 +1866,55 @@ class SomeKind(ndb.Model): assert results[1].foo == 3 +@pytest.mark.filterwarnings("ignore") +@pytest.mark.usefixtures("client_context") +def test_IN_timestamp(ds_entity): + for i in range(5): + entity_id = test_utils.system.unique_resource_id() + ds_entity(KIND, entity_id, foo=datetime.datetime.fromtimestamp(i)) + + class SomeKind(ndb.Model): + foo = ndb.DateTimeProperty() + + eventually(SomeKind.query().fetch, length_equals(5)) + + t2 = datetime.datetime.fromtimestamp(2) + t3 = datetime.datetime.fromtimestamp(3) + + query = SomeKind.query(SomeKind.foo.IN((t2, t3), server_op=True)) + results = query.fetch() + assert len(results) == 2 + assert results[0].foo == t2 + assert results[1].foo == t3 + + +@pytest.mark.filterwarnings("ignore") +@pytest.mark.usefixtures("client_context") +def test_NOT_IN(ds_entity): + for i in range(5): + entity_id = test_utils.system.unique_resource_id() + ds_entity(KIND, entity_id, foo=i, pt=ndb.GeoPt(i, i)) + + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + pt = ndb.GeoPtProperty() + + eventually(SomeKind.query().fetch, length_equals(5)) + + query = SomeKind.query(SomeKind.pt.NOT_IN([ndb.GeoPt(1, 1)])) + results = query.fetch() + assert len(results) == 4 + assert results[0].foo == 0 + assert results[1].foo == 2 + + query = SomeKind.gql("where foo not in :1", [2, 3]) + results = query.fetch() + assert len(results) == 3 + assert results[0].foo == 0 + assert results[1].foo == 1 + assert results[2].foo == 4 + + @pytest.mark.usefixtures("client_context") def test_projection_with_json_property(dispose_of): """Regression test for #378 diff --git a/tests/unit/test__gql.py b/tests/unit/test__gql.py index ee9371c8..3c96d4fe 100644 --- a/tests/unit/test__gql.py +++ b/tests/unit/test__gql.py @@ -198,11 +198,24 @@ def test_in_list(): ("prop1", "IN"): [("list", [Literal(1), Literal(2), Literal(3)])] } + @staticmethod + def test_not_in_list(): + Literal = gql_module.Literal + gql = gql_module.GQL("SELECT * FROM SomeKind WHERE prop1 NOT IN (1, 2, 3)") + assert gql.filters() == { + ("prop1", "NOT_IN"): [("list", [Literal(1), Literal(2), Literal(3)])] + } + @staticmethod def test_cast_list_no_in(): with pytest.raises(exceptions.BadQueryError): gql_module.GQL("SELECT * FROM SomeKind WHERE prop1=(1, 2, 3)") + @staticmethod + def test_not_without_in(): + with pytest.raises(exceptions.BadQueryError): + gql_module.GQL("SELECT * FROM SomeKind WHERE prop1 NOT=1") + @staticmethod def test_reference(): gql = gql_module.GQL("SELECT * FROM SomeKind WHERE prop1=:ref") @@ -328,6 +341,16 @@ class SomeKind(model.Model): query_module.FilterNode("prop1", "=", 3), ) + @staticmethod + @pytest.mark.usefixtures("in_context") + def test_get_query_not_in(): + class SomeKind(model.Model): + prop1 = model.IntegerProperty() + + gql = gql_module.GQL("SELECT prop1 FROM SomeKind WHERE prop1 NOT IN (1, 2)") + query = gql.get_query() + assert query.filters == query_module.FilterNode("prop1", "not_in", [1, 2]) + @staticmethod @pytest.mark.usefixtures("in_context") def test_get_query_in_parameterized(): @@ -338,6 +361,18 @@ class SomeKind(model.Model): query = gql.get_query() assert "'in'," in str(query.filters) + @staticmethod + @pytest.mark.usefixtures("in_context") + def test_get_query_not_in_parameterized(): + class SomeKind(model.Model): + prop1 = model.StringProperty() + + gql = gql_module.GQL( + "SELECT prop1 FROM SomeKind WHERE prop1 NOT IN (:1, :2, :3)" + ) + query = gql.get_query() + assert "'not_in'," in str(query.filters) + @staticmethod @pytest.mark.usefixtures("in_context") def test_get_query_keys_only(): diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index b57a6040..14f03cef 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -575,7 +575,7 @@ def test__IN_client(): assert or_node == prop.IN(["a", None, "xy"]) @staticmethod - def test_server__IN(): + def test__IN_server(): prop = model.Property("name", indexed=True) in_node = prop._IN(["a", None, "xy"], server_op=True) assert in_node == prop.IN(["a", None, "xy"], server_op=True) @@ -588,6 +588,15 @@ def test_server__IN(): "name", "in", ["a", None, "xy"], server_op=True ) + @staticmethod + def test__NOT_IN(): + prop = model.Property("name", indexed=True) + not_in_node = prop._NOT_IN(["a", None, "xy"]) + assert not_in_node == prop.NOT_IN(["a", None, "xy"]) + assert not_in_node == query_module.FilterNode( + "name", "not_in", ["a", None, "xy"] + ) + @staticmethod def test___neg__(): prop = model.Property("name") diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 13da4740..33b560b4 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -2408,3 +2408,17 @@ class SomeKind(model.Model): query = query_module.gql(gql_query, *positional, **keywords) compat_rep = "'xxx'" assert query.__repr__() == rep.format(compat_rep) + + @staticmethod + @pytest.mark.usefixtures("in_context") + def test_gql_with_bind_not_in(): + class SomeKind(model.Model): + prop1 = model.StringProperty() + + query = query_module.gql( + "SELECT * FROM SomeKind WHERE prop1 not in :1", ["a", "b", "c"] + ) + assert ( + query.__repr__() + == "Query(kind='SomeKind', filters=FilterNode('prop1', 'not_in', ['a', 'b', 'c']), order_by=[], offset=0)" + )