# Shape of a range literal as handled here: "[<start>, <end>)".
# Compiled once at import time instead of on every call.
_RANGE_LITERAL = re.compile(r"\[.*, .*\)")


def _range_element_from_json(value, field):
    """Coerce ``value`` to a single bound (start or end) of a range.

    Args:
        value (str):
            The literal for one bound, or ``"UNBOUNDED"``.
        field:
            Object whose ``element_type`` attribute names the type of the
            range's elements: ``"DATE"``, ``"DATETIME"`` or ``"TIMESTAMP"``.

    Returns:
        :data:`None` when ``value`` is ``"UNBOUNDED"``; otherwise the result
        of the scalar parser that matches ``field.element_type``.

    Raises:
        ValueError: If ``field.element_type`` is not one of the supported
            element types.
    """
    # The unbounded marker is checked before the element type is inspected,
    # so it maps to None regardless of the element type.
    if value == "UNBOUNDED":
        return None

    element_type = field.element_type
    # The scalar parsers are called with ``None`` as their field argument.
    if element_type == "DATE":
        return _date_from_json(value, None)
    if element_type == "DATETIME":
        return _datetime_from_json(value, None)
    if element_type == "TIMESTAMP":
        return _timestamp_from_json(value, None)

    # Report the offending type (not the value being parsed) so the message
    # points at the actual problem.
    raise ValueError(f"Unsupported range element type: {element_type}")


def _split_range_literal(value):
    """Split a ``"[start, end)"`` literal into its two bound strings.

    Args:
        value (str): The literal representation of the range.

    Returns:
        Tuple[str, str]: The start and end bounds, still unparsed.

    Raises:
        ValueError: If ``value`` is not exactly of the form
            ``"[<start>, <end>)"`` with a single ``", "`` separator.
        TypeError: If ``value`` is not a string (raised by the regex match).
    """
    # fullmatch: trailing text after the closing parenthesis is a format
    # error, not something to be discovered later by a scalar parser.
    if _RANGE_LITERAL.fullmatch(value) is None:
        raise ValueError(f"Unknown range format: {value}")

    bounds = value[1:-1].split(", ")
    # More than one ", " separator would otherwise surface as an opaque
    # "too many values to unpack" error.
    if len(bounds) != 2:
        raise ValueError(f"Unknown range format: {value}")
    return bounds[0], bounds[1]


def _range_from_json(value, field):
    """Coerce ``value`` to a range.

    Args:
        value (str): The literal representation of the range.
        field (google.cloud.bigquery.schema.SchemaField):
            The field corresponding to the value. Its ``range_element_type``
            is used to parse each bound.

    Returns:
        Optional[dict]:
            ``{"start": ..., "end": ...}`` parsed from ``value``, where an
            ``"UNBOUNDED"`` bound becomes :data:`None`. Returns :data:`None`
            when ``_not_null(value, field)`` is false.

    Raises:
        ValueError: If ``value`` is not a well-formed range literal, or the
            range element type is unsupported.
        TypeError: If ``_not_null`` lets a non-string ``value`` through
            (the format check then fails on it).
    """
    if not _not_null(value, field):
        return None

    start, end = _split_range_literal(value)
    element_field = field.range_element_type
    return {
        "start": _range_element_from_json(start, element_field),
        "end": _range_element_from_json(end, element_field),
    }
class Test_range_from_json(unittest.TestCase):
    """Unit tests for ``google.cloud.bigquery._helpers._range_from_json``."""

    @staticmethod
    def _make_range_field(element_type):
        # Build a NULLABLE RANGE field whose element field carries the
        # given element type.
        element_field = _Field("NULLABLE", element_type=element_type)
        return _Field(
            "NULLABLE",
            field_type="RANGE",
            range_element_type=element_field,
        )

    def _call_fut(self, value, field):
        # Imported lazily so that the function under test is looked up at
        # call time.
        from google.cloud.bigquery import _helpers

        return _helpers._range_from_json(value, field)

    def test_w_none_nullable(self):
        result = self._call_fut(None, _Field("NULLABLE"))
        self.assertIsNone(result)

    def test_w_none_required(self):
        self.assertRaises(TypeError, self._call_fut, None, _Field("REQUIRED"))

    def test_w_wrong_format(self):
        # The ", " separator between the two bounds is missing.
        date_range = self._make_range_field("DATE")
        self.assertRaises(
            ValueError, self._call_fut, "[2009-06-172019-06-17)", date_range
        )

    def test_w_wrong_element_type(self):
        # TIME is not an element type the range parser accepts.
        time_range = self._make_range_field("TIME")
        self.assertRaises(
            ValueError, self._call_fut, "[15:31:38, 15:50:38)", time_range
        )

    def test_w_unbounded_value(self):
        date_range = self._make_range_field("DATE")
        expected = {"start": None, "end": datetime.date(2019, 6, 17)}
        actual = self._call_fut("[UNBOUNDED, 2019-06-17)", date_range)
        self.assertEqual(actual, expected)

    def test_w_date_value(self):
        date_range = self._make_range_field("DATE")
        expected = {
            "start": datetime.date(2009, 6, 17),
            "end": datetime.date(2019, 6, 17),
        }
        actual = self._call_fut("[2009-06-17, 2019-06-17)", date_range)
        self.assertEqual(actual, expected)

    def test_w_datetime_value(self):
        datetime_range = self._make_range_field("DATETIME")
        expected = {
            "start": datetime.datetime(2009, 6, 17, 13, 45, 30),
            "end": datetime.datetime(2019, 6, 17, 13, 45, 30),
        }
        actual = self._call_fut(
            "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", datetime_range
        )
        self.assertEqual(actual, expected)

    def test_w_timestamp_value(self):
        from google.cloud._helpers import _EPOCH

        timestamp_range = self._make_range_field("TIMESTAMP")
        start_offset = datetime.timedelta(seconds=1, microseconds=234567)
        end_offset = datetime.timedelta(seconds=1, microseconds=234789)
        expected = {"start": _EPOCH + start_offset, "end": _EPOCH + end_offset}
        actual = self._call_fut("[1234567, 1234789)", timestamp_range)
        self.assertEqual(actual, expected)


class _Field(object):
    """Lightweight stand-in for a schema field used by these unit tests.

    It only stores the constructor arguments as attributes of the same name.
    """

    def __init__(
        self,
        mode,
        name="unknown",
        field_type="UNKNOWN",
        fields=(),
        range_element_type=None,
        element_type=None,
    ):
        # Attributes present before RANGE support was added.
        self.mode = mode
        self.name = name
        self.field_type = field_type
        self.fields = fields
        # Attributes read by the RANGE parsing helpers.
        self.range_element_type = range_element_type
        self.element_type = element_type