From c852c153c55025ba1187d61e313ead2308616c55 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 3 Apr 2024 07:09:21 -0400 Subject: [PATCH 01/13] fix: creates linting-typing.cfg in presubmit (#1881) * creates linting-typing.cfg in presubmit * attempt to filter out linting and typing tests from presubmit * lints and blackens this commit * revise environmental variables * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update noxfile.py * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/linting-typing.cfg * Update .kokoro/presubmit/presubmit.cfg * Update .kokoro/presubmit/presubmit.cfg --- .kokoro/presubmit/linting-typing.cfg | 7 +++++++ .kokoro/presubmit/presubmit.cfg | 4 ++++ noxfile.py | 24 ++++++++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 .kokoro/presubmit/linting-typing.cfg diff --git a/.kokoro/presubmit/linting-typing.cfg b/.kokoro/presubmit/linting-typing.cfg new file mode 100644 index 000000000..b1a7406c2 --- /dev/null +++ b/.kokoro/presubmit/linting-typing.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run these nox sessions. +env_vars: { + key: "NOX_SESSION" + value: "lint lint_setup_py blacken mypy mypy_samples pytype" +} diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index 17d071cae..fa39b1118 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -9,3 +9,7 @@ env_vars: { key: "RUN_SNIPPETS_TESTS" value: "false" } +env_vars: { + key: "RUN_LINTING_TYPING_TESTS" + value: "false" +} diff --git a/noxfile.py b/noxfile.py index 3adb4ba70..034bb843a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -132,6 +132,10 @@ def unit_noextras(session): def mypy(session): """Run type checks with mypy.""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("-e", ".[all]") session.install(MYPY_VERSION) @@ -153,6 +157,10 @@ def pytype(session): # recent version avoids the error until a possibly better fix is found. # https://github.com/googleapis/python-bigquery/issues/655 + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) @@ -213,6 +221,10 @@ def system(session): def mypy_samples(session): """Run type checks with mypy.""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("pytest") for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"): session.install("-r", str(requirements_path)) @@ -394,6 +406,10 @@ def lint(session): serious code quality issues. """ + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. 
+ if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("flake8", BLACK_VERSION) session.install("-e", ".") session.run("flake8", os.path.join("google", "cloud", "bigquery")) @@ -408,6 +424,10 @@ def lint(session): def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install("docutils", "Pygments") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @@ -418,6 +438,10 @@ def blacken(session): Format code to uniform standard. """ + # Check the value of `RUN_LINTING_TYPING_TESTS` env var. It defaults to true. + if os.environ.get("RUN_LINTING_TYPING_TESTS", "true") == "false": + session.skip("RUN_LINTING_TYPING_TESTS is set to false, skipping") + session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From d08ca708ba91b01fe5e7095e612c326cd3bcfe98 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 8 Apr 2024 21:16:14 +0200 Subject: [PATCH 02/13] chore(deps): update all dependencies (#1882) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin pycparser==2.21 for python 3.7 --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1c7bfa5b3..76b1a7b6b 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -36,7 +36,8 @@ pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' -pycparser==2.21 +pycparser==2.21; python_version == '3.7' +pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 python-dateutil==2.9.0.post0 pytz==2024.1 From 3634405fa1b40ae5f69b06d7c7f8de4e3d246d92 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 10 Apr 2024 13:58:29 -0700 Subject: [PATCH 03/13] feat: support RANGE in queries Part 1: JSON (#1884) * feat: support range in queries as dict * fix sys tests * lint * fix typo --- google/cloud/bigquery/_helpers.py | 41 ++++++++++++ tests/system/helpers.py | 5 ++ tests/system/test_query.py | 6 +- tests/unit/test__helpers.py | 105 +++++++++++++++++++++++++++++- 4 files changed, 153 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 7198b60c2..0572867d7 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -309,6 +309,46 @@ def _json_from_json(value, field): return None +def _range_element_from_json(value, field): + """Coerce 'value' to a range element value, if set or not nullable.""" + if value == "UNBOUNDED": + return None + elif field.element_type == "DATE": + return _date_from_json(value, None) + elif field.element_type == "DATETIME": + return _datetime_from_json(value, None) + elif field.element_type == "TIMESTAMP": + return _timestamp_from_json(value, None) + else: + raise 
ValueError(f"Unsupported range field type: {value}") + + +def _range_from_json(value, field): + """Coerce 'value' to a range, if set or not nullable. + + Args: + value (str): The literal representation of the range. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[dict]: + The parsed range object from ``value`` if the ``field`` is not + null (otherwise it is :data:`None`). + """ + range_literal = re.compile(r"\[.*, .*\)") + if _not_null(value, field): + if range_literal.match(value): + start, end = value[1:-1].split(", ") + start = _range_element_from_json(start, field.range_element_type) + end = _range_element_from_json(end, field.range_element_type) + return {"start": start, "end": end} + else: + raise ValueError(f"Unknown range format: {value}") + else: + return None + + # Parse BigQuery API response JSON into a Python representation. _CELLDATA_FROM_JSON = { "INTEGER": _int_from_json, @@ -329,6 +369,7 @@ def _json_from_json(value, field): "TIME": _time_from_json, "RECORD": _record_from_json, "JSON": _json_from_json, + "RANGE": _range_from_json, } _QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) diff --git a/tests/system/helpers.py b/tests/system/helpers.py index 721f55040..7fd344eeb 100644 --- a/tests/system/helpers.py +++ b/tests/system/helpers.py @@ -25,6 +25,7 @@ _naive = datetime.datetime(2016, 12, 5, 12, 41, 9) _naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) _stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat()) +_date = _naive.date().isoformat() _stamp_microseconds = _stamp + ".250000" _zoned = _naive.replace(tzinfo=UTC) _zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC) @@ -78,6 +79,10 @@ ), ("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]), ("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"), + ( + "SELECT RANGE '[UNBOUNDED, %s)'" % _date, + {"start": None, "end": _naive.date()}, + ), ] diff --git a/tests/system/test_query.py b/tests/system/test_query.py index 0494272d9..d94a117e3 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -425,7 +425,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_date", - "[2016-12-05, UNBOUNDED)", + {"end": None, "start": datetime.date(2016, 12, 5)}, [ RangeQueryParameter( name="range_date", @@ -436,7 +436,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_datetime", - "[2016-12-05T00:00:00, UNBOUNDED)", + {"end": None, "start": datetime.datetime(2016, 12, 5, 0, 0)}, [ RangeQueryParameter( name="range_datetime", @@ -447,7 +447,7 @@ def test_query_statistics(bigquery_client, query_api_method): ), ( "SELECT @range_unbounded", - "[UNBOUNDED, UNBOUNDED)", + {"end": None, "start": None}, [ RangeQueryParameter( name="range_unbounded", diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 320c57737..a50625e2a 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -452,6 +452,99 @@ def test_w_bogus_string_value(self): self._call_fut("12:12:27.123", object()) +class Test_range_from_json(unittest.TestCase): + def _call_fut(self, value, field): + from google.cloud.bigquery._helpers import _range_from_json + + return _range_from_json(value, field) + + def test_w_none_nullable(self): + self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) + + def test_w_none_required(self): + with self.assertRaises(TypeError): + self._call_fut(None, _Field("REQUIRED")) + + def test_w_wrong_format(self): 
+ range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + with self.assertRaises(ValueError): + self._call_fut("[2009-06-172019-06-17)", range_field) + + def test_w_wrong_element_type(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="TIME"), + ) + with self.assertRaises(ValueError): + self._call_fut("[15:31:38, 15:50:38)", range_field) + + def test_w_unbounded_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + coerced = self._call_fut("[UNBOUNDED, 2019-06-17)", range_field) + self.assertEqual( + coerced, + {"start": None, "end": datetime.date(2019, 6, 17)}, + ) + + def test_w_date_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATE"), + ) + coerced = self._call_fut("[2009-06-17, 2019-06-17)", range_field) + self.assertEqual( + coerced, + { + "start": datetime.date(2009, 6, 17), + "end": datetime.date(2019, 6, 17), + }, + ) + + def test_w_datetime_value(self): + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="DATETIME"), + ) + coerced = self._call_fut( + "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field + ) + self.assertEqual( + coerced, + { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 6, 17, 13, 45, 30), + }, + ) + + def test_w_timestamp_value(self): + from google.cloud._helpers import _EPOCH + + range_field = _Field( + "NULLABLE", + field_type="RANGE", + range_element_type=_Field("NULLABLE", element_type="TIMESTAMP"), + ) + coerced = self._call_fut("[1234567, 1234789)", range_field) + self.assertEqual( + coerced, + { + "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), + "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789), + }, + ) + + class Test_record_from_json(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _record_from_json @@ -1323,11 +1416,21 @@ def test_w_str(self): class _Field(object): - def __init__(self, mode, name="unknown", field_type="UNKNOWN", fields=()): + def __init__( + self, + mode, + name="unknown", + field_type="UNKNOWN", + fields=(), + range_element_type=None, + element_type=None, + ): self.mode = mode self.name = name self.field_type = field_type self.fields = fields + self.range_element_type = range_element_type + self.element_type = element_type def _field_isinstance_patcher(): From 38697fb942516fc2f6f5e21e19a11811fbaeb1f4 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 11 Apr 2024 13:49:15 -0400 Subject: [PATCH 04/13] feat: adds billing to opentel (#1889) --- google/cloud/bigquery/opentelemetry_tracing.py | 8 ++++++++ tests/unit/test_opentelemetry_tracing.py | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index e2a05e4d0..b5f6bf991 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -153,4 +153,12 @@ def _set_job_attributes(job_ref): if job_ref.num_child_jobs is not None: job_attributes["num_child_jobs"] = job_ref.num_child_jobs + total_bytes_billed = getattr(job_ref, "total_bytes_billed", None) + if total_bytes_billed is not None: + job_attributes["total_bytes_billed"] = 
total_bytes_billed + + total_bytes_processed = getattr(job_ref, "total_bytes_processed", None) + if total_bytes_processed is not None: + job_attributes["total_bytes_processed"] = total_bytes_processed + return job_attributes diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 579d7b1b7..546cc02bd 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -142,6 +142,8 @@ def test_default_job_attributes(setup): "timeEnded": ended_time.isoformat(), "hasErrors": True, "state": "some_job_state", + "total_bytes_billed": 42, + "total_bytes_processed": 13, } with mock.patch("google.cloud.bigquery.job._AsyncJob") as test_job_ref: test_job_ref.job_id = "test_job_id" @@ -154,6 +156,8 @@ def test_default_job_attributes(setup): test_job_ref.ended = ended_time test_job_ref.error_result = error_result test_job_ref.state = "some_job_state" + test_job_ref.total_bytes_billed = 42 + test_job_ref.total_bytes_processed = 13 with opentelemetry_tracing.create_span( TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref @@ -180,6 +184,8 @@ def test_optional_job_attributes(setup): test_job_ref.state = "some_job_state" test_job_ref.num_child_jobs = None test_job_ref.parent_job_id = None + test_job_ref.total_bytes_billed = None + test_job_ref.total_bytes_processed = None with opentelemetry_tracing.create_span( TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref From 19394ab2ab2fa67b1995a3c5e53f06f99500d3f6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 11 Apr 2024 21:17:23 +0200 Subject: [PATCH 05/13] chore(deps): update all dependencies (#1891) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revert pinned requirement version and add triple equal "===" prevents dependabot from attempting to upgrade it in the future --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 8561934dc..fee6806b7 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 76b1a7b6b..e11fa09cf 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -15,7 +15,7 @@ geopandas===0.13.2; python_version == '3.8' geopandas==0.14.3; python_version >= '3.9' google-api-core==2.18.0 google-auth==2.29.0 -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 @@ -36,7 +36,7 @@ pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' -pycparser==2.21; python_version == '3.7' +pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 
python-dateutil==2.9.0.post0 @@ -47,7 +47,7 @@ rsa==4.9 Shapely==2.0.3 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.10.0; python_version >= '3.8' +typing-extensions==4.11.0; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 9179db067..05fd1907b 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.19.0 +google.cloud.bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 8f2e93620..40fba4b87 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,12 +1,12 @@ db-dtypes==1.2.0 -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 google-cloud-bigquery-storage==2.24.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.8.3; python_version >= '3.9' +matplotlib==3.8.4; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.1; python_version >= '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index af9436c51..95f915364 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.19.0 +google-cloud-bigquery==3.20.1 From 5ed9ccee204b7cf8e96cb0e050f6830c05f3b4fd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 11 Apr 2024 15:33:30 -0400 Subject: [PATCH 06/13] feat: Add compression option ZSTD. (#1890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add ZSTD to compression types * feat: adds tests re Compression types * revise datatype from Enum to object * adds license text and docstring * change object back to enum datatype * updates compression object comparison * updates Compression class * jsonify and sort the input and output for testing * Update tests/unit/job/test_extract.py * moved json import statement * removed enums test and file --------- Co-authored-by: Ethan Steinberg Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/enums.py | 5 ++++- tests/unit/job/test_extract.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index d75037ad1..1abe28381 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -22,7 +22,7 @@ class AutoRowIDs(enum.Enum): GENERATE_UUID = enum.auto() -class Compression(object): +class Compression(str, enum.Enum): """The compression type to use for exported files. The default value is :attr:`NONE`. 
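[Editor's note: the sketch below is not part of the upstream diff. It illustrates why this patch swaps `Compression(object)` for `Compression(str, enum.Enum)`: with the `str` mixin, members compare equal to their raw string values and serialize as plain strings in JSON request bodies, which is what the reworked `test_to_api_repr` later in this patch relies on. The two-member enum is a stripped-down stand-in for the real class, not the full definition.]

```python
import enum
import json


class Compression(str, enum.Enum):
    """Stripped-down stand-in for google.cloud.bigquery.enums.Compression."""

    SNAPPY = "SNAPPY"
    ZSTD = "ZSTD"


# The str mixin keeps existing plain-string comparisons working unchanged...
assert Compression.ZSTD == "ZSTD"
assert Compression("ZSTD") is Compression.ZSTD

# ...and members serialize as bare strings in API payloads, so
# json.dumps(config.to_api_repr(), sort_keys=True) round-trips cleanly
# in the updated extract-job test.
assert json.dumps({"compression": Compression.ZSTD}) == '{"compression": "ZSTD"}'
```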
@@ -39,6 +39,9 @@ class Compression(object): SNAPPY = "SNAPPY" """Specifies SNAPPY format.""" + ZSTD = "ZSTD" + """Specifies ZSTD format.""" + NONE = "NONE" """Specifies no compression.""" diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py index 76ee72f28..ee0d67d68 100644 --- a/tests/unit/job/test_extract.py +++ b/tests/unit/job/test_extract.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json from unittest import mock from ..helpers import make_connection @@ -45,9 +46,8 @@ def test_to_api_repr(self): config.print_header = False config._properties["extract"]["someNewField"] = "some-value" config.use_avro_logical_types = True - resource = config.to_api_repr() - self.assertEqual( - resource, + resource = json.dumps(config.to_api_repr(), sort_keys=True) + expected = json.dumps( { "extract": { "compression": "SNAPPY", @@ -58,6 +58,12 @@ def test_to_api_repr(self): "useAvroLogicalTypes": True, } }, + sort_keys=True, + ) + + self.assertEqual( + resource, + expected, ) def test_from_api_repr(self): From 5c6f7d9a98a84a9c39123dd621915f56f53d34bb Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 12 Apr 2024 17:18:51 +0200 Subject: [PATCH 07/13] chore(deps): update dependency idna to v3.7 [security] (#1896) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e11fa09cf..e4b63cdaa 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -22,7 +22,7 @@ google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 grpcio==1.62.1 -idna==3.6 +idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 packaging==24.0 From 88501c0cc3d88423c5e1b421fcd6b69cc72e7d51 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 12 Apr 2024 18:49:08 +0200 Subject: [PATCH 08/13] chore(deps): update all dependencies (#1893) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e4b63cdaa..b3d9bc841 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -28,7 +28,7 @@ mypy-extensions==1.0.0 packaging==24.0 pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==15.0.2; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 05fd1907b..61471a348 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -6,4 +6,4 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' diff --git a/samples/notebooks/requirements.txt 
b/samples/notebooks/requirements.txt index 40fba4b87..3960f47b9 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -9,4 +9,4 @@ matplotlib===3.7.4; python_version == '3.8' matplotlib==3.8.4; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.1; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' From a0fddbba1aac1ae94aa8ec75f9d0b158b430549b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:41:37 -0400 Subject: [PATCH 09/13] chore(python): bump idna from 3.4 to 3.7 in .kokoro (#1897) * chore(python): bump idna from 3.4 to 3.7 in .kokoro Source-Link: https://github.com/googleapis/synthtool/commit/d50980e704793a2d3310bfb3664f3a82f24b5796 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5a4c19d17e597b92d786e569be101e636c9c2817731f80a5adec56b2aa8fe070 * Apply changes from googleapis/synthtool#1950 --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- .github/auto-label.yaml | 5 +++++ .github/blunderbuss.yml | 17 +++++++++++++++++ .kokoro/requirements.txt | 6 +++--- docs/index.rst | 5 +++++ docs/summary_overview.md | 22 ++++++++++++++++++++++ 6 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 .github/blunderbuss.yml create mode 100644 docs/summary_overview.md diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index dc9c56e9d..81f87c569 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:a8a80fc6456e433df53fc2a0d72ca0345db0ddefb409f1b75b118dfd1babd952 -# created: 2024-03-15T16:25:47.905264637Z \ No newline at end of file + digest: sha256:5a4c19d17e597b92d786e569be101e636c9c2817731f80a5adec56b2aa8fe070 +# created: 2024-04-12T11:35:58.922854369Z diff --git a/.github/auto-label.yaml b/.github/auto-label.yaml index b2016d119..8b37ee897 100644 --- a/.github/auto-label.yaml +++ b/.github/auto-label.yaml @@ -13,3 +13,8 @@ # limitations under the License. requestsize: enabled: true + +path: + pullrequest: true + paths: + samples: "samples" diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml new file mode 100644 index 000000000..5b7383dc7 --- /dev/null +++ b/.github/blunderbuss.yml @@ -0,0 +1,17 @@ +# Blunderbuss config +# +# This file controls who is assigned for pull requests and issues. +# Note: This file is autogenerated. To make changes to the assignee +# team, please update `codeowner_team` in `.repo-metadata.json`. 
+assign_issues: + - googleapis/api-bigquery + +assign_issues_by: + - labels: + - "samples" + to: + - googleapis/python-samples-reviewers + - googleapis/api-bigquery + +assign_prs: + - googleapis/api-bigquery diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index dd61f5f32..51f92b8e1 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -252,9 +252,9 @@ googleapis-common-protos==1.61.0 \ --hash=sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0 \ --hash=sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b # via google-api-core -idna==3.4 \ - --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 \ - --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 +idna==3.7 \ + --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ + --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # via requests importlib-metadata==6.8.0 \ --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \ diff --git a/docs/index.rst b/docs/index.rst index 500c67a7f..6d6ed63f6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -48,3 +48,8 @@ For a list of all ``google-cloud-bigquery`` releases: :maxdepth: 2 changelog + +.. toctree:: + :hidden: + + summary_overview.md diff --git a/docs/summary_overview.md b/docs/summary_overview.md new file mode 100644 index 000000000..6dd228e13 --- /dev/null +++ b/docs/summary_overview.md @@ -0,0 +1,22 @@ +[ +This is a templated file. Adding content to this file may result in it being +reverted. Instead, if you want to place additional content, create an +"overview_content.md" file in `docs/` directory. The Sphinx tool will +pick up on the content and merge the content. +]: # + +# Google Cloud BigQuery API + +Overview of the APIs available for Google Cloud BigQuery API. + +## All entries + +Classes, methods and properties & attributes for +Google Cloud BigQuery API. + +[classes](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_class.html) + +[methods](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_method.html) + +[properties and +attributes](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_property.html) From 82ae908fbf3b2361343fff1859d3533383dc50ec Mon Sep 17 00:00:00 2001 From: Toran Sahu Date: Fri, 12 Apr 2024 23:40:17 +0530 Subject: [PATCH 10/13] =?UTF-8?q?fix:=20Remove=20duplicate=20key=20time=5F?= =?UTF-8?q?partitioning=20from=20Table.=5FPROPERTY=5FTO=5FA=E2=80=A6=20(#1?= =?UTF-8?q?898)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …PI_FIELD Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- google/cloud/bigquery/table.py | 1 - 1 file changed, 1 deletion(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c002822fe..73e755e9e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -385,7 +385,6 @@ class Table(_TableBase): "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", - "time_partitioning": "timePartitioning", "type": "type", "view_use_legacy_sql": "view", "view_query": "view", From bf8861c3473a1af978db7a06463ddc0bad86f326 Mon Sep 17 00:00:00 2001 From: kserruys Date: Fri, 12 Apr 2024 20:42:29 +0200 Subject: [PATCH 11/13] fix: add types to DatasetReference constructor (#1601) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add types to DatasetReference constructor * fix: add types to DatasetReference constructor * fix: DatasetReference.from_string test coverage --------- Co-authored-by: Karel Serruys Co-authored-by: Chalmer Lowe Co-authored-by: meredithslota Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/dataset.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c313045ce..c49a52faf 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -92,7 +92,7 @@ class DatasetReference(object): ValueError: If either argument is not of type ``str``. """ - def __init__(self, project, dataset_id): + def __init__(self, project: str, dataset_id: str): if not isinstance(project, str): raise ValueError("Pass a string for project") if not isinstance(dataset_id, str): @@ -166,22 +166,24 @@ def from_string( standard SQL format. """ output_dataset_id = dataset_id - output_project_id = default_project parts = _helpers._split_id(dataset_id) - if len(parts) == 1 and not default_project: - raise ValueError( - "When default_project is not set, dataset_id must be a " - "fully-qualified dataset ID in standard SQL format, " - 'e.g., "project.dataset_id" got {}'.format(dataset_id) - ) + if len(parts) == 1: + if default_project is not None: + output_project_id = default_project + else: + raise ValueError( + "When default_project is not set, dataset_id must be a " + "fully-qualified dataset ID in standard SQL format, " + 'e.g., "project.dataset_id" got {}'.format(dataset_id) + ) elif len(parts) == 2: output_project_id, output_dataset_id = parts - elif len(parts) > 2: + else: raise ValueError( "Too many parts in dataset_id. Expected a fully-qualified " - "dataset ID in standard SQL format. e.g. " - '"project.dataset_id", got {}'.format(dataset_id) + "dataset ID in standard SQL format, " + 'e.g. 
"project.dataset_id", got {}'.format(dataset_id) ) return cls(output_project_id, output_dataset_id) From 1367b584b68d917ec325ce4383a0e9a36205b894 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 18 Apr 2024 09:31:40 -0500 Subject: [PATCH 12/13] fix: avoid unnecessary API call in QueryJob.result() when job is already finished (#1900) fix: retry query job after ambiguous failures Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_job_helpers.py | 9 +- google/cloud/bigquery/job/query.py | 172 ++++++++----- google/cloud/bigquery/retry.py | 52 +++- tests/unit/job/test_query.py | 334 +++++++++++++++----------- tests/unit/test__job_helpers.py | 38 ++- tests/unit/test_job_retry.py | 172 ++++++++++++- 6 files changed, 547 insertions(+), 230 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 602a49eba..290439394 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -258,15 +258,16 @@ def _to_query_job( errors = query_response["errors"] query_job._properties["status"]["errors"] = errors - # Transform job state so that QueryJob doesn't try to restart the query. + # Avoid an extra call to `getQueryResults` if the query has finished. job_complete = query_response.get("jobComplete") if job_complete: - query_job._properties["status"]["state"] = "DONE" query_job._query_results = google.cloud.bigquery.query._QueryResults( query_response ) - else: - query_job._properties["status"]["state"] = "PENDING" + + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + query_job._properties["status"]["state"] = "PENDING" return query_job diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e92e9cb9e..7436b6013 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -17,11 +17,11 @@ import concurrent.futures import copy import re +import time import typing from typing import Any, Dict, Iterable, List, Optional, Union from google.api_core import exceptions -from google.api_core.future import polling as polling_future from google.api_core import retry as retries import requests @@ -1383,7 +1383,7 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): def _reload_query_results( self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None ): - """Refresh the cached query results. + """Refresh the cached query results unless already cached and complete. Args: retry (Optional[google.api_core.retry.Retry]): @@ -1392,6 +1392,8 @@ def _reload_query_results( The number of seconds to wait for the underlying HTTP transport before using ``retry``. """ + # Optimization: avoid a call to jobs.getQueryResults if it's already + # been fetched, e.g. from jobs.query first page of results. if self._query_results and self._query_results.complete: return @@ -1430,40 +1432,6 @@ def _reload_query_results( timeout=transport_timeout, ) - def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): - """Check if the query has finished running and raise if it's not. - - If the query has finished, also reload the job itself. - """ - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. 
- transport_timeout = timeout if timeout is not None else self._transport_timeout - - try: - self._reload_query_results(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - # Reloading also updates error details on self, thus no need for an - # explicit self.set_exception() call if reloading succeeds. - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError: - # Use the query results reload exception, as it generally contains - # much more useful error information. - self.set_exception(exc) - finally: - return - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if not self._query_results.complete: - raise polling_future._OperationNotComplete() - else: - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - self.set_exception(exc) - def result( # type: ignore # (incompatible with supertype) self, page_size: Optional[int] = None, @@ -1528,6 +1496,10 @@ def result( # type: ignore # (incompatible with supertype) If Non-``None`` and non-default ``job_retry`` is provided and the job is not retryable. """ + # Note: Since waiting for a query job to finish is more complex than + # refreshing the job state in a loop, we avoid calling the superclass + # in this method. + if self.dry_run: return _EmptyRowIterator( project=self.project, @@ -1548,46 +1520,124 @@ def result( # type: ignore # (incompatible with supertype) " provided to the query that created this job." ) - first = True + restart_query_job = False + + def is_job_done(): + nonlocal restart_query_job - def do_get_result(): - nonlocal first + if restart_query_job: + restart_query_job = False - if first: - first = False - else: + # The original job has failed. Create a new one. + # # Note that we won't get here if retry_do_query is # None, because we won't use a retry. - - # The orinal job is failed. Create a new one. job = retry_do_query() - # If it's already failed, we might as well stop: - if job.done() and job.exception() is not None: - raise job.exception() - # Become the new job: self.__dict__.clear() self.__dict__.update(job.__dict__) - # This shouldn't be necessary, because once we have a good - # job, it should stay good,and we shouldn't have to retry. - # But let's be paranoid. :) + # It's possible the job fails again and we'll have to + # retry that too. self._retry_do_query = retry_do_query self._job_retry = job_retry - super(QueryJob, self).result(retry=retry, timeout=timeout) - - # Since the job could already be "done" (e.g. got a finished job - # via client.get_job), the superclass call to done() might not - # set the self._query_results cache. - if self._query_results is None or not self._query_results.complete: - self._reload_query_results(retry=retry, timeout=timeout) + # Refresh the job status with jobs.get because some of the + # exceptions thrown by jobs.getQueryResults like timeout and + # rateLimitExceeded errors are ambiguous. We want to know if + # the query job failed and not just the call to + # jobs.getQueryResults. + if self.done(retry=retry, timeout=timeout): + # If it's already failed, we might as well stop. + job_failed_exception = self.exception() + if job_failed_exception is not None: + # Only try to restart the query job if the job failed for + # a retriable reason. For example, don't restart the query + # if the call to reload the job metadata within self.done() + # timed out. 
+ # + # The `restart_query_job` must only be called after a + # successful call to the `jobs.get` REST API and we + # determine that the job has failed. + # + # The `jobs.get` REST API + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get) + # is called via `self.done()` which calls + # `self.reload()`. + # + # To determine if the job failed, the `self.exception()` + # is set from `self.reload()` via + # `self._set_properties()`, which translates the + # `Job.status.errorResult` field + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.error_result) + # into an exception that can be processed by the + # `job_retry` predicate. + restart_query_job = True + raise job_failed_exception + else: + # Make sure that the _query_results are cached so we + # can return a complete RowIterator. + # + # Note: As an optimization, _reload_query_results + # doesn't make any API calls if the query results are + # already cached and have jobComplete=True in the + # response from the REST API. This ensures we aren't + # making any extra API calls if the previous loop + # iteration fetched the finished job. + self._reload_query_results(retry=retry, timeout=timeout) + return True + + # Call jobs.getQueryResults with max results set to 0 just to + # wait for the query to finish. Unlike most methods, + # jobs.getQueryResults hangs as long as it can to ensure we + # know when the query has finished as soon as possible. + self._reload_query_results(retry=retry, timeout=timeout) + + # Even if the query is finished now according to + # jobs.getQueryResults, we'll want to reload the job status if + # it's not already DONE. + return False if retry_do_query is not None and job_retry is not None: - do_get_result = job_retry(do_get_result) - - do_get_result() + is_job_done = job_retry(is_job_done) + + # timeout can be a number of seconds, `None`, or a + # `google.api_core.future.polling.PollingFuture._DEFAULT_VALUE` + # sentinel object indicating a default timeout if we choose to add + # one some day. This value can come from our PollingFuture + # superclass and was introduced in + # https://github.com/googleapis/python-api-core/pull/462. + if isinstance(timeout, (float, int)): + remaining_timeout = timeout + else: + # Note: we may need to handle _DEFAULT_VALUE as a separate + # case someday, but even then the best we can do for queries + # is 72+ hours for hyperparameter tuning jobs: + # https://cloud.google.com/bigquery/quotas#query_jobs + # + # The timeout for a multi-statement query is 24+ hours. See: + # https://cloud.google.com/bigquery/quotas#multi_statement_query_limits + remaining_timeout = None + + if remaining_timeout is None: + # Since is_job_done() calls jobs.getQueryResults, which is a + # long-running API, don't delay the next request at all. + while not is_job_done(): + pass + else: + # Use a monotonic clock since we don't actually care about + # daylight savings or similar, just the elapsed time. 
+ previous_time = time.monotonic() + + while not is_job_done(): + current_time = time.monotonic() + elapsed_time = current_time - previous_time + remaining_timeout = remaining_timeout - elapsed_time + previous_time = current_time + + if remaining_timeout < 0: + raise concurrent.futures.TimeoutError() except exceptions.GoogleAPICallError as exc: exc.message = _EXCEPTION_FOOTER_TEMPLATE.format( diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 01b127972..c9898287f 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -36,10 +36,25 @@ _DEFAULT_RETRY_DEADLINE = 10.0 * 60.0 # 10 minutes -# Allow for a few retries after the API request times out. This relevant for -# rateLimitExceeded errors, which can be raised either by the Google load -# balancer or the BigQuery job server. -_DEFAULT_JOB_DEADLINE = 3.0 * _DEFAULT_RETRY_DEADLINE +# Ambiguous errors (e.g. internalError, backendError, rateLimitExceeded) retry +# until the full `_DEFAULT_RETRY_DEADLINE`. This is because the +# `jobs.getQueryResults` REST API translates a job failure into an HTTP error. +# +# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate +# if we can fail early for ambiguous errors in `QueryJob.result()`'s call to +# the `jobs.getQueryResult` API. +# +# We need `_DEFAULT_JOB_DEADLINE` to be some multiple of +# `_DEFAULT_RETRY_DEADLINE` to allow for a few retries after the retry +# timeout is reached. +# +# Note: This multiple should actually be a multiple of +# (2 * _DEFAULT_RETRY_DEADLINE). After an ambiguous exception, the first +# call from `job_retry()` refreshes the job state without actually restarting +# the query. The second `job_retry()` actually restarts the query. For a more +# detailed explanation, see the comments where we set `restart_query_job = True` +# in `QueryJob.result()`'s inner `is_job_done()` function. +_DEFAULT_JOB_DEADLINE = 2.0 * (2.0 * _DEFAULT_RETRY_DEADLINE) def _should_retry(exc): @@ -66,6 +81,11 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +# Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We +# briefly had a default timeout, but even setting it at more than twice the +# theoretical server-side default timeout of 2 minutes was not enough for +# complex queries. See: +# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647 DEFAULT_TIMEOUT = None """The default API timeout. @@ -73,10 +93,32 @@ def _should_retry(exc): deadline on the retry object. """ -job_retry_reasons = "rateLimitExceeded", "backendError", "jobRateLimitExceeded" +job_retry_reasons = ( + "rateLimitExceeded", + "backendError", + "internalError", + "jobRateLimitExceeded", +) def _job_should_retry(exc): + # Sometimes we have ambiguous errors, such as 'backendError' which could + # be due to an API problem or a job problem. For these, make sure we retry + # our is_job_done() function. + # + # Note: This won't restart the job unless we know for sure it's because of + # the job status and set restart_query_job = True in that loop. This means + # that we might end up calling this predicate twice for the same job + # but from different paths: (1) from jobs.getQueryResults RetryError and + # (2) from translating the job error from the body of a jobs.get response. 
+ # + # Note: If we start retrying job types other than queries where we don't + # call the problematic getQueryResults API to check the status, we need + # to provide a different predicate, as there shouldn't be ambiguous + # errors in those cases. + if isinstance(exc, exceptions.RetryError): + exc = exc.cause + if not hasattr(exc, "errors") or len(exc.errors) == 0: return False diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 37ac7ba5e..0fee053e3 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -13,6 +13,7 @@ # limitations under the License. import concurrent +import concurrent.futures import copy import http import textwrap @@ -371,100 +372,6 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test__done_or_raise_w_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job._done_or_raise(timeout=42) - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args[0][1] - self.assertEqual(call_args.timeout, 600.0) - - call_args = fake_reload.call_args[1] - self.assertEqual(call_args["timeout"], 42) - - def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._done_timeout = 8.8 - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job._done_or_raise(timeout=5.5) - - # The expected timeout used is simply the given timeout, as the latter - # is shorter than the job's internal done timeout. 
- expected_timeout = 5.5 - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args[0][1] - self.assertAlmostEqual(call_args.timeout, 600.0) - - call_args = fake_reload.call_args - self.assertAlmostEqual(call_args[1].get("timeout"), expected_timeout) - - def test__done_or_raise_w_query_results_error_reload_ok(self): - client = _make_client(project=self.PROJECT) - bad_request_error = exceptions.BadRequest("Error in query") - client._get_query_results = mock.Mock(side_effect=bad_request_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._exception = None - - def fake_reload(self, *args, **kwargs): - self._properties["status"]["state"] = "DONE" - self.set_exception(copy.copy(bad_request_error)) - - fake_reload_method = types.MethodType(fake_reload, job) - - with mock.patch.object(job, "reload", new=fake_reload_method): - job._done_or_raise() - - assert isinstance(job._exception, exceptions.BadRequest) - - def test__done_or_raise_w_query_results_error_reload_error(self): - client = _make_client(project=self.PROJECT) - bad_request_error = exceptions.BadRequest("Error in query") - client._get_query_results = mock.Mock(side_effect=bad_request_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - reload_error = exceptions.DataLoss("Oops, sorry!") - job.reload = mock.Mock(side_effect=reload_error) - job._exception = None - - job._done_or_raise() - - assert job._exception is bad_request_error - - def test__done_or_raise_w_job_query_results_ok_reload_error(self): - client = _make_client(project=self.PROJECT) - query_results = google.cloud.bigquery.query._QueryResults( - properties={ - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": "12345"}, - } - ) - client._get_query_results = mock.Mock(return_value=query_results) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) - job.reload = mock.Mock(side_effect=retry_error) - job._exception = None - - job._done_or_raise() - - assert job._exception is retry_error - def test_query_plan(self): from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry @@ -933,7 +840,12 @@ def test_search_stats(self): assert isinstance(job.search_stats, SearchStats) assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" - def test_result(self): + def test_result_reloads_job_state_until_done(self): + """Verify that result() doesn't return until state == 'DONE'. + + This test verifies correctness for a possible sequence of API responses + that might cause internal customer issue b/332850329. + """ from google.cloud.bigquery.table import RowIterator query_resource = { @@ -970,7 +882,54 @@ def test_result(self): "rows": [{"f": [{"v": "abc"}]}], } conn = make_connection( - query_resource, query_resource_done, job_resource_done, query_page_resource + # QueryJob.result() makes a pair of jobs.get & jobs.getQueryResults + # REST API calls each iteration to determine if the job has finished + # or not. + # + # jobs.get (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get) + # is necessary to make sure the job has really finished via + # `Job.status.state == "DONE"` and to get necessary properties for + # `RowIterator` like the destination table. 
+ # + # jobs.getQueryResults + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults) + # with maxResults == 0 is technically optional, + # but it hangs up to 10 seconds until the job has finished. This + # makes sure we can know when the query has finished as close as + # possible to when the query finishes. It also gets properties + # necessary for `RowIterator` that isn't available on the job + # resource such as the schema + # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults#body.GetQueryResultsResponse.FIELDS.schema) + # of the results. + job_resource, + query_resource, + # The query wasn't finished in the last call to jobs.get, so try + # again with a call to both jobs.get & jobs.getQueryResults. + job_resource, + query_resource_done, + # Even though, the previous jobs.getQueryResults response says + # the job is complete, we haven't downloaded the full job status + # yet. + # + # Important: per internal issue 332850329, this reponse has + # `Job.status.state = "RUNNING"`. This ensures we are protected + # against possible eventual consistency issues where + # `jobs.getQueryResults` says jobComplete == True, but our next + # call to `jobs.get` still doesn't have + # `Job.status.state == "DONE"`. + job_resource, + # Try again until `Job.status.state == "DONE"`. + # + # Note: the call to `jobs.getQueryResults` is missing here as + # an optimization. We already received a "completed" response, so + # we won't learn anything new by calling that API again. + job_resource, + job_resource_done, + # When we iterate over the `RowIterator` we return from + # `QueryJob.result()`, we make additional calls to + # `jobs.getQueryResults` but this time allowing the actual rows + # to be returned as well. + query_page_resource, ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1013,8 +972,32 @@ def test_result(self): }, timeout=None, ) + # Ensure that we actually made the expected API calls in the sequence + # we thought above at the make_connection() call above. + # + # Note: The responses from jobs.get and jobs.getQueryResults can be + # deceptively similar, so this check ensures we actually made the + # requests we expected. conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, query_page_call] + [ + # jobs.get & jobs.getQueryResults because the job just started. + reload_call, + query_results_call, + # jobs.get & jobs.getQueryResults because the query is still + # running. + reload_call, + query_results_call, + # We got a jobComplete response from the most recent call to + # jobs.getQueryResults, so now call jobs.get until we get + # `Jobs.status.state == "DONE"`. This tests a fix for internal + # issue b/332850329. + reload_call, + reload_call, + reload_call, + # jobs.getQueryResults without `maxResults` set to download + # the rows as we iterate over the `RowIterator`. 
+ query_page_call, + ] ) def test_result_dry_run(self): @@ -1069,7 +1052,7 @@ def test_result_with_done_job_calls_get_query_results(self): method="GET", path=query_results_path, query_params={"maxResults": 0, "location": "EU"}, - timeout=None, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, ) query_results_page_call = mock.call( method="GET", @@ -1107,7 +1090,10 @@ def test_result_with_done_jobs_query_response_doesnt_call_get_query_results(self request_config=None, query_response=query_resource_done, ) - assert job.state == "DONE" + + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + assert job.state == "PENDING" result = job.result() @@ -1156,7 +1142,9 @@ def test_result_with_done_jobs_query_response_and_page_size_invalidates_cache(se request_config=None, query_response=query_resource_done, ) - assert job.state == "DONE" + # We want job.result() to refresh the job state, so the conversion is + # always "PENDING", even if the job is finished. + assert job.state == "PENDING" # Act result = job.result(page_size=3) @@ -1230,7 +1218,7 @@ def test_result_with_max_results(self): query_page_request[1]["query_params"]["maxResults"], max_results ) - def test_result_w_retry(self): + def test_result_w_custom_retry(self): from google.cloud.bigquery.table import RowIterator query_resource = { @@ -1254,12 +1242,24 @@ def test_result_w_retry(self): } connection = make_connection( + # Also, for each API request, raise an exception that we know can + # be retried. Because of this, for each iteration we do: + # jobs.get (x2) & jobs.getQueryResults (x2) + exceptions.NotFound("not normally retriable"), + job_resource, exceptions.NotFound("not normally retriable"), query_resource, + # Query still not done, repeat both. exceptions.NotFound("not normally retriable"), - query_resource_done, + job_resource, exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + # Query still not done, repeat both. job_resource_done, + exceptions.NotFound("not normally retriable"), + query_resource_done, + # Query finished! ) client = _make_client(self.PROJECT, connection=connection) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1279,7 +1279,10 @@ def test_result_w_retry(self): method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", query_params={"maxResults": 0, "location": "asia-northeast1"}, - timeout=None, + # TODO(tswast): Why do we end up setting timeout to + # google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT in + # some cases but not others? + timeout=mock.ANY, ) reload_call = mock.call( method="GET", @@ -1289,7 +1292,26 @@ def test_result_w_retry(self): ) connection.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call] + [ + # See make_connection() call above for explanation of the + # expected API calls. + # + # Query not done. + reload_call, + reload_call, + query_results_call, + query_results_call, + # Query still not done. + reload_call, + reload_call, + query_results_call, + query_results_call, + # Query done! 
+ reload_call, + reload_call, + query_results_call, + query_results_call, + ] ) def test_result_w_empty_schema(self): @@ -1316,41 +1338,60 @@ def test_result_w_empty_schema(self): self.assertEqual(result.location, "asia-northeast1") self.assertEqual(result.query_id, "xyz-abc") - def test_result_invokes_begins(self): + def test_result_w_timeout_doesnt_raise(self): + import google.cloud.bigquery.client + begun_resource = self._make_resource() - incomplete_resource = { - "jobComplete": False, + query_resource = { + "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, } - query_resource = copy.deepcopy(incomplete_resource) - query_resource["jobComplete"] = True done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = make_connection( - begun_resource, - incomplete_resource, - query_resource, - done_resource, - query_resource, - ) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["jobReference"]["location"] = "US" - job.result() + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result( + # Test that fractional seconds are supported, but use a timeout + # that is representable as a floating point without rounding + # errors since it can be represented exactly in base 2. In this + # case 1.125 is 9 / 8, which is a fraction with a power of 2 in + # the denominator. + timeout=1.125, + ) - self.assertEqual(len(connection.api_request.call_args_list), 4) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[2] - reload_request = connection.api_request.call_args_list[3] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual(reload_request[1]["method"], "GET") + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={"location": "US"}, + timeout=1.125, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + reload_call, + ] + ) - def test_result_w_timeout(self): + def test_result_w_timeout_raises_concurrent_futures_timeout(self): import google.cloud.bigquery.client begun_resource = self._make_resource() + begun_resource["jobReference"]["location"] = "US" query_resource = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, @@ -1361,26 +1402,35 @@ def test_result_w_timeout(self): connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["jobReference"]["location"] = "US" - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result(timeout=1.0) - - self.assertEqual(len(connection.api_request.call_args_list), 3) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[1] - reload_request = connection.api_request.call_args_list[2] - 
self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual( - query_request[1]["path"], - "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), + with freezegun.freeze_time( + "1970-01-01 00:00:00", auto_tick_seconds=1.0 + ), self.assertRaises(concurrent.futures.TimeoutError): + job.result(timeout=1.125) + + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={"location": "US"}, + timeout=1.125, ) - self.assertEqual(query_request[1]["timeout"], 120) - self.assertEqual( - query_request[1]["timeout"], - google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + # Timeout before we can reload with the final job state. + ] ) - self.assertEqual(reload_request[1]["method"], "GET") def test_result_w_page_size(self): # Arrange diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 671b829f7..9f661dca7 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -246,7 +246,9 @@ def test__to_query_job_dry_run(): @pytest.mark.parametrize( ("completed", "expected_state"), ( - (True, "DONE"), + # Always pending so that we refresh the job state to get the + # destination table or job stats in case it's needed. + (True, "PENDING"), (False, "PENDING"), ), ) @@ -843,6 +845,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): "jobId": "response-job-id", "location": "response-location", }, + "status": {"state": "DONE"}, }, { "rows": [ @@ -896,18 +899,10 @@ def test_query_and_wait_caches_completed_query_results_more_pages(): timeout=None, ) - # TODO(swast): Fetching job metadata isn't necessary in this case. - jobs_get_path = "/projects/response-project/jobs/response-job-id" - client._call_api.assert_any_call( - None, # retry - span_name="BigQuery.job.reload", - span_attributes={"path": jobs_get_path}, - job_ref=mock.ANY, - method="GET", - path=jobs_get_path, - query_params={"location": "response-location"}, - timeout=None, - ) + # Note: There is no get call to + # "/projects/response-project/jobs/response-job-id", because fetching job + # metadata isn't necessary in this case. The job already completed in + # jobs.query and we don't need the full job metadata in query_and_wait. # Fetch the remaining two pages. 
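     # (The first page of rows arrived inline on the jobs.query response
     # above; the `pageToken` in each page response drives the next
     # jobs.getQueryResults request.)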
jobs_get_query_results_path = "/projects/response-project/queries/response-job-id" @@ -944,6 +939,7 @@ def test_query_and_wait_incomplete_query(): Client._list_rows_from_query_results, client ) client._call_api.side_effect = ( + # jobs.query { "jobReference": { "projectId": "response-project", @@ -952,6 +948,16 @@ def test_query_and_wait_incomplete_query(): }, "jobComplete": False, }, + # jobs.get + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "status": {"state": "RUNNING"}, + }, + # jobs.getQueryResults with max_results=0 { "jobReference": { "projectId": "response-project", @@ -968,13 +974,18 @@ def test_query_and_wait_incomplete_query(): ], }, }, + # jobs.get { "jobReference": { "projectId": "response-project", "jobId": "response-job-id", "location": "response-location", }, + "status": {"state": "DONE"}, }, + # jobs.getQueryResults + # Note: No more jobs.getQueryResults with max_results=0 because the + # previous call to jobs.getQueryResults returned with jobComplete=True. { "rows": [ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, @@ -987,6 +998,7 @@ def test_query_and_wait_incomplete_query(): "totalRows": 2, "pageToken": "page-2", }, + # jobs.getQueryResults { "rows": [ {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]}, diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index d7049c5ca..43ddae1dc 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -24,7 +24,7 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers -from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY +import google.cloud.bigquery.retry from .helpers import make_connection @@ -126,6 +126,168 @@ def api_request(method, path, query_params=None, data=None, **kw): assert job.job_id == orig_job_id +def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job( + client, monkeypatch +): + """ + Some errors like 'rateLimitExceeded' can be ambiguous. Make sure we only + retry the job when we know for sure that the job has failed for a retriable + reason. We can only be sure after a "successful" call to jobs.get to fetch + the failed job status. + """ + job_counter = 0 + + def make_job_id(*args, **kwargs): + nonlocal job_counter + job_counter += 1 + return f"{job_counter}" + + monkeypatch.setattr(_job_helpers, "make_job_id", make_job_id) + + project = client.project + job_reference_1 = {"projectId": project, "jobId": "1", "location": "test-loc"} + job_reference_2 = {"projectId": project, "jobId": "2", "location": "test-loc"} + NUM_API_RETRIES = 2 + + # This error is modeled after a real customer exception in + # https://github.com/googleapis/python-bigquery/issues/707. + internal_error = google.api_core.exceptions.InternalServerError( + "Job failed just because...", + errors=[ + {"reason": "internalError"}, + ], + ) + responses = [ + # jobs.insert + {"jobReference": job_reference_1, "status": {"state": "PENDING"}}, + # jobs.get + {"jobReference": job_reference_1, "status": {"state": "RUNNING"}}, + # jobs.getQueryResults x2 + # + # Note: internalError is ambiguous in jobs.getQueryResults. The + # problem could be at the Google Frontend level or it could be because + # the job has failed due to some transient issues and the BigQuery + # REST API is translating the job failed status into failure HTTP + # codes. 
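+        # Put differently: an HTTP 500 from jobs.getQueryResults alone does
+        # not prove that the *job* failed; only a jobs.get response whose
+        # status carries an `errorResult` does (see the docstring above).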
+ # + # TODO(GH#1903): We shouldn't retry nearly this many times when we get + # ambiguous errors from jobs.getQueryResults. + # See: https://github.com/googleapis/python-bigquery/issues/1903 + internal_error, + internal_error, + # jobs.get -- the job has failed + { + "jobReference": job_reference_1, + "status": {"state": "DONE", "errorResult": {"reason": "internalError"}}, + }, + # jobs.insert + {"jobReference": job_reference_2, "status": {"state": "PENDING"}}, + # jobs.get + {"jobReference": job_reference_2, "status": {"state": "RUNNING"}}, + # jobs.getQueryResults + {"jobReference": job_reference_2, "jobComplete": True}, + # jobs.get + {"jobReference": job_reference_2, "status": {"state": "DONE"}}, + ] + + conn = client._connection = make_connection() + conn.api_request.side_effect = responses + + with freezegun.freeze_time( + # Note: because of exponential backoff and a bit of jitter, + # NUM_API_RETRIES will get less accurate the greater the value. + # We add 1 because we know there will be at least some additional + # calls to fetch the time / sleep before the retry deadline is hit. + auto_tick_seconds=( + google.cloud.bigquery.retry._DEFAULT_RETRY_DEADLINE / NUM_API_RETRIES + ) + + 1, + ): + job = client.query("select 1") + job.result() + + conn.api_request.assert_has_calls( + [ + # jobs.insert + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data={ + "jobReference": {"jobId": "1", "projectId": "PROJECT"}, + "configuration": { + "query": {"useLegacySql": False, "query": "select 1"} + }, + }, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults x2 + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + mock.call( + method="GET", + path="/projects/PROJECT/queries/1", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + # jobs.get -- verify that the job has failed + mock.call( + method="GET", + path="/projects/PROJECT/jobs/1", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.insert + mock.call( + method="POST", + path="/projects/PROJECT/jobs", + data={ + "jobReference": { + # Make sure that we generated a new job ID. + "jobId": "2", + "projectId": "PROJECT", + }, + "configuration": { + "query": {"useLegacySql": False, "query": "select 1"} + }, + }, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/2", + query_params={"location": "test-loc"}, + timeout=None, + ), + # jobs.getQueryResults + mock.call( + method="GET", + path="/projects/PROJECT/queries/2", + query_params={"maxResults": 0, "location": "test-loc"}, + timeout=None, + ), + # jobs.get + mock.call( + method="GET", + path="/projects/PROJECT/jobs/2", + query_params={"location": "test-loc"}, + timeout=None, + ), + ] + ) + + # With job_retry_on_query, we're testing 4 scenarios: # - Pass None retry to `query`. # - Pass None retry to `result`. @@ -187,8 +349,8 @@ def api_request(method, path, query_params=None, data=None, **kw): with pytest.raises(google.api_core.exceptions.RetryError): job.result() - # We never got a successful job, so the job id never changed: - assert job.job_id == orig_job_id + # We retried the job at least once, so we should have generated a new job ID. + assert job.job_id != orig_job_id # We failed because we couldn't succeed after 120 seconds. 
# But we can try again: @@ -301,8 +463,8 @@ def test_query_and_wait_retries_job_for_DDL_queries(): job_config=None, page_size=None, max_results=None, - retry=DEFAULT_JOB_RETRY, - job_retry=DEFAULT_JOB_RETRY, + retry=google.cloud.bigquery.retry.DEFAULT_RETRY, + job_retry=google.cloud.bigquery.retry.DEFAULT_JOB_RETRY, ) assert len(list(rows)) == 4 From bd0814caf2b3bf907006cd50129f7798874571d5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 12:24:03 -0500 Subject: [PATCH 13/13] chore(main): release 3.21.0 (#1883) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 22 ++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95af2d213..0fc77f7c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.21.0](https://github.com/googleapis/python-bigquery/compare/v3.20.1...v3.21.0) (2024-04-18) + + +### Features + +* Add compression option ZSTD. ([#1890](https://github.com/googleapis/python-bigquery/issues/1890)) ([5ed9cce](https://github.com/googleapis/python-bigquery/commit/5ed9ccee204b7cf8e96cb0e050f6830c05f3b4fd)) +* Adds billing to opentel ([#1889](https://github.com/googleapis/python-bigquery/issues/1889)) ([38697fb](https://github.com/googleapis/python-bigquery/commit/38697fb942516fc2f6f5e21e19a11811fbaeb1f4)) +* Support RANGE in queries Part 1: JSON ([#1884](https://github.com/googleapis/python-bigquery/issues/1884)) ([3634405](https://github.com/googleapis/python-bigquery/commit/3634405fa1b40ae5f69b06d7c7f8de4e3d246d92)) + + +### Bug Fixes + +* Add types to DatasetReference constructor ([#1601](https://github.com/googleapis/python-bigquery/issues/1601)) ([bf8861c](https://github.com/googleapis/python-bigquery/commit/bf8861c3473a1af978db7a06463ddc0bad86f326)) +* Creates linting-typing.cfg in presubmit ([#1881](https://github.com/googleapis/python-bigquery/issues/1881)) ([c852c15](https://github.com/googleapis/python-bigquery/commit/c852c153c55025ba1187d61e313ead2308616c55)) +* Remove duplicate key time_partitioning from Table._PROPERTY_TO_A… ([#1898](https://github.com/googleapis/python-bigquery/issues/1898)) ([82ae908](https://github.com/googleapis/python-bigquery/commit/82ae908fbf3b2361343fff1859d3533383dc50ec)) +* Retry query jobs that fail even with ambiguous `jobs.getQueryResults` REST errors ([#1903](https://github.com/googleapis/python-bigquery/issues/1903), [#1900](https://github.com/googleapis/python-bigquery/issues/1900)) ([1367b58](https://github.com/googleapis/python-bigquery/commit/1367b584b68d917ec325ce4383a0e9a36205b894)) + + +### Performance Improvements + +* Avoid unnecessary API call in `QueryJob.result()` when job is already finished ([#1900](https://github.com/googleapis/python-bigquery/issues/1900)) ([1367b58](https://github.com/googleapis/python-bigquery/commit/1367b584b68d917ec325ce4383a0e9a36205b894)) + ## [3.20.1](https://github.com/googleapis/python-bigquery/compare/v3.20.0...v3.20.1) (2024-04-01) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 55093e390..29c08b51f 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.20.1" +__version__ = "3.21.0"
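
For reference, here is a minimal sketch of the client-side pattern exercised by
the retry and `QueryJob.result()` changes in this release. It assumes
google-cloud-bigquery 3.21.0, working application default credentials, and a
project with BigQuery enabled; the query text is only a placeholder.

    from google.cloud import bigquery

    client = bigquery.Client()

    # Starts the query (jobs.insert under the hood).
    job = client.query("SELECT 1 AS x")

    # Polls jobs.get and jobs.getQueryResults until the job state is DONE,
    # retrying the whole job when it fails for a known-retriable reason.
    rows = job.result()

    # Iterating pages through jobs.getQueryResults.
    for row in rows:
        print(row["x"])

The `Client.query_and_wait` path follows the same sequence but, as the tests
above verify, skips the jobs.get call entirely when the jobs.query response
already reports the job as complete.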