From aa6b9ebcdae0f7cbb08dc743d312e110d9701e23 Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Thu, 1 May 2025 12:54:23 -0700 Subject: [PATCH 1/2] Add ability to set autodetect_schema query_param --- google/cloud/bigquery/client.py | 11 ++++++++ tests/system/test_client.py | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e7cafc47e..7d6ba03a4 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1389,6 +1389,7 @@ def update_table( self, table: Table, fields: Sequence[str], + autodetect_schema: bool = False, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: @@ -1419,6 +1420,10 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. + autodetect_schema (bool): + Specifies if the schema of the table should be autodetected when + updating the table from the underlying source. Only applicable + for external tables. retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1438,12 +1443,18 @@ def update_table( path = table.path span_attributes = {"path": path, "fields": fields} + if autodetect_schema: + query_params = {"autodetect_schema": True} + else: + query_params = None + api_response = self._call_api( retry, span_name="BigQuery.updateTable", span_attributes=span_attributes, method="PATCH", path=path, + query_params=query_params, data=partial, headers=headers, timeout=timeout, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 9df572b14..196b80092 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -978,6 +978,54 @@ def test_update_table_constraints(self): ) self.assertIsNone(reference_table3.table_constraints, None) + def test_update_table_autodetect_schema(self): + dataset = self.temp_dataset(_make_dataset_id("bq_update_table_test")) + + # Create an external table, restrict schema to one field + TABLE_NAME = "test_table" + set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")] + table_arg = Table(dataset.table(TABLE_NAME)) + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) + external_config.source_uris = SOURCE_URIS_AVRO + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + external_config.schema = set_schema + table_arg.external_data_configuration = external_config + + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + self.assertEqual(table.schema, set_schema) + + # Update table with schema autodetection + updated_table_arg = Table(dataset.table(TABLE_NAME)) + updated_external_config = bigquery.ExternalConfig( + bigquery.ExternalSourceFormat.AVRO + ) + updated_external_config.source_uris = SOURCE_URIS_AVRO + updated_external_config.reference_file_schema_uri = ( + REFERENCE_FILE_SCHEMA_URI_AVRO + ) + updated_external_config.autodetect = True + updated_external_config.schema = None + updated_table_arg.external_data_configuration = updated_external_config + + updated_table = Config.CLIENT.update_table( + updated_table_arg, ["external_data_configuration"], autodetect_schema=True + ) + + # The updated table shlould have a schema inferred from the reference + # file, which has all four fields. + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + self.assertEqual(updated_table.schema, expected_schema) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) From 85c7b9f6cb1105ddffd084a88975235e81f722dc Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 2 May 2025 18:39:49 +0000 Subject: [PATCH 2/2] suggested updates: reduce duplicate code, change query_params --- google/cloud/bigquery/client.py | 5 ++--- tests/system/test_client.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 7d6ba03a4..5d91a4263 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1443,10 +1443,9 @@ def update_table( path = table.path span_attributes = {"path": path, "fields": fields} + query_params = {} if autodetect_schema: - query_params = {"autodetect_schema": True} - else: - query_params = None + query_params["autodetect_schema"] = True api_response = self._call_api( retry, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 196b80092..ea8cc4046 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -985,6 +985,8 @@ def test_update_table_autodetect_schema(self): TABLE_NAME = "test_table" set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")] table_arg = Table(dataset.table(TABLE_NAME)) + + # Create an external_config and include it in the table arguments external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) external_config.source_uris = SOURCE_URIS_AVRO external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO @@ -1001,13 +1003,9 @@ def test_update_table_autodetect_schema(self): # Update table with schema autodetection updated_table_arg = Table(dataset.table(TABLE_NAME)) - updated_external_config = bigquery.ExternalConfig( - bigquery.ExternalSourceFormat.AVRO - ) - updated_external_config.source_uris = SOURCE_URIS_AVRO - updated_external_config.reference_file_schema_uri = ( - REFERENCE_FILE_SCHEMA_URI_AVRO - ) + + # Update the external_config and include it in the updated table arguments + updated_external_config = copy.deepcopy(external_config) updated_external_config.autodetect = True updated_external_config.schema = None updated_table_arg.external_data_configuration = updated_external_config @@ -1016,7 +1014,7 @@ def test_update_table_autodetect_schema(self): updated_table_arg, ["external_data_configuration"], autodetect_schema=True ) - # The updated table shlould have a schema inferred from the reference + # The updated table should have a schema inferred from the reference # file, which has all four fields. expected_schema = [ bigquery.SchemaField("username", "STRING", mode="NULLABLE"),