From 15f51fffe1cba99ee6c813f293d022fb8a952945 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 21 May 2025 16:53:52 +0000
Subject: [PATCH] feat: Add dataset_view parameter to get_dataset method

This change introduces an optional `dataset_view` parameter to the
`get_dataset` method in the BigQuery client. The parameter lets callers
specify which parts of the dataset resource the API should return,
potentially reducing the size of the response.

The `dataset_view` parameter accepts a `DatasetView` enum, which has been
added to `google.cloud.bigquery.enums` with the following values:

- DATASET_VIEW_UNSPECIFIED
- METADATA
- ACL
- FULL

Unit and system tests cover the new behavior, including handling of valid
enum values, None, and invalid input. The docstring for `get_dataset` has
also been updated.
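
As an illustration only (this snippet is not part of the patch, and
`my-project.my_dataset` is a placeholder ID), a caller could request just a
dataset's metadata and ACL, skipping table metadata:

    from google.cloud import bigquery
    from google.cloud.bigquery.enums import DatasetView

    client = bigquery.Client()
    # Ask the API for the ACL view: dataset metadata plus access entries,
    # without table metadata.
    dataset = client.get_dataset(
        "my-project.my_dataset", dataset_view=DatasetView.ACL
    )
    print([entry.role for entry in dataset.access_entries])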
---
 google/cloud/bigquery/client.py | 20 ++++++++++++
 google/cloud/bigquery/enums.py  | 21 +++++++++++++
 tests/system/test_client.py     | 24 ++++++++++++++
 tests/unit/test_client.py       | 56 +++++++++++++++++++++++++++++++++
 4 files changed, 121 insertions(+)

diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 8ad1586f4..e56ec8756 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -849,6 +849,7 @@ def get_dataset(
         dataset_ref: Union[DatasetReference, str],
         retry: retries.Retry = DEFAULT_RETRY,
         timeout: TimeoutType = DEFAULT_TIMEOUT,
+        dataset_view: Optional[enums.DatasetView] = None,
     ) -> Dataset:
         """Fetch the dataset referenced by ``dataset_ref``
 
@@ -866,6 +867,20 @@
             timeout (Optional[float]):
                 The number of seconds to wait for the underlying HTTP transport
                 before using ``retry``.
+            dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]):
+                Specifies the level of detail to include for the dataset resource.
+                If set, this parameter controls which parts of the dataset
+                resource are returned.
+                Possible enum values:
+                - :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`:
+                  Includes dataset metadata and the ACL.
+                - :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`:
+                  Includes all dataset metadata, including the ACL and table metadata.
+                  This view is not supported by the ``datasets.list`` API method.
+                - :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`:
+                  Includes basic dataset metadata, but not the ACL.
+                - :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`:
+                  The server will decide which view to use.
 
         Returns:
             google.cloud.bigquery.dataset.Dataset:
@@ -876,6 +891,10 @@
             dataset_ref, default_project=self.project
         )
         path = dataset_ref.path
+        params: Dict[str, Any] = {}
+        if dataset_view is not None:
+            params["view"] = str(dataset_view.value)
+
         span_attributes = {"path": path}
         api_response = self._call_api(
             retry,
@@ -883,6 +902,7 @@
             span_attributes=span_attributes,
             method="GET",
             path=path,
+            query_params=params,
             timeout=timeout,
         )
         return Dataset.from_api_repr(api_response)
diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py
index 203ea3c7b..8883d25bb 100644
--- a/google/cloud/bigquery/enums.py
+++ b/google/cloud/bigquery/enums.py
@@ -46,6 +46,27 @@ class Compression(str, enum.Enum):
     """Specifies no compression."""
 
 
+class DatasetView(str, enum.Enum):
+    """Specifies the level of detail to include for a dataset resource.
+
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/get#DatasetView
+    """
+
+    DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED"
+    """The server will decide which view to use."""
+
+    METADATA = "METADATA"
+    """Includes basic dataset metadata, but not the ACL."""
+
+    ACL = "ACL"
+    """Includes dataset metadata and the ACL."""
+
+    FULL = "FULL"
+    """Includes all dataset metadata, including the ACL and table metadata.
+    This view is not supported by the ``datasets.list`` API method.
+    """
+
+
 class DecimalTargetType:
     """The data types that could be used as a target type when converting decimal values.
 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index 6584ca03c..c3e1c4ebd 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -252,6 +252,30 @@ def test_create_dataset(self):
         self.assertEqual(dataset.project, Config.CLIENT.project)
         self.assertIs(dataset.is_case_insensitive, False)
 
+    def test_get_dataset_with_dataset_view(self):
+        dataset_id = _make_dataset_id("get_dataset_view")
+        dataset_ref = self.temp_dataset(dataset_id)
+        client = Config.CLIENT
+
+        views_to_test = [
+            None,
+            bigquery.enums.DatasetView.METADATA,
+            bigquery.enums.DatasetView.ACL,
+            bigquery.enums.DatasetView.FULL,
+            bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED,
+        ]
+
+        for view in views_to_test:
+            try:
+                dataset = client.get_dataset(dataset_ref.reference, dataset_view=view)
+                self.assertEqual(dataset.dataset_id, dataset_id)
+                # Further assertions could be made here if the expected content
+                # for each view were known and testable (e.g., presence/absence of ACLs).
+            except GoogleAPICallError as e:
+                self.fail(
+                    f"client.get_dataset with dataset_view={view} failed with API error: {e}"
+                )
+
     def test_create_dataset_case_sensitive(self):
         DATASET_ID = _make_dataset_id("create_cs_dataset")
         dataset = self.temp_dataset(DATASET_ID, is_case_insensitive=False)
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 468068321..b2a7505a7 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -807,6 +807,62 @@ def test_get_dataset(self):
         self.assertEqual(dataset.dataset_id, self.DS_ID)
 
+    def test_get_dataset_with_view_parameter(self):
+        path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID)
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+        resource = {
+            "id": "%s:%s" % (self.PROJECT, self.DS_ID),
+            "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
+        }
+        dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
+
+        # Pairs of (dataset_view argument, expected "view" query parameter).
+        view_test_cases = [
+            (None, None),
+            (bigquery.enums.DatasetView.METADATA, "METADATA"),
+            (bigquery.enums.DatasetView.ACL, "ACL"),
+            (bigquery.enums.DatasetView.FULL, "FULL"),
+            (
+                bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED,
+                "DATASET_VIEW_UNSPECIFIED",
+            ),
+        ]
+
+        # pytest.mark.parametrize does not work on unittest.TestCase methods,
+        # so iterate over the cases with subTest instead.
+        for dataset_view, expected_view_param in view_test_cases:
+            with self.subTest(dataset_view=dataset_view):
+                # Mock _call_api directly to inspect its arguments.
+                with mock.patch.object(client, "_call_api") as mock_call_api:
+                    # Simulate a successful API response.
+                    mock_call_api.return_value = resource
+                    client.get_dataset(dataset_ref, dataset_view=dataset_view)
+
+                expected_query_params = {}
+                if expected_view_param is not None:
+                    expected_query_params["view"] = expected_view_param
+
+                mock_call_api.assert_called_once_with(
+                    mock.ANY,  # retry argument
+                    span_name="BigQuery.getDataset",
+                    span_attributes={"path": "/" + path},
+                    method="GET",
+                    path="/" + path,
+                    query_params=expected_query_params,
+                    timeout=DEFAULT_TIMEOUT,
+                )
+
+    def test_get_dataset_with_invalid_view_parameter_string(self):
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+        dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
+
+        with pytest.raises(AttributeError):
+            client.get_dataset(dataset_ref, dataset_view="INVALID_STRING_VALUE")
+
     def test_ensure_bqstorage_client_creating_new_instance(self):
         bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")