Skip to content

Commit

Permalink
Bigquery assets (#23165)
Browse files Browse the repository at this point in the history
  • Loading branch information
wojsamjan committed Apr 30, 2022
1 parent a914ec2 commit 511d0ee
Show file tree
Hide file tree
Showing 12 changed files with 240 additions and 45 deletions.
19 changes: 12 additions & 7 deletions airflow/providers/google/cloud/hooks/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ def create_empty_dataset(
location: Optional[str] = None,
dataset_reference: Optional[Dict[str, Any]] = None,
exists_ok: bool = True,
) -> None:
) -> Dict[str, Any]:
"""
Create a new empty dataset:
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert
Expand Down Expand Up @@ -452,8 +452,11 @@ def create_empty_dataset(

dataset: Dataset = Dataset.from_api_repr(dataset_reference)
self.log.info('Creating dataset: %s in project: %s ', dataset.dataset_id, dataset.project)
self.get_client(location=location).create_dataset(dataset=dataset, exists_ok=exists_ok)
dataset_object = self.get_client(location=location).create_dataset(
dataset=dataset, exists_ok=exists_ok
)
self.log.info('Dataset created successfully.')
return dataset_object.to_api_repr()

@GoogleBaseHook.fallback_to_default_project_id
def get_dataset_tables(
Expand Down Expand Up @@ -533,7 +536,7 @@ def create_external_table(
encryption_configuration: Optional[Dict] = None,
location: Optional[str] = None,
project_id: Optional[str] = None,
) -> None:
) -> Table:
"""
Creates a new external table in the dataset with the data from Google
Cloud Storage. See here:
Expand Down Expand Up @@ -659,10 +662,11 @@ def create_external_table(
table.encryption_configuration = EncryptionConfiguration.from_api_repr(encryption_configuration)

self.log.info('Creating external table: %s', external_project_dataset_table)
self.create_empty_table(
table_object = self.create_empty_table(
table_resource=table.to_api_repr(), project_id=project_id, location=location, exists_ok=True
)
self.log.info('External table created successfully: %s', external_project_dataset_table)
return table_object

@GoogleBaseHook.fallback_to_default_project_id
def update_table(
Expand Down Expand Up @@ -1287,7 +1291,7 @@ def update_table_schema(
dataset_id: str,
table_id: str,
project_id: Optional[str] = None,
) -> None:
) -> Dict[str, Any]:
"""
Update fields within a schema for a given dataset and table. Note that
some fields in schemas are immutable and trying to change them will cause
Expand Down Expand Up @@ -1361,13 +1365,14 @@ def _remove_policy_tags(schema: List[Dict[str, Any]]):
if not include_policy_tags:
_remove_policy_tags(new_schema)

self.update_table(
table = self.update_table(
table_resource={"schema": {"fields": new_schema}},
fields=["schema"],
project_id=project_id,
dataset_id=dataset_id,
table_id=table_id,
)
return table

@GoogleBaseHook.fallback_to_default_project_id
def poll_job_complete(
Expand Down Expand Up @@ -2244,7 +2249,7 @@ def create_empty_table(self, *args, **kwargs) -> None:
)
return self.hook.create_empty_table(*args, **kwargs)

def create_empty_dataset(self, *args, **kwargs) -> None:
def create_empty_dataset(self, *args, **kwargs) -> Dict[str, Any]:
"""
This method is deprecated.
Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_dataset`
Expand Down
77 changes: 77 additions & 0 deletions airflow/providers/google/cloud/links/bigquery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""This module contains Google BigQuery links."""
from typing import TYPE_CHECKING

from airflow.models import BaseOperator
from airflow.providers.google.cloud.links.base import BaseGoogleLink

if TYPE_CHECKING:
from airflow.utils.context import Context

# Base URL of the Google Cloud console's BigQuery UI.
BIGQUERY_BASE_LINK = "https://console.cloud.google.com/bigquery"
# Deep link to a dataset page; format keys: {project_id}, {dataset_id}.
BIGQUERY_DATASET_LINK = (
    BIGQUERY_BASE_LINK + "?referrer=search&project={project_id}&d={dataset_id}&p={project_id}&page=dataset"
)
# Deep link to a table page; format keys: {project_id}, {dataset_id}, {table_id}.
BIGQUERY_TABLE_LINK = (
    BIGQUERY_BASE_LINK
    + "?referrer=search&project={project_id}&d={dataset_id}&p={project_id}&page=table&t={table_id}"
)


class BigQueryDatasetLink(BaseGoogleLink):
    """Helper class for constructing BigQuery Dataset Link"""

    name = "BigQuery Dataset"
    key = "bigquery_dataset"
    format_str = BIGQUERY_DATASET_LINK

    @staticmethod
    def persist(
        context: "Context",
        task_instance: BaseOperator,
        dataset_id: str,
        project_id: str,
    ):
        # Stash the URL components in XCom so the link can be rendered later
        # by formatting BIGQUERY_DATASET_LINK with these values.
        link_payload = {"dataset_id": dataset_id, "project_id": project_id}
        task_instance.xcom_push(context, key=BigQueryDatasetLink.key, value=link_payload)


class BigQueryTableLink(BaseGoogleLink):
    """Helper class for constructing BigQuery Table Link"""

    name = "BigQuery Table"
    key = "bigquery_table"
    format_str = BIGQUERY_TABLE_LINK

    @staticmethod
    def persist(
        context: "Context",
        task_instance: BaseOperator,
        dataset_id: str,
        project_id: str,
        table_id: str,
    ):
        # Stash the URL components in XCom so the link can be rendered later
        # by formatting BIGQUERY_TABLE_LINK with these values.
        link_payload = {
            "dataset_id": dataset_id,
            "project_id": project_id,
            "table_id": table_id,
        }
        task_instance.xcom_push(context, key=BigQueryTableLink.key, value=link_payload)

0 comments on commit 511d0ee

Please sign in to comment.