Skip to content

Commit

Permalink
Migrate Google example DAG bigquery_transfer to new design AIP-47 (#2…
Browse files Browse the repository at this point in the history
…4543)

related: #22447, #22430
  • Loading branch information
chenglongyan committed Jun 25, 2022
1 parent 47f54b6 commit abb304c
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 77 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Example Airflow DAG for Google BigQuery service.
"""
import os
from datetime import datetime

from airflow import models
from airflow.providers.google.cloud.operators.bigquery import (
BigQueryCreateEmptyDatasetOperator,
BigQueryCreateEmptyTableOperator,
BigQueryDeleteDatasetOperator,
)
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
from airflow.providers.google.cloud.transfers.bigquery_to_bigquery import BigQueryToBigQueryOperator
from airflow.providers.google.cloud.transfers.bigquery_to_gcs import BigQueryToGCSOperator
from airflow.utils.trigger_rule import TriggerRule

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
DAG_ID = "example_bigquery_transfer"

DATASET_NAME = f"dataset_{DAG_ID}_{ENV_ID}"
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"

ORIGIN = "origin"
TARGET = "target"

with models.DAG(
DAG_ID,
schedule_interval='@once', # Override to match your needs
start_date=datetime(2021, 1, 1),
catchup=False,
tags=["example", "bigquery"],
) as dag:
create_bucket = GCSCreateBucketOperator(
task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID
)

create_dataset = BigQueryCreateEmptyDatasetOperator(task_id="create_dataset", dataset_id=DATASET_NAME)

create_origin_table = BigQueryCreateEmptyTableOperator(
task_id=f"create_{ORIGIN}_table",
dataset_id=DATASET_NAME,
table_id=ORIGIN,
schema_fields=[
{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
{"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
],
)

create_target_table = BigQueryCreateEmptyTableOperator(
task_id=f"create_{TARGET}_table",
dataset_id=DATASET_NAME,
table_id=TARGET,
schema_fields=[
{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
{"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
],
)

copy_selected_data = BigQueryToBigQueryOperator(
task_id="copy_selected_data",
source_project_dataset_tables=f"{DATASET_NAME}.{ORIGIN}",
destination_project_dataset_table=f"{DATASET_NAME}.{TARGET}",
)

bigquery_to_gcs = BigQueryToGCSOperator(
task_id="bigquery_to_gcs",
source_project_dataset_table=f"{DATASET_NAME}.{ORIGIN}",
destination_cloud_storage_uris=[f"gs://{BUCKET_NAME}/export-bigquery.csv"],
)

delete_dataset = BigQueryDeleteDatasetOperator(
task_id="delete_dataset",
dataset_id=DATASET_NAME,
delete_contents=True,
trigger_rule=TriggerRule.ALL_DONE,
)

delete_bucket = GCSDeleteBucketOperator(
task_id="delete_bucket", bucket_name=BUCKET_NAME, trigger_rule=TriggerRule.ALL_DONE
)

(
# TEST SETUP
create_bucket
>> create_dataset
>> create_origin_table
>> create_target_table
# TEST BODY
>> copy_selected_data
>> bigquery_to_gcs
# TEST TEARDOWN
>> delete_dataset
>> delete_bucket
)

from tests.system.utils.watcher import watcher

# This test needs watcher in order to properly mark success/failure
# when "tearDown" task with trigger rule is part of the DAG
list(dag.tasks) >> watcher()

from tests.system.utils import get_test_run # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
test_run = get_test_run(dag)

0 comments on commit abb304c

Please sign in to comment.