Skip to content

Commit

Permalink
Add Auto ML operators for Vertex AI service (#21470)
Browse files — browse the repository at this point in the history
  • Loading branch information
MaksYermak committed Feb 20, 2022
1 parent 62d1ef8 commit 6061cc4
Show file tree
Hide file tree
Showing 11 changed files with 2,857 additions and 159 deletions.
153 changes: 153 additions & 0 deletions airflow/providers/google/cloud/example_dags/example_vertex_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@
from google.protobuf.struct_pb2 import Value

from airflow import models
from airflow.providers.google.cloud.operators.vertex_ai.auto_ml import (
CreateAutoMLForecastingTrainingJobOperator,
CreateAutoMLImageTrainingJobOperator,
CreateAutoMLTabularTrainingJobOperator,
CreateAutoMLTextTrainingJobOperator,
CreateAutoMLVideoTrainingJobOperator,
DeleteAutoMLTrainingJobOperator,
ListAutoMLTrainingJobOperator,
)
from airflow.providers.google.cloud.operators.vertex_ai.custom_job import (
CreateCustomContainerTrainingJobOperator,
CreateCustomPythonPackageTrainingJobOperator,
Expand Down Expand Up @@ -121,6 +130,33 @@
# Dataset field values and the matching field-mask paths.
DATASET_TO_UPDATE = dict(display_name="test-name")
TEST_UPDATE_MASK = dict(paths=["displayName"])

# Column names passed to the AutoML forecasting training job below.
TEST_TIME_COLUMN = "date"
TEST_TIME_SERIES_IDENTIFIER_COLUMN = "store_name"
TEST_TARGET_COLUMN = "sale_dollars"

# Column name -> transformation type, handed to the forecasting job as
# ``column_specs``.  Insertion order: time column, target column, then the
# three categorical attribute columns.
COLUMN_SPECS = {
    TEST_TIME_COLUMN: "timestamp",
    TEST_TARGET_COLUMN: "numeric",
    **dict.fromkeys(("city", "zip_code", "county"), "categorical"),
}

# One ``{<transformation type>: {"column_name": <column>}}`` entry per column,
# handed to the tabular job as ``column_transformations``.
COLUMN_TRANSFORMATIONS = [
    {transformation: {"column_name": column}}
    for transformation, column in (
        ("categorical", "Type"),
        ("numeric", "Age"),
        ("categorical", "Breed1"),
        ("categorical", "Color1"),
        ("categorical", "Color2"),
        ("categorical", "MaturitySize"),
        ("categorical", "FurLength"),
        ("categorical", "Vaccinated"),
        ("categorical", "Sterilized"),
        ("categorical", "Health"),
        ("numeric", "Fee"),
        ("numeric", "PhotoAmt"),
    )
]

with models.DAG(
"example_gcp_vertex_ai_custom_jobs",
schedule_interval="@once",
Expand Down Expand Up @@ -313,3 +349,120 @@
create_image_dataset_job >> import_data_job >> export_data_job
create_video_dataset_job >> update_dataset_job
list_dataset_job

# Example DAG with one task per Vertex AI AutoML operator.  No dependencies
# are declared between the tasks in this block.
with models.DAG(
    dag_id="example_gcp_vertex_ai_auto_ml",
    start_date=datetime(2021, 1, 1),
    schedule_interval="@once",
    catchup=False,
) as auto_ml_dag:
    # [START how_to_cloud_vertex_ai_create_auto_ml_forecasting_training_job_operator]
    create_auto_ml_forecasting_training_job = CreateAutoMLForecastingTrainingJobOperator(
        task_id="auto_ml_forecasting_task",
        project_id=PROJECT_ID,
        region=REGION,
        # job definition
        display_name=f"auto-ml-forecasting-{DISPLAY_NAME}",
        optimization_objective="minimize-rmse",
        column_specs=COLUMN_SPECS,
        # run params: dataset and column roles
        dataset_id=DATASET_ID,
        target_column=TEST_TARGET_COLUMN,
        time_column=TEST_TIME_COLUMN,
        time_series_identifier_column=TEST_TIME_SERIES_IDENTIFIER_COLUMN,
        available_at_forecast_columns=[TEST_TIME_COLUMN],
        unavailable_at_forecast_columns=[TEST_TARGET_COLUMN],
        time_series_attribute_columns=["city", "zip_code", "county"],
        weight_column=None,
        predefined_split_column_name=None,
        # run params: horizon, context window and granularity
        forecast_horizon=30,
        context_window=30,
        data_granularity_unit="day",
        data_granularity_count=1,
        # run params: budget and resulting model name
        budget_milli_node_hours=1000,
        model_display_name=f"auto-ml-forecasting-model-{DISPLAY_NAME}",
    )
    # [END how_to_cloud_vertex_ai_create_auto_ml_forecasting_training_job_operator]

    # [START how_to_cloud_vertex_ai_create_auto_ml_image_training_job_operator]
    create_auto_ml_image_training_job = CreateAutoMLImageTrainingJobOperator(
        task_id="auto_ml_image_task",
        project_id=PROJECT_ID,
        region=REGION,
        display_name=f"auto-ml-image-{DISPLAY_NAME}",
        model_display_name=f"auto-ml-image-model-{DISPLAY_NAME}",
        dataset_id=DATASET_ID,
        # model configuration
        prediction_type="classification",
        multi_label=False,
        model_type="CLOUD",
        # data split
        training_fraction_split=0.6,
        validation_fraction_split=0.2,
        test_fraction_split=0.2,
        # training limits
        budget_milli_node_hours=8000,
        disable_early_stopping=False,
    )
    # [END how_to_cloud_vertex_ai_create_auto_ml_image_training_job_operator]

    # [START how_to_cloud_vertex_ai_create_auto_ml_tabular_training_job_operator]
    create_auto_ml_tabular_training_job = CreateAutoMLTabularTrainingJobOperator(
        task_id="auto_ml_tabular_task",
        project_id=PROJECT_ID,
        region=REGION,
        display_name=f"auto-ml-tabular-{DISPLAY_NAME}",
        model_display_name="adopted-prediction-model",
        dataset_id=DATASET_ID,
        # model configuration
        optimization_prediction_type="classification",
        column_transformations=COLUMN_TRANSFORMATIONS,
        target_column="Adopted",
        # data split
        training_fraction_split=0.8,
        validation_fraction_split=0.1,
        test_fraction_split=0.1,
        disable_early_stopping=False,
    )
    # [END how_to_cloud_vertex_ai_create_auto_ml_tabular_training_job_operator]

    # [START how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]
    create_auto_ml_text_training_job = CreateAutoMLTextTrainingJobOperator(
        task_id="auto_ml_text_task",
        project_id=PROJECT_ID,
        region=REGION,
        display_name=f"auto-ml-text-{DISPLAY_NAME}",
        model_display_name=f"auto-ml-text-model-{DISPLAY_NAME}",
        dataset_id=DATASET_ID,
        # model configuration
        prediction_type="classification",
        multi_label=False,
        # data split
        training_fraction_split=0.7,
        validation_fraction_split=0.2,
        test_fraction_split=0.1,
        sync=True,
    )
    # [END how_to_cloud_vertex_ai_create_auto_ml_text_training_job_operator]

    # [START how_to_cloud_vertex_ai_create_auto_ml_video_training_job_operator]
    create_auto_ml_video_training_job = CreateAutoMLVideoTrainingJobOperator(
        task_id="auto_ml_video_task",
        project_id=PROJECT_ID,
        region=REGION,
        display_name=f"auto-ml-video-{DISPLAY_NAME}",
        model_display_name=f"auto-ml-video-model-{DISPLAY_NAME}",
        dataset_id=DATASET_ID,
        prediction_type="classification",
        model_type="CLOUD",
    )
    # [END how_to_cloud_vertex_ai_create_auto_ml_video_training_job_operator]

    # [START how_to_cloud_vertex_ai_delete_auto_ml_training_job_operator]
    delete_auto_ml_training_job = DeleteAutoMLTrainingJobOperator(
        task_id="delete_auto_ml_training_job",
        project_id=PROJECT_ID,
        region=REGION,
        training_pipeline_id=TRAINING_PIPELINE_ID,
    )
    # [END how_to_cloud_vertex_ai_delete_auto_ml_training_job_operator]

    # [START how_to_cloud_vertex_ai_list_auto_ml_training_job_operator]
    list_auto_ml_training_job = ListAutoMLTrainingJobOperator(
        task_id="list_auto_ml_training_job",
        project_id=PROJECT_ID,
        region=REGION,
    )
    # [END how_to_cloud_vertex_ai_list_auto_ml_training_job_operator]

0 comments on commit 6061cc4

Please sign in to comment.