
Add discoverability for triggers in provider.yaml (#31576)
* Add discoverability for triggers in provider.yaml
eladkal committed Jun 2, 2023
1 parent: a59076e, commit: dc5bf3f
Showing 11 changed files with 70 additions and 72 deletions.
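
In short: every "triggers" entry in a provider.yaml now lists the Python modules that contain the triggers instead of individual trigger class names. A minimal before/after sketch of that shape change, using the Apache Kafka entry from the diff below (the parsing code itself is illustrative, not part of the commit):

    import yaml

    OLD = yaml.safe_load("""
    triggers:
      - integration-name: Apache Kafka
        class-names:
          - airflow.providers.apache.kafka.triggers.await_message.AwaitMessageTrigger
    """)

    NEW = yaml.safe_load("""
    triggers:
      - integration-name: Apache Kafka
        python-modules:
          - airflow.providers.apache.kafka.triggers.await_message
    """)

    # Same integration, but the entry now points at an importable module
    # rather than a single class inside it.
    assert "class-names" in OLD["triggers"][0]
    assert "python-modules" in NEW["triggers"][0]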
airflow/provider.yaml.schema.json (8 changes: 4 additions & 4 deletions)

@@ -208,8 +208,8 @@
         "type": "string",
         "description": "Integration name. It must have a matching item in the 'integration' section of any provider."
       },
-      "class-names": {
-        "description": "List of the trigger class name that implements trigger.",
+      "python-modules": {
+        "description": "List of Python modules containing the triggers.",
         "type": "array",
         "items": {
           "type": "string"
@@ -219,7 +219,7 @@
       "additionalProperties": false,
       "required": [
         "integration-name",
-        "class-names"
+        "python-modules"
       ]
     }
   },
@@ -312,7 +312,7 @@
       "items": {
         "type": "string"
       }
-    },
+    },
     "plugins": {
       "type": "array",
       "description": "Plugins exposed by the provider",
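
To see what the renamed field accepts, here is a minimal validation sketch using the jsonschema package. The schema fragment is reconstructed from the hunk above; the sample entry is illustrative:

    import jsonschema

    # Reconstructed "triggers" item schema after this commit.
    TRIGGERS_ITEM_SCHEMA = {
        "type": "object",
        "properties": {
            "integration-name": {
                "type": "string",
                "description": "Integration name. It must have a matching item in the 'integration' section of any provider.",
            },
            "python-modules": {
                "description": "List of Python modules containing the triggers.",
                "type": "array",
                "items": {"type": "string"},
            },
        },
        "additionalProperties": False,
        "required": ["integration-name", "python-modules"],
    }

    entry = {
        "integration-name": "Apache Kafka",
        "python-modules": ["airflow.providers.apache.kafka.triggers.await_message"],
    }

    # Raises jsonschema.ValidationError if the entry still uses "class-names".
    jsonschema.validate(entry, TRIGGERS_ITEM_SCHEMA)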
airflow/providers/amazon/provider.yaml (18 changes: 18 additions & 0 deletions)

@@ -505,6 +505,24 @@ hooks:
     python-modules:
       - airflow.providers.amazon.aws.hooks.appflow
 
+triggers:
+  - integration-name: AWS Batch
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.batch
+  - integration-name: Amazon EC2
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.ec2
+  - integration-name: Amazon Redshift
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.redshift_cluster
+  - integration-name: Amazon SageMaker
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.sagemaker
+  - integration-name: AWS Glue
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.glue
+      - airflow.providers.amazon.aws.triggers.glue_crawler
+
 transfers:
   - source-integration-name: Amazon DynamoDB
     target-integration-name: Amazon Simple Storage Service (S3)
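
Pointing at modules instead of classes means a new trigger class added to a listed module stays discoverable without touching provider.yaml again. One plausible way to enumerate the classes behind an entry, sketched as an assumption (this helper is not part of the commit; airflow.triggers.base.BaseTrigger is the real base class of Airflow triggers):

    from __future__ import annotations

    import importlib
    import inspect

    from airflow.triggers.base import BaseTrigger

    def discover_trigger_classes(module_path: str) -> list[str]:
        """Fully qualified names of trigger classes defined in the given module."""
        module = importlib.import_module(module_path)
        return [
            f"{module_path}.{name}"
            for name, obj in inspect.getmembers(module, inspect.isclass)
            # Keep only triggers defined here, not ones re-imported from elsewhere.
            if issubclass(obj, BaseTrigger) and obj.__module__ == module_path
        ]

    # e.g. discover_trigger_classes("airflow.providers.amazon.aws.triggers.batch")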
airflow/providers/apache/kafka/provider.yaml (4 changes: 2 additions & 2 deletions)

@@ -58,8 +58,8 @@ sensors:
 
 triggers:
   - integration-name: Apache Kafka
-    class-names:
-      - airflow.providers.apache.kafka.triggers.await_message.AwaitMessageTrigger
+    python-modules:
+      - airflow.providers.apache.kafka.triggers.await_message
 
 connection-types:
   - hook-class-name: airflow.providers.apache.kafka.hooks.base.KafkaBaseHook
airflow/providers/apache/livy/provider.yaml (4 changes: 2 additions & 2 deletions)

@@ -70,8 +70,8 @@ hooks:
 
 triggers:
   - integration-name: Apache Livy
-    class-names:
-      - airflow.providers.apache.livy.triggers.livy.LivyTrigger
+    python-modules:
+      - airflow.providers.apache.livy.triggers.livy
 
 
 connection-types:
airflow/providers/cncf/kubernetes/provider.yaml (4 changes: 2 additions & 2 deletions)

@@ -109,8 +109,8 @@ hooks:
 
 triggers:
   - integration-name: Kubernetes
-    class-names:
-      - airflow.providers.cncf.kubernetes.triggers.pod.KubernetesPodTrigger
+    python-modules:
+      - airflow.providers.cncf.kubernetes.triggers.pod
 
 
 connection-types:
airflow/providers/databricks/provider.yaml (4 changes: 2 additions & 2 deletions)

@@ -98,8 +98,8 @@ hooks:
 
 triggers:
   - integration-name: Databricks
-    class-names:
-      - airflow.providers.databricks.triggers.databricks.DatabricksExecutionTrigger
+    python-modules:
+      - airflow.providers.databricks.triggers.databricks
 
 sensors:
   - integration-name: Databricks
airflow/providers/dbt/cloud/provider.yaml (4 changes: 2 additions & 2 deletions)

@@ -67,8 +67,8 @@ hooks:
 
 triggers:
   - integration-name: dbt Cloud
-    class-names:
-      - airflow.providers.dbt.cloud.triggers.dbt.DbtCloudRunJobTrigger
+    python-modules:
+      - airflow.providers.dbt.cloud.triggers.dbt
 
 
 connection-types:
airflow/providers/google/provider.yaml (52 changes: 20 additions & 32 deletions)

@@ -832,47 +832,35 @@ hooks:
 
 triggers:
   - integration-name: Google BigQuery Data Transfer Service
-    class-names:
-      - airflow.providers.google.cloud.triggers.bigquery_dts.BigQueryDataTransferRunTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.bigquery_dts
   - integration-name: Google BigQuery
-    class-names:
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryInsertJobTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryCheckTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryGetDataTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryIntervalCheckTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryValueCheckTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryTableExistenceTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryTablePartitionExistenceTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.bigquery
   - integration-name: Google Cloud Build
-    class-names:
-      - airflow.providers.google.cloud.triggers.cloud_build.CloudBuildCreateBuildTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.cloud_build
   - integration-name: Google Cloud Composer
-    class-names:
-      - airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerExecutionTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.cloud_composer
   - integration-name: Google Dataflow
-    class-names:
-      - airflow.providers.google.cloud.triggers.dataflow.TemplateJobStartTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.dataflow
   - integration-name: Google Data Fusion
-    class-names:
-      - airflow.providers.google.cloud.triggers.datafusion.DataFusionStartPipelineTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.datafusion
   - integration-name: Google Dataproc
-    class-names:
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocBaseTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocSubmitTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocClusterTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocBatchTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocDeleteClusterTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocWorkflowTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.dataproc
   - integration-name: Google Cloud Storage (GCS)
-    class-names:
-      - airflow.providers.google.cloud.triggers.gcs.GCSBlobTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.gcs
   - integration-name: Google Kubernetes Engine
-    class-names:
-      - airflow.providers.google.cloud.triggers.kubernetes_engine.GKEStartPodTrigger
-      - airflow.providers.google.cloud.triggers.kubernetes_engine.GKEOperationTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.kubernetes_engine
   - integration-name: Google Machine Learning Engine
-    class-names:
-      - airflow.providers.google.cloud.triggers.mlengine.MLEngineStartTrainingJobTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.mlengine
 
 transfers:
   - source-integration-name: Presto
airflow/providers/microsoft/azure/provider.yaml (8 changes: 8 additions & 0 deletions)

@@ -218,6 +218,14 @@ hooks:
     python-modules:
       - airflow.providers.microsoft.azure.hooks.synapse
 
+triggers:
+  - integration-name: Microsoft Azure Data Factory
+    python-modules:
+      - airflow.providers.microsoft.azure.triggers.data_factory
+  - integration-name: Microsoft Azure Blob Storage
+    python-modules:
+      - airflow.providers.microsoft.azure.triggers.wasb
+
 transfers:
   - source-integration-name: Local
     target-integration-name: Microsoft Azure Data Lake Storage
airflow/providers_manager.py (2 changes: 1 addition & 1 deletion)

@@ -956,7 +956,7 @@ def initialize_providers_triggers(self):
         self.initialize_providers_list()
         for provider_package, provider in self._provider_dict.items():
             for trigger in provider.data.get("triggers", []):
-                for trigger_class_name in trigger.get("class-names"):
+                for trigger_class_name in trigger.get("python-modules"):
                     self._trigger_info_set.add(
                         TriggerInfo(
                             package_name=provider_package,
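
Note that the loop variable keeps its old name, trigger_class_name, even though it now iterates module paths. Here is a standalone sketch of the same collection logic with a simplified TriggerInfo; only the package_name field is visible in the hunk, so the trigger_module field below is a stand-in for the elided arguments:

    from __future__ import annotations

    from typing import NamedTuple

    class TriggerInfo(NamedTuple):
        package_name: str
        trigger_module: str  # stand-in; the real TriggerInfo fields are elided above

    def collect_trigger_info(provider_dict: dict[str, dict]) -> set[TriggerInfo]:
        """provider_dict maps provider package name to parsed provider.yaml data."""
        infos: set[TriggerInfo] = set()
        for provider_package, data in provider_dict.items():
            for trigger in data.get("triggers", []):
                for module_path in trigger.get("python-modules"):
                    infos.add(TriggerInfo(provider_package, module_path))
        return infos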
scripts/in_container/run_provider_yaml_files_check.py (34 changes: 9 additions & 25 deletions)

@@ -44,6 +44,7 @@
 EXCLUDED_MODULES = [
     "airflow.providers.apache.hdfs.sensors.hdfs",
     "airflow.providers.apache.hdfs.hooks.hdfs",
+    "airflow.providers.cncf.kubernetes.triggers.kubernetes_pod",
 ]
 
 
@@ -229,10 +230,10 @@ def parse_module_data(provider_data, resource_type, yaml_file_path)
 
 
 def check_correctness_of_list_of_sensors_operators_hook_modules(yaml_files: dict[str, dict]):
-    print("Checking completeness of list of {sensors, hooks, operators}")
-    print(" -- {sensors, hooks, operators} - Expected modules (left) : Current modules (right)")
+    print("Checking completeness of list of {sensors, hooks, operators, triggers}")
+    print(" -- {sensors, hooks, operators, triggers} - Expected modules (left) : Current modules (right)")
     for (yaml_file_path, provider_data), resource_type in product(
-        yaml_files.items(), ["sensors", "operators", "hooks"]
+        yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
     ):
         expected_modules, provider_package, resource_data = parse_module_data(
             provider_data, resource_type, yaml_file_path
@@ -254,9 +255,9 @@ def check_correctness_of_list_of_sensors_operators_hook_modules(yaml_files: dict
 
 
 def check_duplicates_in_integrations_names_of_hooks_sensors_operators(yaml_files: dict[str, dict]):
-    print("Checking for duplicates in list of {sensors, hooks, operators}")
+    print("Checking for duplicates in list of {sensors, hooks, operators, triggers}")
     for (yaml_file_path, provider_data), resource_type in product(
-        yaml_files.items(), ["sensors", "operators", "hooks"]
+        yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
     ):
         resource_data = provider_data.get(resource_type, [])
         current_integrations = [r.get("integration-name", "") for r in resource_data]
@@ -294,7 +295,7 @@ def check_completeness_of_list_of_transfers(yaml_files: dict[str, dict]):
         )
 
 
-def check_hook_classes(yaml_files: dict[str, dict]):
+def check_hook_connection_classes(yaml_files: dict[str, dict]):
     print("Checking connection classes belong to package, exist and are classes")
     resource_type = "hook-class-names"
     for yaml_file_path, provider_data in yaml_files.items():
@@ -306,22 +307,6 @@ def check_hook_classes(yaml_files: dict[str, dict]):
         )
 
 
-def check_trigger_classes(yaml_files: dict[str, dict]):
-    print("Checking triggers classes belong to package, exist and are classes")
-    resource_type = "triggers"
-    for yaml_file_path, provider_data in yaml_files.items():
-        provider_package = pathlib.Path(yaml_file_path).parent.as_posix().replace("/", ".")
-        trigger_classes = {
-            name
-            for trigger_class in provider_data.get(resource_type, {})
-            for name in trigger_class["class-names"]
-        }
-        if trigger_classes:
-            check_if_objects_exist_and_belong_to_package(
-                trigger_classes, provider_package, yaml_file_path, resource_type, ObjectType.CLASS
-            )
-
-
 def check_plugin_classes(yaml_files: dict[str, dict]):
     print("Checking plugin classes belong to package, exist and are classes")
     resource_type = "plugins"
@@ -377,7 +362,7 @@ def check_invalid_integration(yaml_files: dict[str, dict]):
     all_integration_names = set(get_all_integration_names(yaml_files))
 
     for (yaml_file_path, provider_data), resource_type in product(
-        yaml_files.items(), ["sensors", "operators", "hooks"]
+        yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
     ):
         resource_data = provider_data.get(resource_type, [])
         current_names = {r["integration-name"] for r in resource_data}
@@ -522,10 +507,9 @@ def check_providers_have_all_documentation_files(yaml_files: dict[str, dict]):
 
     check_completeness_of_list_of_transfers(all_parsed_yaml_files)
     check_duplicates_in_list_of_transfers(all_parsed_yaml_files)
-    check_hook_classes(all_parsed_yaml_files)
+    check_hook_connection_classes(all_parsed_yaml_files)
     check_plugin_classes(all_parsed_yaml_files)
     check_extra_link_classes(all_parsed_yaml_files)
-    check_trigger_classes(all_parsed_yaml_files)
     check_correctness_of_list_of_sensors_operators_hook_modules(all_parsed_yaml_files)
     check_unique_provider_name(all_parsed_yaml_files)
     check_providers_have_all_documentation_files(all_parsed_yaml_files)
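
Each check fans out over every (provider.yaml, resource type) pair with itertools.product, now including "triggers". Here is a runnable sketch of that pattern combined with a simple duplicate detector; the reporting style is an assumption, since only the iteration and the integration-name collection appear in the diff:

    from __future__ import annotations

    from collections import Counter
    from itertools import product

    def find_duplicate_integrations(yaml_files: dict[str, dict]) -> None:
        for (yaml_file_path, provider_data), resource_type in product(
            yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
        ):
            resource_data = provider_data.get(resource_type, [])
            names = [r.get("integration-name", "") for r in resource_data]
            duplicates = [name for name, count in Counter(names).items() if count > 1]
            if duplicates:
                print(f"{yaml_file_path}: duplicate {resource_type} entries: {duplicates}")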
