
Commit dc5bf3f

Add discoverability for triggers in provider.yaml (#31576)
* Add discoverability for triggers in provider.yaml
1 parent a59076e commit dc5bf3f

File tree

11 files changed: +70 -72 lines

airflow/provider.yaml.schema.json

Lines changed: 4 additions & 4 deletions
@@ -208,8 +208,8 @@
           "type": "string",
           "description": "Integration name. It must have a matching item in the 'integration' section of any provider."
         },
-        "class-names": {
-          "description": "List of the trigger class name that implements trigger.",
+        "python-modules": {
+          "description": "List of Python modules containing the triggers.",
           "type": "array",
           "items": {
             "type": "string"
@@ -219,7 +219,7 @@
         "additionalProperties": false,
         "required": [
           "integration-name",
-          "class-names"
+          "python-modules"
         ]
       }
     },
@@ -312,7 +312,7 @@
       "items": {
         "type": "string"
       }
-    },
+    },
     "plugins": {
       "type": "array",
       "description": "Plugins exposed by the provider",

airflow/providers/amazon/provider.yaml

Lines changed: 18 additions & 0 deletions
@@ -505,6 +505,24 @@ hooks:
     python-modules:
       - airflow.providers.amazon.aws.hooks.appflow
 
+triggers:
+  - integration-name: AWS Batch
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.batch
+  - integration-name: Amazon EC2
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.ec2
+  - integration-name: Amazon Redshift
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.redshift_cluster
+  - integration-name: Amazon SageMaker
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.sagemaker
+  - integration-name: AWS Glue
+    python-modules:
+      - airflow.providers.amazon.aws.triggers.glue
+      - airflow.providers.amazon.aws.triggers.glue_crawler
+
 transfers:
   - source-integration-name: Amazon DynamoDB
     target-integration-name: Amazon Simple Storage Service (S3)

airflow/providers/apache/kafka/provider.yaml

Lines changed: 2 additions & 2 deletions
@@ -58,8 +58,8 @@ sensors:
 
 triggers:
   - integration-name: Apache Kafka
-    class-names:
-      - airflow.providers.apache.kafka.triggers.await_message.AwaitMessageTrigger
+    python-modules:
+      - airflow.providers.apache.kafka.triggers.await_message
 
 connection-types:
   - hook-class-name: airflow.providers.apache.kafka.hooks.base.KafkaBaseHook

airflow/providers/apache/livy/provider.yaml

Lines changed: 2 additions & 2 deletions
@@ -70,8 +70,8 @@ hooks:
 
 triggers:
   - integration-name: Apache Livy
-    class-names:
-      - airflow.providers.apache.livy.triggers.livy.LivyTrigger
+    python-modules:
+      - airflow.providers.apache.livy.triggers.livy
 
 
 connection-types:

airflow/providers/cncf/kubernetes/provider.yaml

Lines changed: 2 additions & 2 deletions
@@ -109,8 +109,8 @@ hooks:
 
 triggers:
   - integration-name: Kubernetes
-    class-names:
-      - airflow.providers.cncf.kubernetes.triggers.pod.KubernetesPodTrigger
+    python-modules:
+      - airflow.providers.cncf.kubernetes.triggers.pod
 
 
 connection-types:

airflow/providers/databricks/provider.yaml

Lines changed: 2 additions & 2 deletions
@@ -98,8 +98,8 @@ hooks:
 
 triggers:
   - integration-name: Databricks
-    class-names:
-      - airflow.providers.databricks.triggers.databricks.DatabricksExecutionTrigger
+    python-modules:
+      - airflow.providers.databricks.triggers.databricks
 
 sensors:
   - integration-name: Databricks

airflow/providers/dbt/cloud/provider.yaml

Lines changed: 2 additions & 2 deletions
@@ -67,8 +67,8 @@ hooks:
 
 triggers:
   - integration-name: dbt Cloud
-    class-names:
-      - airflow.providers.dbt.cloud.triggers.dbt.DbtCloudRunJobTrigger
+    python-modules:
+      - airflow.providers.dbt.cloud.triggers.dbt
 
 
 connection-types:

airflow/providers/google/provider.yaml

Lines changed: 20 additions & 32 deletions
@@ -832,47 +832,35 @@ hooks:
 
 triggers:
   - integration-name: Google BigQuery Data Transfer Service
-    class-names:
-      - airflow.providers.google.cloud.triggers.bigquery_dts.BigQueryDataTransferRunTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.bigquery_dts
   - integration-name: Google BigQuery
-    class-names:
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryInsertJobTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryCheckTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryGetDataTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryIntervalCheckTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryValueCheckTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryTableExistenceTrigger
-      - airflow.providers.google.cloud.triggers.bigquery.BigQueryTablePartitionExistenceTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.bigquery
   - integration-name: Google Cloud Build
-    class-names:
-      - airflow.providers.google.cloud.triggers.cloud_build.CloudBuildCreateBuildTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.cloud_build
   - integration-name: Google Cloud Composer
-    class-names:
-      - airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerExecutionTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.cloud_composer
   - integration-name: Google Dataflow
-    class-names:
-      - airflow.providers.google.cloud.triggers.dataflow.TemplateJobStartTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.dataflow
   - integration-name: Google Data Fusion
-    class-names:
-      - airflow.providers.google.cloud.triggers.datafusion.DataFusionStartPipelineTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.datafusion
   - integration-name: Google Dataproc
-    class-names:
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocBaseTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocSubmitTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocClusterTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocBatchTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocDeleteClusterTrigger
-      - airflow.providers.google.cloud.triggers.dataproc.DataprocWorkflowTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.dataproc
   - integration-name: Google Cloud Storage (GCS)
-    class-names:
-      - airflow.providers.google.cloud.triggers.gcs.GCSBlobTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.gcs
   - integration-name: Google Kubernetes Engine
-    class-names:
-      - airflow.providers.google.cloud.triggers.kubernetes_engine.GKEStartPodTrigger
-      - airflow.providers.google.cloud.triggers.kubernetes_engine.GKEOperationTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.kubernetes_engine
   - integration-name: Google Machine Learning Engine
-    class-names:
-      - airflow.providers.google.cloud.triggers.mlengine.MLEngineStartTrainingJobTrigger
+    python-modules:
+      - airflow.providers.google.cloud.triggers.mlengine
 
 transfers:
   - source-integration-name: Presto

airflow/providers/microsoft/azure/provider.yaml

Lines changed: 8 additions & 0 deletions
@@ -218,6 +218,14 @@ hooks:
     python-modules:
       - airflow.providers.microsoft.azure.hooks.synapse
 
+triggers:
+  - integration-name: Microsoft Azure Data Factory
+    python-modules:
+      - airflow.providers.microsoft.azure.triggers.data_factory
+  - integration-name: Microsoft Azure Blob Storage
+    python-modules:
+      - airflow.providers.microsoft.azure.triggers.wasb
+
 transfers:
   - source-integration-name: Local
     target-integration-name: Microsoft Azure Data Lake Storage

airflow/providers_manager.py

Lines changed: 1 addition & 1 deletion
@@ -956,7 +956,7 @@ def initialize_providers_triggers(self):
         self.initialize_providers_list()
         for provider_package, provider in self._provider_dict.items():
             for trigger in provider.data.get("triggers", []):
-                for trigger_class_name in trigger.get("class-names"):
+                for trigger_class_name in trigger.get("python-modules"):
                     self._trigger_info_set.add(
                         TriggerInfo(
                             package_name=provider_package,
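
Editor's note: the rename above is confined to the lookup key; the loop variable is still called trigger_class_name even though it now receives module paths from python-modules. As a hedged sketch (not the Airflow implementation), this is one way such a module path could be expanded into the trigger classes it defines. BaseTrigger is Airflow's real base class; the helper name is hypothetical.

# A hedged sketch, not the Airflow implementation: expand a module path
# listed under `python-modules` into the trigger classes it defines.
from __future__ import annotations

import importlib
import inspect

from airflow.triggers.base import BaseTrigger


def trigger_classes_in_module(module_path: str) -> list[str]:
    """Return dotted names of BaseTrigger subclasses defined in the module."""
    module = importlib.import_module(module_path)
    return [
        f"{module_path}.{name}"
        for name, obj in inspect.getmembers(module, inspect.isclass)
        if issubclass(obj, BaseTrigger)
        and obj is not BaseTrigger
        and obj.__module__ == module_path  # skip classes merely re-imported here
    ]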

scripts/in_container/run_provider_yaml_files_check.py

Lines changed: 9 additions & 25 deletions
@@ -44,6 +44,7 @@
 EXCLUDED_MODULES = [
     "airflow.providers.apache.hdfs.sensors.hdfs",
     "airflow.providers.apache.hdfs.hooks.hdfs",
+    "airflow.providers.cncf.kubernetes.triggers.kubernetes_pod",
 ]
 
 
@@ -229,10 +230,10 @@ def parse_module_data(provider_data, resource_type, yaml_file_path):
 
 
 def check_correctness_of_list_of_sensors_operators_hook_modules(yaml_files: dict[str, dict]):
-    print("Checking completeness of list of {sensors, hooks, operators}")
-    print(" -- {sensors, hooks, operators} - Expected modules (left) : Current modules (right)")
+    print("Checking completeness of list of {sensors, hooks, operators, triggers}")
+    print(" -- {sensors, hooks, operators, triggers} - Expected modules (left) : Current modules (right)")
     for (yaml_file_path, provider_data), resource_type in product(
-        yaml_files.items(), ["sensors", "operators", "hooks"]
+        yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
     ):
         expected_modules, provider_package, resource_data = parse_module_data(
             provider_data, resource_type, yaml_file_path
@@ -254,9 +255,9 @@ def check_correctness_of_list_of_sensors_operators_hook_modules(yaml_files: dict[str, dict]):
 
 
 def check_duplicates_in_integrations_names_of_hooks_sensors_operators(yaml_files: dict[str, dict]):
-    print("Checking for duplicates in list of {sensors, hooks, operators}")
+    print("Checking for duplicates in list of {sensors, hooks, operators, triggers}")
     for (yaml_file_path, provider_data), resource_type in product(
-        yaml_files.items(), ["sensors", "operators", "hooks"]
+        yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
     ):
         resource_data = provider_data.get(resource_type, [])
         current_integrations = [r.get("integration-name", "") for r in resource_data]
@@ -294,7 +295,7 @@ def check_completeness_of_list_of_transfers(yaml_files: dict[str, dict]):
     )
 
 
-def check_hook_classes(yaml_files: dict[str, dict]):
+def check_hook_connection_classes(yaml_files: dict[str, dict]):
     print("Checking connection classes belong to package, exist and are classes")
     resource_type = "hook-class-names"
     for yaml_file_path, provider_data in yaml_files.items():
@@ -306,22 +307,6 @@ def check_hook_connection_classes(yaml_files: dict[str, dict]):
         )
 
 
-def check_trigger_classes(yaml_files: dict[str, dict]):
-    print("Checking triggers classes belong to package, exist and are classes")
-    resource_type = "triggers"
-    for yaml_file_path, provider_data in yaml_files.items():
-        provider_package = pathlib.Path(yaml_file_path).parent.as_posix().replace("/", ".")
-        trigger_classes = {
-            name
-            for trigger_class in provider_data.get(resource_type, {})
-            for name in trigger_class["class-names"]
-        }
-        if trigger_classes:
-            check_if_objects_exist_and_belong_to_package(
-                trigger_classes, provider_package, yaml_file_path, resource_type, ObjectType.CLASS
-            )
-
-
 def check_plugin_classes(yaml_files: dict[str, dict]):
     print("Checking plugin classes belong to package, exist and are classes")
     resource_type = "plugins"
@@ -377,7 +362,7 @@ def check_invalid_integration(yaml_files: dict[str, dict]):
     all_integration_names = set(get_all_integration_names(yaml_files))
 
     for (yaml_file_path, provider_data), resource_type in product(
-        yaml_files.items(), ["sensors", "operators", "hooks"]
+        yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
     ):
         resource_data = provider_data.get(resource_type, [])
         current_names = {r["integration-name"] for r in resource_data}
@@ -522,10 +507,9 @@ def check_providers_have_all_documentation_files(yaml_files: dict[str, dict]):
 
     check_completeness_of_list_of_transfers(all_parsed_yaml_files)
     check_duplicates_in_list_of_transfers(all_parsed_yaml_files)
-    check_hook_classes(all_parsed_yaml_files)
+    check_hook_connection_classes(all_parsed_yaml_files)
     check_plugin_classes(all_parsed_yaml_files)
     check_extra_link_classes(all_parsed_yaml_files)
-    check_trigger_classes(all_parsed_yaml_files)
     check_correctness_of_list_of_sensors_operators_hook_modules(all_parsed_yaml_files)
     check_unique_provider_name(all_parsed_yaml_files)
     check_providers_have_all_documentation_files(all_parsed_yaml_files)
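
Editor's note: with the dedicated check_trigger_classes removed, triggers now flow through the same generic module-level checks as sensors, operators, and hooks. As a minimal standalone sketch of what such a check verifies for each python-modules entry (the helper name is hypothetical and uses only standard-library calls, not the script's own helpers):

# A minimal sketch using only the standard library; the helper name is
# hypothetical, not taken from run_provider_yaml_files_check.py.
from __future__ import annotations

import importlib.util


def module_exists_and_belongs(module_path: str, provider_package: str) -> bool:
    """True if the module is importable and lives under the provider package."""
    if not module_path.startswith(provider_package + "."):
        return False
    try:
        return importlib.util.find_spec(module_path) is not None
    except ModuleNotFoundError:  # a parent package does not exist
        return False


# e.g. module_exists_and_belongs(
#     "airflow.providers.apache.kafka.triggers.await_message",
#     "airflow.providers.apache.kafka",
# ) is True when the Kafka provider is installed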
