Skip to content

Commit

Permalink
Import Hooks lazily individually in providers manager (#17682)
Browse files Browse the repository at this point in the history
This change implements lazy loading of individual hooks for providers
manager. First the hooks list is discovered by the manager when
hooks are accessed, but the hooks are not immediately
imported - the hooks initially keep just a callable that will
be used to retrieve the hook when first accessed.

Besides listing details of all hooks, all Hooks are only imported when we
want to retrieve the list of available field behaviours and widgets
(which only happens in webserver and should happen anyway whenever one
of those are needed because they are all collectively used in the
connection view).

In the case when hooks are accessed in tasks
(connection.get_hook()) only the individual Hooks are
imported when accessed.

The change deprecates 'hook-class-names' json-schema and replaces it
with 'connection-types' because we need to know connection-type
for each HookClass name declaratively so that we can utilise
it in connection.get_hook() method (otherwise we do not know
which Hooks correspond to which connection type without importing
them).

The change is backwards compatible. It adds deprecation
warnings in case providers use the 'hook-class-names' property
only and logs warnings in case they provide both `hook-class-names`
and `connection-types` but there are inconsistencies between
those.

Part of this change is also to fix some inconsistencies found
when all hooks were added to connection-types arrays, which
makes potentially backwards-incompatible changes to Google Provider
where some hooks were using `google_cloud_default` name for
default_connection_type, but they were in fact using a different,
specialized Hook.
  • Loading branch information
potiuk committed Aug 19, 2021
1 parent 02fbe44 commit 76ed2a4
Show file tree
Hide file tree
Showing 67 changed files with 711 additions and 76 deletions.
12 changes: 7 additions & 5 deletions airflow/cli/commands/provider_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
from airflow.providers_manager import ProvidersManager
from airflow.utils.cli import suppress_logs_and_warning

ERROR_IMPORTING_HOOK = "Error when importing hook!"


def _remove_rst_syntax(value: str) -> str:
    """Return ``value`` with RST markup characters removed.

    Leading and trailing spaces, newlines and dots are trimmed first; then
    every backtick, underscore and angle bracket is deleted from what remains.
    """
    trimmed = value.strip(" \n.")
    return re.sub("[`_<>]", "", trimmed)
Expand Down Expand Up @@ -68,10 +70,10 @@ def hooks_list(args):
output=args.output,
mapper=lambda x: {
"connection_type": x[0],
"class": x[1].connection_class,
"conn_id_attribute_name": x[1].connection_id_attribute_name,
'package_name': x[1].package_name,
'hook_name': x[1].hook_name,
"class": x[1].hook_class_name if x[1] else ERROR_IMPORTING_HOOK,
"conn_id_attribute_name": x[1].connection_id_attribute_name if x[1] else ERROR_IMPORTING_HOOK,
'package_name': x[1].package_name if x[1] else ERROR_IMPORTING_HOOK,
'hook_name': x[1].hook_name if x[1] else ERROR_IMPORTING_HOOK,
},
)

Expand All @@ -84,7 +86,7 @@ def connection_form_widget_list(args):
output=args.output,
mapper=lambda x: {
"connection_parameter_name": x[0],
"class": x[1].connection_class,
"class": x[1].hook_class_name,
'package_name': x[1].package_name,
'field_type': x[1].field.field_class.__name__,
},
Expand Down
10 changes: 7 additions & 3 deletions airflow/models/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,13 @@ def rotate_fernet_key(self):

def get_hook(self):
"""Return hook based on conn_type."""
hook_class_name, conn_id_param, package_name, hook_name = ProvidersManager().hooks.get(
self.conn_type, (None, None, None, None)
)
(
hook_class_name,
conn_id_param,
package_name,
hook_name,
connection_type,
) = ProvidersManager().hooks.get(self.conn_type, (None, None, None, None, None))

if not hook_class_name:
raise AirflowException(f'Unknown hook type "{self.conn_type}"')
Expand Down
26 changes: 24 additions & 2 deletions airflow/provider.yaml.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,33 @@
},
"hook-class-names": {
"type": "array",
"description": "Hook class names that provide connection types to core",
"description": "Hook class names that provide connection types to core (deprecated by connection-types)",
"items": {
"type": "string"
"type": "string"
},
"deprecated": {
"description": "The hook-class-names property has been deprecated in favour of connection-types which is more performant version allowing to only import individual Hooks rather than all hooks at once",
"deprecatedVersion": "2.2"
}
},
"connection-types": {
"type": "array",
"description": "Array of connection types mapped to hook class names",
"items": {
"type": "object",
"properties": {
"connection-type": {
"description": "Type of connection defined by the provider",
"type": "string"
},
"hook-class-name": {
"description": "Hook class name that implements the connection type",
"type": "string"
}
}
},
"required": ["connection-type", "hook-class-name"]
},
"extra-links": {
"type": "array",
"description": "Operator class names that provide extra link functionality",
Expand Down
24 changes: 23 additions & 1 deletion airflow/provider_info.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,33 @@
},
"hook-class-names": {
"type": "array",
"description": "Hook class names that provide connection types to core",
"description": "Hook class names that provide connection types to core (deprecated by connection-types)",
"items": {
"type": "string"
},
"deprecated": {
"description": "The hook-class-names property has been deprecated in favour of connection-types which is more performant version allowing to only import individual Hooks rather than all hooks at once",
"deprecatedVersion": "2.2.0"
}
},
"connection-types": {
"type": "array",
"description": "Map of connection types mapped to hook class names.",
"items": {
"type": "object",
"properties": {
"connection-type": {
"description": "Type of connection defined by the provider",
"type": "string"
},
"hook-class-name": {
"description": "Hook class name that implements the connection type",
"type": "string"
}
}
},
"required": ["connection-type", "hook-class-name"]
},
"extra-links": {
"type": "array",
"description": "Operator class names that provide extra link functionality",
Expand Down
4 changes: 4 additions & 0 deletions airflow/providers/airbyte/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,7 @@ sensors:

hook-class-names:
- airflow.providers.airbyte.hooks.airbyte.AirbyteHook

connection-types:
- hook-class-name: airflow.providers.airbyte.hooks.airbyte.AirbyteHook
connection-type: airbyte
4 changes: 4 additions & 0 deletions airflow/providers/alibaba/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ hooks:

hook-class-names:
- airflow.providers.alibaba.cloud.hooks.oss.OSSHook

connection-types:
- hook-class-name: airflow.providers.alibaba.cloud.hooks.oss.OSSHook
connection-type: oss
8 changes: 8 additions & 0 deletions airflow/providers/amazon/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,14 @@ hook-class-names:
- airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook
- airflow.providers.amazon.aws.hooks.emr.EmrHook

connection-types:
- hook-class-name: airflow.providers.amazon.aws.hooks.s3.S3Hook
connection-type: s3
- hook-class-name: airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook
connection-type: aws
- hook-class-name: airflow.providers.amazon.aws.hooks.emr.EmrHook
connection-type: emr

secrets-backends:
- airflow.providers.amazon.aws.secrets.secrets_manager.SecretsManagerBackend
- airflow.providers.amazon.aws.secrets.systems_manager.SystemsManagerParameterStoreBackend
Expand Down
4 changes: 4 additions & 0 deletions airflow/providers/apache/cassandra/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,7 @@ hooks:

hook-class-names:
- airflow.providers.apache.cassandra.hooks.cassandra.CassandraHook

connection-types:
- hook-class-name: airflow.providers.apache.cassandra.hooks.cassandra.CassandraHook
connection-type: cassandra
4 changes: 4 additions & 0 deletions airflow/providers/apache/drill/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,7 @@ hooks:

hook-class-names:
- airflow.providers.apache.drill.hooks.drill.DrillHook

connection-types:
- hook-class-name: airflow.providers.apache.drill.hooks.drill.DrillHook
connection-type: drill
4 changes: 4 additions & 0 deletions airflow/providers/apache/druid/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ hooks:
hook-class-names:
- airflow.providers.apache.druid.hooks.druid.DruidDbApiHook

connection-types:
- hook-class-name: airflow.providers.apache.druid.hooks.druid.DruidDbApiHook
connection-type: druid

transfers:
- source-integration-name: Apache Hive
target-integration-name: Apache Druid
Expand Down
4 changes: 4 additions & 0 deletions airflow/providers/apache/hdfs/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,7 @@ hooks:

hook-class-names:
- airflow.providers.apache.hdfs.hooks.hdfs.HDFSHook

connection-types:
- hook-class-name: airflow.providers.apache.hdfs.hooks.hdfs.HDFSHook
connection-type: hdfs
8 changes: 8 additions & 0 deletions airflow/providers/apache/hive/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,11 @@ hook-class-names:
- airflow.providers.apache.hive.hooks.hive.HiveCliHook
- airflow.providers.apache.hive.hooks.hive.HiveServer2Hook
- airflow.providers.apache.hive.hooks.hive.HiveMetastoreHook

connection-types:
- hook-class-name: airflow.providers.apache.hive.hooks.hive.HiveCliHook
connection-type: hive_cli
- hook-class-name: airflow.providers.apache.hive.hooks.hive.HiveServer2Hook
connection-type: hiveserver2
- hook-class-name: airflow.providers.apache.hive.hooks.hive.HiveMetastoreHook
connection-type: hive_metastore
4 changes: 4 additions & 0 deletions airflow/providers/apache/livy/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,7 @@ hooks:

hook-class-names:
- airflow.providers.apache.livy.hooks.livy.LivyHook

connection-types:
- hook-class-name: airflow.providers.apache.livy.hooks.livy.LivyHook
connection-type: livy
4 changes: 4 additions & 0 deletions airflow/providers/apache/pig/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,7 @@ hooks:

hook-class-names:
- airflow.providers.apache.pig.hooks.pig.PigCliHook

connection-types:
- connection-type: pig_cli
hook-class-name: airflow.providers.apache.pig.hooks.pig.PigCliHook
8 changes: 8 additions & 0 deletions airflow/providers/apache/spark/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,11 @@ hook-class-names:
- airflow.providers.apache.spark.hooks.spark_jdbc.SparkJDBCHook
- airflow.providers.apache.spark.hooks.spark_sql.SparkSqlHook
- airflow.providers.apache.spark.hooks.spark_submit.SparkSubmitHook

connection-types:
- hook-class-name: airflow.providers.apache.spark.hooks.spark_jdbc.SparkJDBCHook
connection-type: spark_jdbc
- hook-class-name: airflow.providers.apache.spark.hooks.spark_sql.SparkSqlHook
connection-type: spark_sql
- hook-class-name: airflow.providers.apache.spark.hooks.spark_submit.SparkSubmitHook
connection-type: spark
4 changes: 4 additions & 0 deletions airflow/providers/apache/sqoop/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ hooks:

hook-class-names:
- airflow.providers.apache.sqoop.hooks.sqoop.SqoopHook

connection-types:
- hook-class-name: airflow.providers.apache.sqoop.hooks.sqoop.SqoopHook
connection-type: sqoop
4 changes: 4 additions & 0 deletions airflow/providers/asana/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@ hooks:

hook-class-names:
- airflow.providers.asana.hooks.asana.AsanaHook

connection-types:
- hook-class-name: airflow.providers.asana.hooks.asana.AsanaHook
connection-type: asana
4 changes: 4 additions & 0 deletions airflow/providers/cloudant/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,7 @@ hooks:

hook-class-names:
- airflow.providers.cloudant.hooks.cloudant.CloudantHook

connection-types:
- hook-class-name: airflow.providers.cloudant.hooks.cloudant.CloudantHook
connection-type: cloudant
4 changes: 4 additions & 0 deletions airflow/providers/cncf/kubernetes/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,7 @@ hooks:

hook-class-names:
- airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook

connection-types:
- hook-class-name: airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook
connection-type: kubernetes
4 changes: 4 additions & 0 deletions airflow/providers/databricks/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ hooks:

hook-class-names:
- airflow.providers.databricks.hooks.databricks.DatabricksHook

connection-types:
- hook-class-name: airflow.providers.databricks.hooks.databricks.DatabricksHook
connection-type: databricks
4 changes: 4 additions & 0 deletions airflow/providers/dingding/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,7 @@ hooks:

hook-class-names:
- airflow.providers.dingding.hooks.dingding.DingdingHook

connection-types:
- hook-class-name: airflow.providers.dingding.hooks.dingding.DingdingHook
connection-type: dingding
4 changes: 4 additions & 0 deletions airflow/providers/discord/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,7 @@ hooks:

hook-class-names:
- airflow.providers.discord.hooks.discord_webhook.DiscordWebhookHook

connection-types:
- hook-class-name: airflow.providers.discord.hooks.discord_webhook.DiscordWebhookHook
connection-type: discord
4 changes: 4 additions & 0 deletions airflow/providers/docker/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,7 @@ hooks:

hook-class-names:
- airflow.providers.docker.hooks.docker.DockerHook

connection-types:
- hook-class-name: airflow.providers.docker.hooks.docker.DockerHook
connection-type: docker
4 changes: 4 additions & 0 deletions airflow/providers/elasticsearch/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,9 @@ hooks:
hook-class-names:
- airflow.providers.elasticsearch.hooks.elasticsearch.ElasticsearchHook

connection-types:
- hook-class-name: airflow.providers.elasticsearch.hooks.elasticsearch.ElasticsearchHook
connection-type: elasticsearch

logging:
- airflow.providers.elasticsearch.log.es_task_handler.ElasticsearchTaskHandler
4 changes: 4 additions & 0 deletions airflow/providers/exasol/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ hooks:

hook-class-names:
- airflow.providers.exasol.hooks.exasol.ExasolHook

connection-types:
- hook-class-name: airflow.providers.exasol.hooks.exasol.ExasolHook
connection-type: exasol
4 changes: 4 additions & 0 deletions airflow/providers/facebook/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,7 @@ hooks:

hook-class-names:
- airflow.providers.facebook.ads.hooks.ads.FacebookAdsReportingHook

connection-types:
- hook-class-name: airflow.providers.facebook.ads.hooks.ads.FacebookAdsReportingHook
connection-type: facebook_social
4 changes: 4 additions & 0 deletions airflow/providers/ftp/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ hooks:

hook-class-names:
- airflow.providers.ftp.hooks.ftp.FTPHook

connection-types:
- hook-class-name: airflow.providers.ftp.hooks.ftp.FTPHook
connection-type: ftp
8 changes: 4 additions & 4 deletions airflow/providers/google/cloud/hooks/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,13 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
"""

conn_name_attr = 'gcp_conn_id'
default_conn_name = 'google_cloud_default'
conn_type = 'google_cloud_platform'
hook_name = 'Google Cloud'
default_conn_name = 'google_cloud_bigquery_default'
conn_type = 'gcpbigquery'
hook_name = 'Google Bigquery'

def __init__(
self,
gcp_conn_id: str = default_conn_name,
gcp_conn_id: str = GoogleBaseHook.default_conn_name,
delegate_to: Optional[str] = None,
use_legacy_sql: bool = True,
location: Optional[str] = None,
Expand Down
4 changes: 2 additions & 2 deletions airflow/providers/google/cloud/hooks/cloud_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class CloudSQLHook(GoogleBaseHook):
"""

conn_name_attr = 'gcp_conn_id'
default_conn_name = 'google_cloud_default'
default_conn_name = 'google_cloud_sql_default'
conn_type = 'gcpcloudsql'
hook_name = 'Google Cloud SQL'

Expand Down Expand Up @@ -725,7 +725,7 @@ class CloudSQLDatabaseHook(BaseHook):
"""

conn_name_attr = 'gcp_cloudsql_conn_id'
default_conn_name = 'google_cloud_sql_default'
default_conn_name = 'google_cloud_sqldb_default'
conn_type = 'gcpcloudsqldb'
hook_name = 'Google Cloud SQL Database'

Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/hooks/compute_ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class ComputeEngineSSHHook(SSHHook):
"""

conn_name_attr = 'gcp_conn_id'
default_conn_name = 'google_cloud_default'
default_conn_name = 'google_cloud_ssh_default'
conn_type = 'gcpssh'
hook_name = 'Google Cloud SSH'

Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/hooks/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class GoogleDataprepHook(BaseHook):
"""

conn_name_attr = 'dataprep_conn_id'
default_conn_name = 'dataprep_default'
default_conn_name = 'google_cloud_dataprep_default'
conn_type = 'dataprep'
hook_name = 'Google Dataprep'

Expand Down

0 comments on commit 76ed2a4

Please sign in to comment.