Skip to content

Commit

Permalink
Rename AzureDataLakeStorage to ADLS (#18493)
Browse files Browse the repository at this point in the history
* Rename AzureDataLakeStorage to ADLS
  • Loading branch information
eladkal committed Sep 25, 2021
1 parent f1ea87a commit 97d6892
Show file tree
Hide file tree
Showing 16 changed files with 187 additions and 107 deletions.
22 changes: 19 additions & 3 deletions airflow/contrib/operators/adls_list_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,30 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""This module is deprecated. Please use :mod:`airflow.providers.microsoft.azure.operators.adls_list`."""
"""This module is deprecated. Please use :mod:`airflow.providers.microsoft.azure.operators.adls`."""

import warnings

from airflow.providers.microsoft.azure.operators.adls_list import AzureDataLakeStorageListOperator # noqa
from airflow.providers.microsoft.azure.operators.adls import ADLSListOperator

warnings.warn(
"This module is deprecated. Please use `airflow.providers.microsoft.azure.operators.adls_list`.",
"This module is deprecated. Please use `airflow.providers.microsoft.azure.operators.adls`.",
DeprecationWarning,
stacklevel=2,
)


class AzureDataLakeStorageListOperator(ADLSListOperator):
    """
    This class is deprecated.

    Please use :class:`airflow.providers.microsoft.azure.operators.adls.ADLSListOperator`.
    """

    def __init__(self, *args, **kwargs):
        # Warn before delegating so the notice is emitted on every instantiation attempt.
        # The message is plain text (no Sphinx role, no duplicated "Please use") because
        # it is shown to users at runtime rather than rendered by the documentation build.
        warnings.warn(
            "This class is deprecated. "
            "Please use `airflow.providers.microsoft.azure.operators.adls.ADLSListOperator`.",
            DeprecationWarning,
            stacklevel=2,
        )
        super().__init__(*args, **kwargs)
4 changes: 2 additions & 2 deletions airflow/providers/google/cloud/transfers/adls_to_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@

from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
from airflow.providers.microsoft.azure.hooks.azure_data_lake import AzureDataLakeHook
from airflow.providers.microsoft.azure.operators.adls_list import AzureDataLakeStorageListOperator
from airflow.providers.microsoft.azure.operators.adls import ADLSListOperator


class ADLSToGCSOperator(AzureDataLakeStorageListOperator):
class ADLSToGCSOperator(ADLSListOperator):
"""
Synchronizes an Azure Data Lake Storage path with a GCS bucket
Expand Down
14 changes: 14 additions & 0 deletions airflow/providers/microsoft/azure/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,20 @@
Changelog
---------

Main
....

Changes in operator names and import paths are listed in the following table.
This is a backward-compatible change. Deprecated operators will be removed in the next major release.

+------------------------------------+--------------------+---------------------------------------------------------+--------------------------------------------------+
| Deprecated operator name | New operator name | Deprecated path | New path |
+------------------------------------+--------------------+---------------------------------------------------------+--------------------------------------------------+
| AzureDataLakeStorageListOperator | ADLSListOperator | airflow.providers.microsoft.azure.operators.adls_list | airflow.providers.microsoft.azure.operators.adls |
+------------------------------------+--------------------+---------------------------------------------------------+--------------------------------------------------+
| AzureDataLakeStorageDeleteOperator | ADLSDeleteOperator | airflow.providers.microsoft.azure.operators.adls_delete | airflow.providers.microsoft.azure.operators.adls |
+------------------------------------+--------------------+---------------------------------------------------------+--------------------------------------------------+

3.1.1
.....

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import os

from airflow import models
from airflow.providers.microsoft.azure.operators.adls_delete import AzureDataLakeStorageDeleteOperator
from airflow.providers.microsoft.azure.operators.adls import ADLSDeleteOperator
from airflow.providers.microsoft.azure.transfers.local_to_adls import LocalFilesystemToADLSOperator
from airflow.utils.dates import days_ago

Expand All @@ -39,9 +39,7 @@
remote_path=REMOTE_FILE_PATH,
)
# [START howto_operator_adls_delete]
remove_file = AzureDataLakeStorageDeleteOperator(
task_id="delete_task", path=REMOTE_FILE_PATH, recursive=True
)
remove_file = ADLSDeleteOperator(task_id="delete_task", path=REMOTE_FILE_PATH, recursive=True)
# [END howto_operator_adls_delete]

upload_file >> remove_file
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import os

from airflow import models
from airflow.providers.microsoft.azure.operators.adls_delete import AzureDataLakeStorageDeleteOperator
from airflow.providers.microsoft.azure.operators.adls import ADLSDeleteOperator
from airflow.providers.microsoft.azure.transfers.local_to_adls import LocalFilesystemToADLSOperator
from airflow.utils.dates import days_ago

Expand All @@ -39,8 +39,6 @@
)
# [END howto_operator_local_to_adls]

delete_file = AzureDataLakeStorageDeleteOperator(
task_id="remove_task", path=REMOTE_FILE_PATH, recursive=True
)
delete_file = ADLSDeleteOperator(task_id="remove_task", path=REMOTE_FILE_PATH, recursive=True)

upload_file >> delete_file
102 changes: 102 additions & 0 deletions airflow/providers/microsoft/azure/operators/adls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import Any, Sequence

from airflow.models import BaseOperator
from airflow.providers.microsoft.azure.hooks.azure_data_lake import AzureDataLakeHook


class ADLSDeleteOperator(BaseOperator):
    """
    Remove a file or directory at the given Azure Data Lake path.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:ADLSDeleteOperator`

    :param path: A directory or file to remove (templated)
    :type path: str
    :param recursive: Whether to loop into directories in the location and remove the files
    :type recursive: bool
    :param ignore_not_found: Forwarded unchanged to the hook's ``remove`` call; governs
        whether an error is raised if the file to delete is not found
    :type ignore_not_found: bool
    :param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection<howto/connection:adl>`.
    :type azure_data_lake_conn_id: str
    """

    # Only ``path`` is rendered as a template.
    template_fields: Sequence[str] = ('path',)
    ui_color = '#901dd2'

    def __init__(
        self,
        *,
        path: str,
        recursive: bool = False,
        ignore_not_found: bool = True,
        azure_data_lake_conn_id: str = 'azure_data_lake_default',
        **kwargs,
    ) -> None:
        # Everything not consumed here is handed to BaseOperator.
        super().__init__(**kwargs)
        self.azure_data_lake_conn_id = azure_data_lake_conn_id
        self.path = path
        self.recursive = recursive
        self.ignore_not_found = ignore_not_found

    def execute(self, context: dict) -> Any:
        """Delete ``self.path`` through the hook and return whatever the hook returns."""
        adls_hook = AzureDataLakeHook(azure_data_lake_conn_id=self.azure_data_lake_conn_id)
        removal_result = adls_hook.remove(
            path=self.path,
            recursive=self.recursive,
            ignore_not_found=self.ignore_not_found,
        )
        return removal_result


class ADLSListOperator(BaseOperator):
    """
    List all files found under the given Azure Data Lake path.

    The value returned by :meth:`execute` is the listing produced by the hook, which
    downstream tasks can pick up through `xcom`.

    :param path: The Azure Data Lake path to find the objects. Supports glob
        strings (templated)
    :type path: str
    :param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection<howto/connection:adl>`.
    :type azure_data_lake_conn_id: str

    **Example**:
        The following Operator would list all the Parquet files from ``folder/output/``
        folder in the specified ADLS account ::

            adls_files = ADLSListOperator(
                task_id='adls_files',
                path='folder/output/*.parquet',
                azure_data_lake_conn_id='azure_data_lake_default'
            )
    """

    # Only ``path`` is rendered as a template.
    template_fields: Sequence[str] = ('path',)
    ui_color = '#901dd2'

    def __init__(
        self,
        *,
        path: str,
        azure_data_lake_conn_id: str = 'azure_data_lake_default',
        **kwargs,
    ) -> None:
        # Everything not consumed here is handed to BaseOperator.
        super().__init__(**kwargs)
        self.azure_data_lake_conn_id = azure_data_lake_conn_id
        self.path = path

    def execute(self, context: dict) -> list:
        """Log the target path, then return the hook's listing for ``self.path``."""
        adls_hook = AzureDataLakeHook(azure_data_lake_conn_id=self.azure_data_lake_conn_id)
        self.log.info('Getting list of ADLS files in path: %s', self.path)
        listing = adls_hook.list(path=self.path)
        return listing
55 changes: 15 additions & 40 deletions airflow/providers/microsoft/azure/operators/adls_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,50 +14,25 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""This module is deprecated. Please use :mod:`airflow.providers.microsoft.azure.operators.adls`."""

from typing import Any, Sequence
import warnings

from airflow.models import BaseOperator
from airflow.providers.microsoft.azure.hooks.azure_data_lake import AzureDataLakeHook
from airflow.providers.microsoft.azure.operators.adls import ADLSDeleteOperator


class AzureDataLakeStorageDeleteOperator(BaseOperator):
class AzureDataLakeStorageDeleteOperator(ADLSDeleteOperator):
"""
Delete files in the specified path.
.. seealso::
For more information on how to use this operator, take a look at the guide:
:ref:`howto/operator:AzureDataLakeStorageDeleteOperator`
:param path: A directory or file to remove
:type path: str
:param recursive: Whether to loop into directories in the location and remove the files
:type recursive: bool
:param ignore_not_found: Whether to raise error if file to delete is not found
:type ignore_not_found: bool
:param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection<howto/connection:adl>`.
:type azure_data_lake_conn_id: str
This class is deprecated.
Please use `airflow.providers.microsoft.azure.operators.adls.ADLSDeleteOperator`.
"""

template_fields: Sequence[str] = ('path',)
ui_color = '#901dd2'

def __init__(
self,
*,
path: str,
recursive: bool = False,
ignore_not_found: bool = True,
azure_data_lake_conn_id: str = 'azure_data_lake_default',
**kwargs,
) -> None:
super().__init__(**kwargs)
self.path = path
self.recursive = recursive
self.ignore_not_found = ignore_not_found
self.azure_data_lake_conn_id = azure_data_lake_conn_id

def execute(self, context: dict) -> Any:
hook = AzureDataLakeHook(azure_data_lake_conn_id=self.azure_data_lake_conn_id)

return hook.remove(path=self.path, recursive=self.recursive, ignore_not_found=self.ignore_not_found)
def __init__(self, *args, **kwargs):
warnings.warn(
"""This class is deprecated.
Please use
`airflow.providers.microsoft.azure.operators.adls.ADLSDeleteOperator`.""",
DeprecationWarning,
stacklevel=3,
)
super().__init__(*args, **kwargs)
57 changes: 16 additions & 41 deletions airflow/providers/microsoft/azure/operators/adls_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,50 +15,25 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Sequence
"""This module is deprecated. Please use :mod:`airflow.providers.microsoft.azure.operators.adls`."""

from airflow.models import BaseOperator
from airflow.providers.microsoft.azure.hooks.azure_data_lake import AzureDataLakeHook
import warnings

from airflow.providers.microsoft.azure.operators.adls import ADLSListOperator

class AzureDataLakeStorageListOperator(BaseOperator):
"""
List all files from the specified path
This operator returns a python list with the names of files which can be used by
`xcom` in the downstream tasks.
:param path: The Azure Data Lake path to find the objects. Supports glob
strings (templated)
:type path: str
:param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection<howto/connection:adl>`.
:type azure_data_lake_conn_id: str

**Example**:
The following Operator would list all the Parquet files from ``folder/output/``
folder in the specified ADLS account ::
adls_files = AzureDataLakeStorageListOperator(
task_id='adls_files',
path='folder/output/*.parquet',
azure_data_lake_conn_id='azure_data_lake_default'
)
class AzureDataLakeStorageListOperator(ADLSListOperator):
"""
This class is deprecated.
Please use `airflow.providers.microsoft.azure.operators.adls.ADLSListOperator`.
"""

template_fields: Sequence[str] = ('path',)
ui_color = '#901dd2'

def __init__(
self, *, path: str, azure_data_lake_conn_id: str = 'azure_data_lake_default', **kwargs
) -> None:
super().__init__(**kwargs)
self.path = path
self.azure_data_lake_conn_id = azure_data_lake_conn_id

def execute(self, context: dict) -> list:

hook = AzureDataLakeHook(azure_data_lake_conn_id=self.azure_data_lake_conn_id)

self.log.info('Getting list of ADLS files in path: %s', self.path)

return hook.list(path=self.path)
def __init__(self, *args, **kwargs):
warnings.warn(
"""This class is deprecated.
Please use
`airflow.providers.microsoft.azure.operators.adls.ADLSListOperator`.""",
DeprecationWarning,
stacklevel=3,
)
super().__init__(*args, **kwargs)
1 change: 1 addition & 0 deletions airflow/providers/microsoft/azure/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ operators:
python-modules:
- airflow.providers.microsoft.azure.operators.adls_list
- airflow.providers.microsoft.azure.operators.adls_delete
- airflow.providers.microsoft.azure.operators.adls
- integration-name: Microsoft Azure Data Explorer
python-modules:
- airflow.providers.microsoft.azure.operators.adx
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ Prerequisite Tasks

.. include::/operators/_partials/prerequisite_tasks.rst
.. _howto/operator:AzureDataLakeStorageDeleteOperator:
.. _howto/operator:ADLSDeleteOperator:

AzureDataLakeStorageDeleteOperator
ADLSDeleteOperator
----------------------------------
Use the
:class:`~airflow.providers.microsoft.azure.operators.adls_delete.AzureDataLakeStorageDeleteOperator` to remove
:class:`~airflow.providers.microsoft.azure.operators.adls.ADLSDeleteOperator` to remove
file(s) from Azure DataLake Storage


Expand Down
1 change: 1 addition & 0 deletions docs/spelling_wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ acknowledgement
actionCard
acyclic
adhoc
adls
airbnb
airbyte
airflowignore
Expand Down
2 changes: 1 addition & 1 deletion tests/deprecated_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,7 +1024,7 @@
'airflow.operators.pig_operator.PigOperator',
),
(
'airflow.providers.microsoft.azure.operators.adls_list.AzureDataLakeStorageListOperator',
'airflow.providers.microsoft.azure.operators.adls.ADLSListOperator',
'airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator',
),
(
Expand Down

0 comments on commit 97d6892

Please sign in to comment.