Skip to content

Commit

Permalink
Check for missing references to operator guides (#13059)
Browse files Browse the repository at this point in the history
  • Loading branch information
mik-laj committed Dec 14, 2020
1 parent fa9c6b4 commit b4b9cf5
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 52 deletions.
4 changes: 4 additions & 0 deletions airflow/providers/google/cloud/operators/dataprep.py
Expand Up @@ -106,6 +106,10 @@ class DataprepRunJobGroupOperator(BaseOperator):
To get recipe_id please follow the Dataprep API documentation
https://clouddataprep.com/documentation/api#operation/runJobGroup
.. seealso::
For more information on how to use this operator, take a look at the guide:
:ref:`howto/operator:DataprepRunJobGroupOperator`
:param recipe_id: The identifier for the recipe you would like to run.
:type recipe_id: int
"""
Expand Down
4 changes: 4 additions & 0 deletions airflow/providers/google/cloud/transfers/mysql_to_gcs.py
Expand Up @@ -33,6 +33,10 @@
class MySQLToGCSOperator(BaseSQLToGCSOperator):
"""Copy data from MySQL to Google Cloud Storage in JSON or CSV format.
.. seealso::
For more information on how to use this operator, take a look at the guide:
:ref:`howto/operator:MySQLToGCSOperator`
:param mysql_conn_id: Reference to a specific MySQL hook.
:type mysql_conn_id: str
:param ensure_utc: Ensure TIMESTAMP columns exported as UTC. If set to
Expand Down
4 changes: 4 additions & 0 deletions airflow/providers/google/cloud/transfers/s3_to_gcs.py
Expand Up @@ -31,6 +31,10 @@ class S3ToGCSOperator(S3ListOperator):
Synchronizes an S3 key, possibly a prefix, with a Google Cloud Storage
destination path.
.. seealso::
For more information on how to use this operator, take a look at the guide:
:ref:`howto/operator:S3ToGCSOperator`
:param bucket: The S3 bucket where to find the objects. (templated)
:type bucket: str
:param prefix: Prefix string which filters objects whose name begin with
Expand Down
Expand Up @@ -16,6 +16,7 @@
under the License.
.. _howto/operator:S3ToGCSOperator:

Transfer Data from Amazon S3 to Google Cloud Storage
====================================================
Expand All @@ -32,8 +33,6 @@ Prerequisite Tasks

.. include::/howto/operator/google/_partials/prerequisite_tasks.rst
.. _howto/operator:S3ToGCSOperator:

Use the :class:`~airflow.providers.google.cloud.transfers.s3_to_gcs.S3ToGCSOperator`
to transfer data from Amazon S3 to Google Cloud Storage.

Expand Down
109 changes: 59 additions & 50 deletions docs/exts/docs_build/lint_checks.py
Expand Up @@ -22,6 +22,7 @@
from itertools import chain
from typing import Iterable, List, Optional, Set

from docs.exts.docs_build.docs_builder import ALL_PROVIDER_YAMLS
from docs.exts.docs_build.errors import DocBuildError # pylint: disable=no-name-in-module

ROOT_PROJECT_DIR = os.path.abspath(
Expand All @@ -31,14 +32,14 @@
DOCS_DIR = os.path.join(ROOT_PROJECT_DIR, "docs")


def find_existing_guide_operator_names(src_dir: str) -> Set[str]:
def find_existing_guide_operator_names(src_dir_pattern: str) -> Set[str]:
"""
Find names of existing operators.
:return names of existing operators.
"""
operator_names = set()

paths = glob(f"{src_dir}/**/*.rst", recursive=True)
paths = glob(src_dir_pattern, recursive=True)
for path in paths:
with open(path) as f:
operator_names |= set(re.findall(".. _howto/operator:(.+?):", f.read()))
Expand All @@ -49,72 +50,80 @@ def find_existing_guide_operator_names(src_dir: str) -> Set[str]:
def extract_ast_class_def_by_name(ast_tree, class_name):
"""
Extracts class definition by name
:param ast_tree: AST tree
:param class_name: name of the class.
:return: class node found
"""
for node in ast.walk(ast_tree):
if isinstance(node, ast.ClassDef) and node.name == class_name:
return node

class ClassVisitor(ast.NodeVisitor):
"""Visitor."""

def __init__(self):
self.found_class_node = None

def visit_ClassDef(self, node): # pylint: disable=invalid-name
"""
Visit class definition.
:param node: node.
:return:
"""
if node.name == class_name:
self.found_class_node = node
return None

visitor = ClassVisitor()
visitor.visit(ast_tree)

return visitor.found_class_node
def _generate_missing_guide_error(path, line_no, operator_name):
return DocBuildError(
file_path=path,
line_no=line_no,
message=(
f"Link to the guide is missing in operator's description: {operator_name}.\n"
f"Please add link to the guide to the description in the following form:\n"
f"\n"
f".. seealso::\n"
f" For more information on how to use this operator, take a look at the guide:\n"
f" :ref:`howto/operator:{operator_name}`\n"
),
)


def check_guide_links_in_operator_descriptions() -> List[DocBuildError]:
"""Check if there are links to guides in operator's descriptions."""
# TODO: We should also check the guides in the provider documentations.
# For now, we are only checking the core documentation.
# This is easiest to do after the content has been fully migrated.
build_errors = []

def generate_build_error(path, line_no, operator_name):
return DocBuildError(
file_path=path,
line_no=line_no,
message=(
f"Link to the guide is missing in operator's description: {operator_name}.\n"
f"Please add link to the guide to the description in the following form:\n"
f"\n"
f".. seealso::\n"
f" For more information on how to use this operator, take a look at the guide:\n"
f" :ref:`apache-airflow:howto/operator:{operator_name}`\n"
build_errors.extend(
_check_missing_guide_references(
operator_names=find_existing_guide_operator_names(
f"{DOCS_DIR}/apache-airflow/howto/operator/**/*.rst"
),
python_module_paths=chain(
glob(f"{ROOT_PACKAGE_DIR}/operators/*.py"),
glob(f"{ROOT_PACKAGE_DIR}/sensors/*.py"),
),
)
)

# Extract operators for which there are existing .rst guides
operator_names = find_existing_guide_operator_names(f"{DOCS_DIR}/howto/operator")
for provider in ALL_PROVIDER_YAMLS:
operator_names = {
*find_existing_guide_operator_names(f"{DOCS_DIR}/{provider['package-name']}/operators/**/*.rst"),
*find_existing_guide_operator_names(f"{DOCS_DIR}/{provider['package-name']}/operators.rst"),
}

# Extract all potential python modules that can contain operators
python_module_paths = chain(
glob(f"{provider['package-dir']}/**/operators/*.py", recursive=True),
glob(f"{provider['package-dir']}/**/sensors/*.py", recursive=True),
glob(f"{provider['package-dir']}/**/transfers/*.py", recursive=True),
)

# Extract all potential python modules that can contain operators
python_module_paths = chain(
glob(f"{ROOT_PACKAGE_DIR}/operators/*.py"),
glob(f"{ROOT_PACKAGE_DIR}/sensors/*.py"),
glob(f"{ROOT_PACKAGE_DIR}/providers/**/operators/*.py", recursive=True),
glob(f"{ROOT_PACKAGE_DIR}/providers/**/sensors/*.py", recursive=True),
glob(f"{ROOT_PACKAGE_DIR}/providers/**/transfers/*.py", recursive=True),
)
build_errors.extend(
_check_missing_guide_references(
operator_names=operator_names, python_module_paths=python_module_paths
)
)

return build_errors


def _check_missing_guide_references(operator_names, python_module_paths) -> List[DocBuildError]:
build_errors = []

for py_module_path in python_module_paths:
with open(py_module_path) as f:
py_content = f.read()

if "This module is deprecated" in py_content:
continue

for existing_operator in operator_names:
if f"class {existing_operator}" not in py_content:
continue
Expand All @@ -130,12 +139,12 @@ def generate_build_error(path, line_no, operator_name):
if "This class is deprecated." in docstring:
continue

if f":ref:`apache-airflow:howto/operator:{existing_operator}`" in ast.get_docstring(
class_def
) or f":ref:`howto/operator:{existing_operator}`" in ast.get_docstring(class_def):
if f":ref:`howto/operator:{existing_operator}`" in ast.get_docstring(class_def):
continue

build_errors.append(generate_build_error(py_module_path, class_def.lineno, existing_operator))
build_errors.append(
_generate_missing_guide_error(py_module_path, class_def.lineno, existing_operator)
)
return build_errors


Expand Down Expand Up @@ -193,7 +202,7 @@ def find_modules(deprecated_only: bool = False) -> Set[str]:

def check_exampleinclude_for_example_dags() -> List[DocBuildError]:
"""Checks all exampleincludes for example dags."""
all_docs_files = glob(f"${DOCS_DIR}/**/*rst", recursive=True)
all_docs_files = glob(f"${DOCS_DIR}/**/*.rst", recursive=True)
build_errors = []
for doc_file in all_docs_files:
build_error = assert_file_not_contains(
Expand All @@ -211,7 +220,7 @@ def check_exampleinclude_for_example_dags() -> List[DocBuildError]:

def check_enforce_code_block() -> List[DocBuildError]:
"""Checks all code:: blocks."""
all_docs_files = glob(f"{DOCS_DIR}/**/*rst", recursive=True)
all_docs_files = glob(f"{DOCS_DIR}/**/*.rst", recursive=True)
build_errors = []
for doc_file in all_docs_files:
build_error = assert_file_not_contains(
Expand Down

0 comments on commit b4b9cf5

Please sign in to comment.