Skip to content

Commit

Permalink
Filtering and ordering results of DataprocListBatchesOperator (#32500)
Browse files — Browse the repository at this point in the history
  • Loading branch information
kristopherkane committed Jul 20, 2023
1 parent 3c14753 commit 99b8a90
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 0 deletions.
12 changes: 12 additions & 0 deletions airflow/providers/google/cloud/hooks/dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,8 @@ def list_batches(
retry: Retry | _MethodDefault = DEFAULT,
timeout: float | None = None,
metadata: Sequence[tuple[str, str]] = (),
filter: str | None = None,
order_by: str | None = None,
):
"""List batch workloads.
Expand All @@ -966,6 +968,8 @@ def list_batches(
to complete. If *retry* is specified, the timeout applies to each
individual attempt.
:param metadata: Additional metadata that is provided to the method.
:param filter: Result filters as specified in ListBatchesRequest
:param order_by: How to order results as specified in ListBatchesRequest
"""
client = self.get_batch_client(region)
parent = f"projects/{project_id}/regions/{region}"
Expand All @@ -975,6 +979,8 @@ def list_batches(
"parent": parent,
"page_size": page_size,
"page_token": page_token,
"filter": filter,
"order_by": order_by,
},
retry=retry,
timeout=timeout,
Expand Down Expand Up @@ -1768,6 +1774,8 @@ async def list_batches(
retry: Retry | _MethodDefault = DEFAULT,
timeout: float | None = None,
metadata: Sequence[tuple[str, str]] = (),
filter: str | None = None,
order_by: str | None = None,
):
"""List batch workloads.
Expand All @@ -1784,6 +1792,8 @@ async def list_batches(
to complete. If *retry* is specified, the timeout applies to each
individual attempt.
:param metadata: Additional metadata that is provided to the method.
:param filter: Result filters as specified in ListBatchesRequest
:param order_by: How to order results as specified in ListBatchesRequest
"""
client = self.get_batch_client(region)
parent = f"projects/{project_id}/regions/{region}"
Expand All @@ -1793,6 +1803,8 @@ async def list_batches(
"parent": parent,
"page_size": page_size,
"page_token": page_token,
"filter": filter,
"order_by": order_by,
},
retry=retry,
timeout=timeout,
Expand Down
8 changes: 8 additions & 0 deletions airflow/providers/google/cloud/operators/dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2582,6 +2582,8 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
If set as a sequence, the identities from the list must grant
Service Account Token Creator IAM role to the directly preceding identity, with first
account from the list granting this role to the originating account (templated).
:param filter: Result filters as specified in ListBatchesRequest
:param order_by: How to order results as specified in ListBatchesRequest
"""

template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain")
Expand All @@ -2599,6 +2601,8 @@ def __init__(
metadata: Sequence[tuple[str, str]] = (),
gcp_conn_id: str = "google_cloud_default",
impersonation_chain: str | Sequence[str] | None = None,
filter: str | None = None,
order_by: str | None = None,
**kwargs,
) -> None:
super().__init__(**kwargs)
Expand All @@ -2611,6 +2615,8 @@ def __init__(
self.metadata = metadata
self.gcp_conn_id = gcp_conn_id
self.impersonation_chain = impersonation_chain
self.filter = filter
self.order_by = order_by

def execute(self, context: Context):
hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
Expand All @@ -2622,6 +2628,8 @@ def execute(self, context: Context):
retry=self.retry,
timeout=self.timeout,
metadata=self.metadata,
filter=self.filter,
order_by=self.order_by,
)
DataprocListLink.persist(context=context, task_instance=self, url=DATAPROC_BATCHES_LINK)
return [Batch.to_dict(result) for result in results]
Expand Down
4 changes: 4 additions & 0 deletions tests/providers/google/cloud/hooks/test_dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,8 @@ def test_list_batches(self, mock_client):
parent=PARENT.format(GCP_PROJECT, GCP_LOCATION),
page_size=None,
page_token=None,
filter=None,
order_by=None,
),
metadata=(),
retry=DEFAULT,
Expand Down Expand Up @@ -939,6 +941,8 @@ async def test_list_batches(self, mock_client):
parent=PARENT.format(GCP_PROJECT, GCP_LOCATION),
page_size=None,
page_token=None,
filter=None,
order_by=None,
),
metadata=(),
retry=DEFAULT,
Expand Down
6 changes: 6 additions & 0 deletions tests/providers/google/cloud/operators/test_dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2220,6 +2220,8 @@ class TestDataprocListBatchesOperator:
def test_execute(self, mock_hook):
page_token = "page_token"
page_size = 42
filter = 'batch_id=~"a-batch-id*" AND create_time>="2023-07-05T14:25:04.643818Z"'
order_by = "create_time desc"

op = DataprocListBatchesOperator(
task_id=TASK_ID,
Expand All @@ -2232,6 +2234,8 @@ def test_execute(self, mock_hook):
retry=RETRY,
timeout=TIMEOUT,
metadata=METADATA,
filter=filter,
order_by=order_by,
)
op.execute(context=MagicMock())
mock_hook.assert_called_once_with(gcp_conn_id=GCP_CONN_ID, impersonation_chain=IMPERSONATION_CHAIN)
Expand All @@ -2243,6 +2247,8 @@ def test_execute(self, mock_hook):
retry=RETRY,
timeout=TIMEOUT,
metadata=METADATA,
filter=filter,
order_by=order_by,
)

@mock.patch(DATAPROC_PATH.format("DataprocHook"))
Expand Down

0 comments on commit 99b8a90

Please sign in to comment.