Skip to content

Commit d3b0880

Browse files
Wojciech Januszek and Wojciech Januszek authored
Speech To Text assets & system tests migration (AIP-47) (#23643)
Co-authored-by: Wojciech Januszek <[email protected]>
1 parent 64d0d9c commit d3b0880

File tree

5 files changed: 57 additions & 56 deletions

airflow/providers/google/cloud/operators/speech_to_text.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from airflow.exceptions import AirflowException
2727
from airflow.models import BaseOperator
2828
from airflow.providers.google.cloud.hooks.speech_to_text import CloudSpeechToTextHook, RecognitionAudio
29+
from airflow.providers.google.common.links.storage import FileDetailsLink
2930

3031
if TYPE_CHECKING:
3132
from airflow.utils.context import Context
@@ -72,6 +73,7 @@ class CloudSpeechToTextRecognizeSpeechOperator(BaseOperator):
7273
"impersonation_chain",
7374
)
7475
# [END gcp_speech_to_text_synthesize_template_fields]
76+
operator_extra_links = (FileDetailsLink(),)
7577

7678
def __init__(
7779
self,
@@ -106,6 +108,15 @@ def execute(self, context: 'Context'):
106108
gcp_conn_id=self.gcp_conn_id,
107109
impersonation_chain=self.impersonation_chain,
108110
)
111+
112+
FileDetailsLink.persist(
113+
context=context,
114+
task_instance=self,
115+
# Slice from: "gs://{BUCKET_NAME}/{FILE_NAME}" to: "{BUCKET_NAME}/{FILE_NAME}"
116+
uri=self.audio["uri"][5:],
117+
project_id=self.project_id or hook.project_id,
118+
)
119+
109120
response = hook.recognize_speech(
110121
config=self.config, audio=self.audio, retry=self.retry, timeout=self.timeout
111122
)

docs/apache-airflow-providers-google/operators/cloud/speech_to_text.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,22 @@ google.cloud.speech_v1.types module
4242

4343
for more information, see: https://googleapis.github.io/google-cloud-python/latest/speech/gapic/v1/api.html#google.cloud.speech_v1.SpeechClient.recognize
4444

45-
.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_speech_to_text.py
45+
.. exampleinclude:: /../../tests/system/providers/google/speech_to_text/example_speech_to_text.py
4646
:language: python
4747
:start-after: [START howto_operator_text_to_speech_api_arguments]
4848
:end-before: [END howto_operator_text_to_speech_api_arguments]
4949

5050
filename is a simple string argument:
5151

52-
.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_speech_to_text.py
52+
.. exampleinclude:: /../../tests/system/providers/google/speech_to_text/example_speech_to_text.py
5353
:language: python
5454
:start-after: [START howto_operator_speech_to_text_api_arguments]
5555
:end-before: [END howto_operator_speech_to_text_api_arguments]
5656

5757
Using the operator
5858
""""""""""""""""""
5959

60-
.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_speech_to_text.py
60+
.. exampleinclude:: /../../tests/system/providers/google/speech_to_text/example_speech_to_text.py
6161
:language: python
6262
:dedent: 4
6363
:start-after: [START howto_operator_speech_to_text_recognize]

tests/providers/google/cloud/operators/test_speech_to_text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def test_recognize_speech_green_path(self, mock_hook):
4444
audio=AUDIO,
4545
task_id="id",
4646
impersonation_chain=IMPERSONATION_CHAIN,
47-
).execute(context={"task_instance": Mock()})
47+
).execute(context=MagicMock())
4848

4949
mock_hook.assert_called_once_with(
5050
gcp_conn_id=GCP_CONN_ID,

tests/providers/google/cloud/operators/test_speech_to_text_system.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

airflow/providers/google/cloud/example_dags/example_speech_to_text.py renamed to tests/system/providers/google/speech_to_text/example_speech_to_text.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,19 @@
2020
from datetime import datetime
2121

2222
from airflow import models
23+
from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
2324
from airflow.providers.google.cloud.operators.speech_to_text import CloudSpeechToTextRecognizeSpeechOperator
2425
from airflow.providers.google.cloud.operators.text_to_speech import CloudTextToSpeechSynthesizeOperator
26+
from airflow.utils.trigger_rule import TriggerRule
2527

26-
GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
27-
BUCKET_NAME = os.environ.get("GCP_SPEECH_TO_TEXT_TEST_BUCKET", "INVALID BUCKET NAME")
28+
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
29+
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
30+
DAG_ID = "speech_to_text"
31+
32+
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"
2833

2934
# [START howto_operator_speech_to_text_gcp_filename]
30-
FILENAME = "gcp-speech-test-file"
35+
FILE_NAME = f"test-audio-file-{DAG_ID}-{ENV_ID}"
3136
# [END howto_operator_speech_to_text_gcp_filename]
3237

3338
# [START howto_operator_text_to_speech_api_arguments]
@@ -38,29 +43,55 @@
3843

3944
# [START howto_operator_speech_to_text_api_arguments]
4045
CONFIG = {"encoding": "LINEAR16", "language_code": "en_US"}
41-
AUDIO = {"uri": f"gs://{BUCKET_NAME}/{FILENAME}"}
46+
AUDIO = {"uri": f"gs://{BUCKET_NAME}/{FILE_NAME}"}
4247
# [END howto_operator_speech_to_text_api_arguments]
4348

4449
with models.DAG(
45-
"example_gcp_speech_to_text",
46-
schedule_interval='@once', # Override to match your needs
50+
DAG_ID,
51+
schedule_interval=None,
4752
start_date=datetime(2021, 1, 1),
4853
catchup=False,
49-
tags=['example'],
54+
tags=["example", "speech_to_text"],
5055
) as dag:
56+
create_bucket = GCSCreateBucketOperator(task_id="create_bucket", bucket_name=BUCKET_NAME)
57+
5158
text_to_speech_synthesize_task = CloudTextToSpeechSynthesizeOperator(
52-
project_id=GCP_PROJECT_ID,
59+
project_id=PROJECT_ID,
5360
input_data=INPUT,
5461
voice=VOICE,
5562
audio_config=AUDIO_CONFIG,
5663
target_bucket_name=BUCKET_NAME,
57-
target_filename=FILENAME,
64+
target_filename=FILE_NAME,
5865
task_id="text_to_speech_synthesize_task",
5966
)
6067
# [START howto_operator_speech_to_text_recognize]
61-
speech_to_text_recognize_task2 = CloudSpeechToTextRecognizeSpeechOperator(
68+
speech_to_text_recognize_task = CloudSpeechToTextRecognizeSpeechOperator(
6269
config=CONFIG, audio=AUDIO, task_id="speech_to_text_recognize_task"
6370
)
6471
# [END howto_operator_speech_to_text_recognize]
6572

66-
text_to_speech_synthesize_task >> speech_to_text_recognize_task2
73+
delete_bucket = GCSDeleteBucketOperator(
74+
task_id="delete_bucket", bucket_name=BUCKET_NAME, trigger_rule=TriggerRule.ALL_DONE
75+
)
76+
77+
(
78+
# TEST SETUP
79+
create_bucket
80+
# TEST BODY
81+
>> text_to_speech_synthesize_task
82+
>> speech_to_text_recognize_task
83+
# TEST TEARDOWN
84+
>> delete_bucket
85+
)
86+
87+
from tests.system.utils.watcher import watcher
88+
89+
# This test needs watcher in order to properly mark success/failure
90+
# when "tearDown" task with trigger rule is part of the DAG
91+
list(dag.tasks) >> watcher()
92+
93+
94+
from tests.system.utils import get_test_run # noqa: E402
95+
96+
# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
97+
test_run = get_test_run(dag)

0 commit comments

Comments
 (0)