Skip to content

Commit

Permalink
Decode Remote Google Logs (#13115)
Browse files Browse the repository at this point in the history
* decode remote google logs before returning

The `Blob.download_as_string` function returns a byte which cause
the log result to be displayed in a single line like:

b"line1\nline2"

instead of

line1
line2

added an isinstance check to make sure it doesn't break if it
returns string in some case and not others
  • Loading branch information
KevYuen committed Dec 24, 2020
1 parent e7aeacf commit e9d65bd
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 16 deletions.
4 changes: 2 additions & 2 deletions airflow/providers/google/cloud/log/gcs_task_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _read(self, ti, try_number, metadata=None):

try:
blob = storage.Blob.from_string(remote_loc, self.client)
remote_log = blob.download_as_string()
remote_log = blob.download_as_bytes().decode()
log = f'*** Reading remote log from {remote_loc}.\n{remote_log}\n'
return log, {'end_of_log': True}
except Exception as e: # pylint: disable=broad-except
Expand All @@ -174,7 +174,7 @@ def gcs_write(self, log, remote_log_location):
"""
try:
blob = storage.Blob.from_string(remote_log_location, self.client)
old_log = blob.download_as_string()
old_log = blob.download_as_bytes().decode()
log = '\n'.join([old_log, log]) if old_log else log
except Exception as e: # pylint: disable=broad-except
if not hasattr(e, 'resp') or e.resp.get('status') != '404': # pylint: disable=no-member
Expand Down
34 changes: 20 additions & 14 deletions tests/providers/google/cloud/log/test_gcs_task_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test_hook(self, mock_client, mock_creds):
@mock.patch("google.cloud.storage.Client")
@mock.patch("google.cloud.storage.Blob")
def test_should_read_logs_from_remote(self, mock_blob, mock_client, mock_creds):
mock_blob.from_string.return_value.download_as_string.return_value = "CONTENT"
mock_blob.from_string.return_value.download_as_bytes.return_value = b"CONTENT"

logs, metadata = self.gcs_task_handler._read(self.ti, self.ti.try_number)
mock_blob.from_string.assert_called_once_with(
Expand All @@ -94,7 +94,7 @@ def test_should_read_logs_from_remote(self, mock_blob, mock_client, mock_creds):
@mock.patch("google.cloud.storage.Client")
@mock.patch("google.cloud.storage.Blob")
def test_should_read_from_local(self, mock_blob, mock_client, mock_creds):
mock_blob.from_string.return_value.download_as_string.side_effect = Exception("Failed to connect")
mock_blob.from_string.return_value.download_as_bytes.side_effect = Exception("Failed to connect")

self.gcs_task_handler.set_context(self.ti)
log, metadata = self.gcs_task_handler._read(self.ti, self.ti.try_number)
Expand All @@ -116,7 +116,7 @@ def test_should_read_from_local(self, mock_blob, mock_client, mock_creds):
@mock.patch("google.cloud.storage.Client")
@mock.patch("google.cloud.storage.Blob")
def test_write_to_remote_on_close(self, mock_blob, mock_client, mock_creds):
mock_blob.from_string.return_value.download_as_string.return_value = "CONTENT"
mock_blob.from_string.return_value.download_as_bytes.return_value = b"CONTENT"

self.gcs_task_handler.set_context(self.ti)
self.gcs_task_handler.emit(
Expand All @@ -135,7 +135,7 @@ def test_write_to_remote_on_close(self, mock_blob, mock_client, mock_creds):
mock_blob.assert_has_calls(
[
mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
mock.call.from_string().download_as_string(),
mock.call.from_string().download_as_bytes(),
mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
mock.call.from_string().upload_from_string("CONTENT\nMESSAGE\n", content_type="text/plain"),
],
Expand All @@ -152,30 +152,36 @@ def test_write_to_remote_on_close(self, mock_blob, mock_client, mock_creds):
@mock.patch("google.cloud.storage.Blob")
def test_failed_write_to_remote_on_close(self, mock_blob, mock_client, mock_creds):
mock_blob.from_string.return_value.upload_from_string.side_effect = Exception("Failed to connect")
mock_blob.from_string.return_value.download_as_string.return_value = b"Old log"
mock_blob.from_string.return_value.download_as_bytes.return_value = b"Old log"

self.gcs_task_handler.set_context(self.ti)
self.gcs_task_handler.emit(
logging.LogRecord(
name="NAME",
level="DEBUG",
pathname=None,
lineno=None,
msg="MESSAGE",
args=None,
exc_info=None,
)
)
with self.assertLogs(self.gcs_task_handler.log) as cm:
self.gcs_task_handler.close()

self.assertEqual(
cm.output,
[
'INFO:airflow.providers.google.cloud.log.gcs_task_handler.GCSTaskHandler:Previous '
'log discarded: sequence item 0: expected str instance, bytes found',
'ERROR:airflow.providers.google.cloud.log.gcs_task_handler.GCSTaskHandler:Could '
'not write logs to gs://bucket/remote/log/location/1.log: Failed to connect',
],
)
mock_blob.assert_has_calls(
[
mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
mock.call.from_string().download_as_string(),
mock.call.from_string().download_as_bytes(),
mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
mock.call.from_string().upload_from_string(
"*** Previous log discarded: sequence item 0: expected str instance, bytes found\n\n",
content_type="text/plain",
),
mock.call.from_string().upload_from_string("Old log\nMESSAGE\n", content_type="text/plain"),
],
any_order=False,
)
Expand All @@ -187,7 +193,7 @@ def test_failed_write_to_remote_on_close(self, mock_blob, mock_client, mock_cred
@mock.patch("google.cloud.storage.Client")
@mock.patch("google.cloud.storage.Blob")
def test_write_to_remote_on_close_failed_read_old_logs(self, mock_blob, mock_client, mock_creds):
mock_blob.from_string.return_value.download_as_string.side_effect = Exception("Fail to download")
mock_blob.from_string.return_value.download_as_bytes.side_effect = Exception("Fail to download")

self.gcs_task_handler.set_context(self.ti)
self.gcs_task_handler.emit(
Expand All @@ -206,7 +212,7 @@ def test_write_to_remote_on_close_failed_read_old_logs(self, mock_blob, mock_cli
mock_blob.assert_has_calls(
[
mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
mock.call.from_string().download_as_string(),
mock.call.from_string().download_as_bytes(),
mock.call.from_string("gs://bucket/remote/log/location/1.log", mock_client.return_value),
mock.call.from_string().upload_from_string(
"*** Previous log discarded: Fail to download\n\nMESSAGE\n", content_type="text/plain"
Expand Down

0 comments on commit e9d65bd

Please sign in to comment.