Skip to content

Commit

Permalink
FIPS environments: Mark uses of md5 as "not-used-for-security" (#31171)
Browse files Browse the repository at this point in the history
Co-authored-by: Long Nguyen <[email protected]>
  • Loading branch information
longslvr and Long Nguyen committed May 26, 2023
1 parent 85910b7 commit 22e44ab
Show file tree
Hide file tree
Showing 9 changed files with 37 additions and 25 deletions.
4 changes: 2 additions & 2 deletions airflow/cli/commands/webserver_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
"""Webserver command."""
from __future__ import annotations

import hashlib
import logging
import os
import signal
Expand All @@ -40,6 +39,7 @@
from airflow.exceptions import AirflowException, AirflowWebServerTimeout
from airflow.utils import cli as cli_utils
from airflow.utils.cli import setup_locations, setup_logging
from airflow.utils.hashlib_wrapper import md5
from airflow.utils.log.logging_mixin import LoggingMixin
from airflow.utils.process_utils import check_if_pidfile_process_is_running

Expand Down Expand Up @@ -124,7 +124,7 @@ def _generate_plugin_state(self) -> dict[str, float]:
@staticmethod
def _get_file_hash(fname: str):
"""Calculate MD5 hash for file."""
hash_md5 = hashlib.md5()
hash_md5 = md5()
with open(fname, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
Expand Down
4 changes: 2 additions & 2 deletions airflow/kubernetes/pod_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

import copy
import datetime
import hashlib
import logging
import os
import re
Expand All @@ -46,6 +45,7 @@
from airflow.kubernetes.kubernetes_helper_functions import add_pod_suffix, rand_str
from airflow.kubernetes.pod_generator_deprecated import PodDefaults, PodGenerator as PodGeneratorDeprecated
from airflow.utils import yaml
from airflow.utils.hashlib_wrapper import md5
from airflow.version import version as airflow_version

log = logging.getLogger(__name__)
Expand All @@ -68,7 +68,7 @@ def make_safe_label_value(string: str) -> str:
safe_label = re.sub(r"^[^a-z0-9A-Z]*|[^a-zA-Z0-9_\-\.]|[^a-z0-9A-Z]*$", "", string)

if len(safe_label) > MAX_LABEL_LEN or string != safe_label:
safe_hash = hashlib.md5(string.encode()).hexdigest()[:9]
safe_hash = md5(string.encode()).hexdigest()[:9]
safe_label = safe_label[: MAX_LABEL_LEN - len(safe_hash) - 1] + "-" + safe_hash

return safe_label
Expand Down
5 changes: 3 additions & 2 deletions airflow/kubernetes/pod_generator_deprecated.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@
from __future__ import annotations

import copy
import hashlib
import re
import uuid

from kubernetes.client import models as k8s

from airflow.utils.hashlib_wrapper import md5

MAX_POD_ID_LEN = 253

MAX_LABEL_LEN = 63
Expand Down Expand Up @@ -72,7 +73,7 @@ def make_safe_label_value(string):
safe_label = re.sub(r"^[^a-z0-9A-Z]*|[^a-zA-Z0-9_\-\.]|[^a-z0-9A-Z]*$", "", string)

if len(safe_label) > MAX_LABEL_LEN or string != safe_label:
safe_hash = hashlib.md5(string.encode()).hexdigest()[:9]
safe_hash = md5(string.encode()).hexdigest()[:9]
safe_label = safe_label[: MAX_LABEL_LEN - len(safe_hash) - 1] + "-" + safe_hash

return safe_label
Expand Down
2 changes: 1 addition & 1 deletion airflow/models/serialized_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def __init__(self, dag: DAG, processor_subdir: str | None = None) -> None:
dag_data = SerializedDAG.to_dict(dag)
dag_data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8")

self.dag_hash = md5(dag_data_json, usedforsecurity=False).hexdigest()
self.dag_hash = md5(dag_data_json).hexdigest()

if COMPRESS_SERIALIZED_DAGS:
self._data = None
Expand Down
11 changes: 8 additions & 3 deletions airflow/providers/google/cloud/hooks/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
"""
from __future__ import annotations

import hashlib
import json
import logging
import re
Expand Down Expand Up @@ -62,6 +61,12 @@
from airflow.providers.google.cloud.utils.bigquery import bq_cast
from airflow.providers.google.common.consts import CLIENT_INFO
from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook, GoogleBaseHook, get_field

try:
from airflow.utils.hashlib_wrapper import md5
except ModuleNotFoundError:
# Remove when Airflow providers min Airflow version is "2.7.0"
from hashlib import md5
from airflow.utils.helpers import convert_camel_to_snake
from airflow.utils.log.logging_mixin import LoggingMixin

Expand Down Expand Up @@ -1527,7 +1532,7 @@ def get_job(
@staticmethod
def _custom_job_id(configuration: dict[str, Any]) -> str:
hash_base = json.dumps(configuration, sort_keys=True)
uniqueness_suffix = hashlib.md5(hash_base.encode()).hexdigest()
uniqueness_suffix = md5(hash_base.encode()).hexdigest()
microseconds_from_epoch = int(
(datetime.now() - datetime.fromtimestamp(0)) / timedelta(microseconds=1)
)
Expand Down Expand Up @@ -2258,7 +2263,7 @@ def generate_job_id(self, job_id, dag_id, task_id, logical_date, configuration,
else:
hash_base = json.dumps(configuration, sort_keys=True)

uniqueness_suffix = hashlib.md5(hash_base.encode()).hexdigest()
uniqueness_suffix = md5(hash_base.encode()).hexdigest()

if job_id:
return f"{job_id}_{uniqueness_suffix}"
Expand Down
8 changes: 6 additions & 2 deletions airflow/providers/google/cloud/operators/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# under the License.
from __future__ import annotations

import hashlib
import json
import re
import uuid
Expand All @@ -41,6 +40,11 @@

if TYPE_CHECKING:
from airflow.utils.context import Context
try:
from airflow.utils.hashlib_wrapper import md5
except ModuleNotFoundError:
# Remove when Airflow providers min Airflow version is "2.7.0"
from hashlib import md5


class WorkflowsCreateWorkflowOperator(GoogleCloudBaseOperator):
Expand Down Expand Up @@ -112,7 +116,7 @@ def _workflow_id(self, context):
# we use hash of whole information
exec_date = context["execution_date"].isoformat()
base = f"airflow_{self.dag_id}_{self.task_id}_{exec_date}_{hash_base}"
workflow_id = hashlib.md5(base.encode()).hexdigest()
workflow_id = md5(base.encode()).hexdigest()
return re.sub(r"[:\-+.]", "_", workflow_id)

def execute(self, context: Context):
Expand Down
22 changes: 12 additions & 10 deletions airflow/utils/hashlib_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,23 @@
from __future__ import annotations

import hashlib
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from _typeshed import ReadableBuffer

from airflow import PY39


def md5(__string: ReadableBuffer = b"") -> hashlib._Hash:
    """
    Safely allows calling the ``hashlib.md5`` function while marking the digest as
    not used for security purposes (required so MD5 stays usable in FIPS-enabled
    environments, where security-grade MD5 is disabled).

    :param __string: The data to hash. Defaults to the empty byte string.
    :return: The MD5 hash object for ``__string``.
    """
    # The ``usedforsecurity`` keyword argument only exists on Python 3.9+.
    if sys.version_info >= (3, 9):
        return hashlib.md5(__string, usedforsecurity=False)  # type: ignore
    return hashlib.md5(__string)
2 changes: 1 addition & 1 deletion tests/providers/google/cloud/hooks/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ def test_insert_job(self, mock_client, mock_query_job, nowait):
def test_dbapi_get_uri(self):
assert self.hook.get_uri().startswith("bigquery://")

@mock.patch("airflow.providers.google.cloud.hooks.bigquery.hashlib.md5")
@mock.patch("airflow.providers.google.cloud.hooks.bigquery.md5")
@pytest.mark.parametrize(
"test_dag_id, expected_job_id",
[("test-dag-id-1.1", "airflow_test_dag_id_1_1_test_job_id_2020_01_23T00_00_00_hash")],
Expand Down
4 changes: 2 additions & 2 deletions tests/sensors/test_external_task_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
# under the License.
from __future__ import annotations

import hashlib
import logging
import os
import tempfile
Expand All @@ -39,6 +38,7 @@
from airflow.sensors.external_task import ExternalTaskMarker, ExternalTaskSensor, ExternalTaskSensorLink
from airflow.sensors.time_sensor import TimeSensor
from airflow.serialization.serialized_objects import SerializedBaseOperator
from airflow.utils.hashlib_wrapper import md5
from airflow.utils.session import create_session, provide_session
from airflow.utils.state import DagRunState, State, TaskInstanceState
from airflow.utils.task_group import TaskGroup
Expand Down Expand Up @@ -66,7 +66,7 @@ def dag_zip_maker():
class DagZipMaker:
def __call__(self, *dag_files):
self.__dag_files = [os.sep.join([TEST_DAGS_FOLDER.__str__(), dag_file]) for dag_file in dag_files]
dag_files_hash = hashlib.md5("".join(self.__dag_files).encode()).hexdigest()
dag_files_hash = md5("".join(self.__dag_files).encode()).hexdigest()
self.__tmp_dir = os.sep.join([tempfile.tempdir, dag_files_hash])

self.__zip_file_name = os.sep.join([self.__tmp_dir, f"{dag_files_hash}.zip"])
Expand Down

0 comments on commit 22e44ab

Please sign in to comment.