Skip to content

Commit

Permalink
Fixing re pattern and changing to use a single character class. (#11857)
Browse files: browse the repository at this point in the history
  • Loading branch information
nathadfield committed Oct 28, 2020
1 parent 0c8d089 commit 79cb771
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/operators/bigquery.py
Expand Up @@ -2062,7 +2062,7 @@ def _job_id(self, context):

exec_date = context['execution_date'].isoformat()
job_id = f"airflow_{self.dag_id}_{self.task_id}_{exec_date}_{uniqueness_suffix}"
return re.sub(r"\:|-|\+\.", "_", job_id)
return re.sub(r"[:\-+.]", "_", job_id)

def execute(self, context: Any):
hook = BigQueryHook(
Expand Down
21 changes: 21 additions & 0 deletions tests/providers/google/cloud/operators/test_bigquery.py
Expand Up @@ -960,3 +960,24 @@ def test_execute_no_force_rerun(self, mock_hook, mock_md5):
# No force rerun
with pytest.raises(AirflowException):
op.execute({})

@mock.patch('airflow.providers.google.cloud.operators.bigquery.hashlib.md5')
@pytest.mark.parametrize(
    "test_dag_id, expected_job_id",
    [("test-dag-id-1.1", "airflow_test_dag_id_1_1_test_job_id_2020_01_23T00_00_00_hash")],
)
def test_job_id_validity(self, mock_md5, test_dag_id, expected_job_id):
    """Check the job id produced by ``BigQueryInsertJobOperator._job_id``.

    ``hashlib.md5`` (as referenced from the bigquery operators module) is
    patched so that ``hexdigest()`` returns the fixed string ``"hash"``.
    The operator is created inside a DAG whose id is the parametrized
    ``test_dag_id``, and the id generated for an execution date of
    2020-01-23 is compared with ``expected_job_id``.

    In the parametrized case the dag id contains ``-`` and ``.``, while
    the expected job id contains only underscores at those positions.
    """
    # Pin the digest so the expected id can be written out literally.
    mock_md5.return_value.hexdigest.return_value = "hash"

    run_date = datetime(2020, 1, 23)
    query_configuration = {
        "query": {
            "query": "SELECT * FROM any",
            "useLegacySql": False,
        }
    }

    with DAG(dag_id=test_dag_id, start_date=datetime(2020, 1, 23)):
        operator = BigQueryInsertJobOperator(
            task_id="test_job_id",
            configuration=query_configuration,
            project_id=TEST_GCP_PROJECT_ID,
        )

    generated_job_id = operator._job_id({"execution_date": run_date})
    assert generated_job_id == expected_job_id

0 comments on commit 79cb771

Please sign in to comment.