Skip to content

Commit b6cedae

Browse files
committed
feat: add default LoadJobConfig to Client
1 parent aa0fa02 commit b6cedae

File tree

5 files changed

+596
-27
lines changed

5 files changed

+596
-27
lines changed

google/cloud/bigquery/client.py

Lines changed: 46 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,9 @@ class Client(ClientWithProject):
210210
default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
211211
Default ``QueryJobConfig``.
212212
Will be merged into job configs passed into the ``query`` method.
213+
default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
214+
Default ``LoadJobConfig``.
215+
Will be merged into job configs passed into the ``load_table_*`` methods.
213216
client_info (Optional[google.api_core.client_info.ClientInfo]):
214217
The client info used to send a user-agent string along with API
215218
requests. If ``None``, then default info will be used. Generally,
@@ -235,6 +238,7 @@ def __init__(
235238
_http=None,
236239
location=None,
237240
default_query_job_config=None,
241+
default_load_job_config=None,
238242
client_info=None,
239243
client_options=None,
240244
) -> None:
@@ -260,6 +264,7 @@ def __init__(
260264
self._connection = Connection(self, **kw_args)
261265
self._location = location
262266
self._default_query_job_config = copy.deepcopy(default_query_job_config)
267+
self._default_load_job_config = copy.deepcopy(default_load_job_config)
263268

264269
@property
265270
def location(self):
@@ -277,6 +282,17 @@ def default_query_job_config(self):
277282
def default_query_job_config(self, value: QueryJobConfig):
278283
self._default_query_job_config = copy.deepcopy(value)
279284

285+
@property
286+
def default_load_job_config(self):
287+
"""Default ``LoadJobConfig``.
288+
Will be merged into job configs passed into the ``load_table_*`` methods.
289+
"""
290+
return self._default_load_job_config
291+
292+
@default_load_job_config.setter
293+
def default_load_job_config(self, value: LoadJobConfig):
294+
self._default_load_job_config = copy.deepcopy(value)
295+
280296
def close(self):
281297
"""Close the underlying transport objects, releasing system resources.
282298
@@ -2330,8 +2346,8 @@ def load_table_from_uri(
23302346
23312347
Raises:
23322348
TypeError:
2333-
If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
2334-
class.
2349+
If ``job_config`` is not an instance of
2350+
:class:`~google.cloud.bigquery.job.LoadJobConfig` class.
23352351
"""
23362352
job_id = _make_job_id(job_id, job_id_prefix)
23372353

@@ -2349,8 +2365,12 @@ def load_table_from_uri(
23492365
destination = _table_arg_to_table_ref(destination, default_project=self.project)
23502366

23512367
if job_config:
2352-
job_config = copy.deepcopy(job_config)
2353-
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
2368+
_verify_job_config_type(job_config, LoadJobConfig)
2369+
else:
2370+
job_config = job.LoadJobConfig()
2371+
2372+
if job_config:
2373+
job_config = job_config._fill_from_default(self._default_load_job_config)
23542374

23552375
load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config)
23562376
load_job._begin(retry=retry, timeout=timeout)
@@ -2424,8 +2444,8 @@ def load_table_from_file(
24242444
mode.
24252445
24262446
TypeError:
2427-
If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
2428-
class.
2447+
If ``job_config`` is not an instance of
2448+
:class:`~google.cloud.bigquery.job.LoadJobConfig` class.
24292449
"""
24302450
job_id = _make_job_id(job_id, job_id_prefix)
24312451

@@ -2437,9 +2457,15 @@ def load_table_from_file(
24372457

24382458
destination = _table_arg_to_table_ref(destination, default_project=self.project)
24392459
job_ref = job._JobReference(job_id, project=project, location=location)
2460+
24402461
if job_config:
2441-
job_config = copy.deepcopy(job_config)
2442-
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
2462+
_verify_job_config_type(job_config, LoadJobConfig)
2463+
else:
2464+
job_config = job.LoadJobConfig()
2465+
2466+
if job_config:
2467+
job_config = job_config._fill_from_default(self._default_load_job_config)
2468+
24432469
load_job = job.LoadJob(job_ref, None, destination, self, job_config)
24442470
job_resource = load_job.to_api_repr()
24452471

@@ -2564,21 +2590,19 @@ def load_table_from_dataframe(
25642590
If a usable parquet engine cannot be found. This method
25652591
requires :mod:`pyarrow` to be installed.
25662592
TypeError:
2567-
If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
2568-
class.
2593+
If ``job_config`` is not an instance of
2594+
:class:`~google.cloud.bigquery.job.LoadJobConfig` class.
25692595
"""
25702596
job_id = _make_job_id(job_id, job_id_prefix)
25712597

25722598
if job_config:
2573-
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
2574-
# Make a copy so that the job config isn't modified in-place.
2575-
job_config_properties = copy.deepcopy(job_config._properties)
2576-
job_config = job.LoadJobConfig()
2577-
job_config._properties = job_config_properties
2578-
2599+
_verify_job_config_type(job_config, LoadJobConfig)
25792600
else:
25802601
job_config = job.LoadJobConfig()
25812602

2603+
if job_config:
2604+
job_config = job_config._fill_from_default(self._default_load_job_config)
2605+
25822606
supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET}
25832607
if job_config.source_format is None:
25842608
# default value
@@ -2791,18 +2815,19 @@ def load_table_from_json(
27912815
27922816
Raises:
27932817
TypeError:
2794-
If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
2795-
class.
2818+
If ``job_config`` is not an instance of
2819+
:class:`~google.cloud.bigquery.job.LoadJobConfig` class.
27962820
"""
27972821
job_id = _make_job_id(job_id, job_id_prefix)
27982822

27992823
if job_config:
2800-
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
2801-
# Make a copy so that the job config isn't modified in-place.
2802-
job_config = copy.deepcopy(job_config)
2824+
_verify_job_config_type(job_config, LoadJobConfig)
28032825
else:
28042826
job_config = job.LoadJobConfig()
28052827

2828+
if job_config:
2829+
job_config = job_config._fill_from_default(self._default_load_job_config)
2830+
28062831
job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON
28072832

28082833
if job_config.schema is None:

google/cloud/bigquery/job/base.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -269,7 +269,7 @@ def to_api_repr(self) -> dict:
269269
"""
270270
return copy.deepcopy(self._properties)
271271

272-
def _fill_from_default(self, default_job_config):
272+
def _fill_from_default(self, default_job_config=None):
273273
"""Merge this job config with a default job config.
274274
275275
The keys in this object take precedence over the keys in the default
@@ -283,6 +283,10 @@ def _fill_from_default(self, default_job_config):
283283
Returns:
284284
google.cloud.bigquery.job._JobConfig: A new (merged) job config.
285285
"""
286+
if not default_job_config:
287+
new_job_config = copy.deepcopy(self)
288+
return new_job_config
289+
286290
if self._job_type != default_job_config._job_type:
287291
raise TypeError(
288292
"attempted to merge two incompatible job types: "

tests/system/test_client.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2319,7 +2319,7 @@ def _table_exists(t):
23192319
return False
23202320

23212321

2322-
def test_dbapi_create_view(dataset_id):
2322+
def test_dbapi_create_view(dataset_id: str):
23232323

23242324
query = f"""
23252325
CREATE VIEW {dataset_id}.dbapi_create_view
@@ -2332,7 +2332,7 @@ def test_dbapi_create_view(dataset_id):
23322332
assert Config.CURSOR.rowcount == 0, "expected 0 rows"
23332333

23342334

2335-
def test_parameterized_types_round_trip(dataset_id):
2335+
def test_parameterized_types_round_trip(dataset_id: str):
23362336
client = Config.CLIENT
23372337
table_id = f"{dataset_id}.test_parameterized_types_round_trip"
23382338
fields = (
@@ -2358,7 +2358,7 @@ def test_parameterized_types_round_trip(dataset_id):
23582358
assert tuple(s._key()[:2] for s in table2.schema) == fields
23592359

23602360

2361-
def test_table_snapshots(dataset_id):
2361+
def test_table_snapshots(dataset_id: str):
23622362
from google.cloud.bigquery import CopyJobConfig
23632363
from google.cloud.bigquery import OperationType
23642364

@@ -2429,7 +2429,7 @@ def test_table_snapshots(dataset_id):
24292429
assert rows == [(1, "one"), (2, "two")]
24302430

24312431

2432-
def test_table_clones(dataset_id):
2432+
def test_table_clones(dataset_id: str):
24332433
from google.cloud.bigquery import CopyJobConfig
24342434
from google.cloud.bigquery import OperationType
24352435

tests/unit/job/test_base.py

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1104,7 +1104,7 @@ def test_ctor_with_unknown_property_raises_error(self):
11041104
config = self._make_one()
11051105
config.wrong_name = None
11061106

1107-
def test_fill_from_default(self):
1107+
def test_fill_query_job_config_from_default(self):
11081108
from google.cloud.bigquery import QueryJobConfig
11091109

11101110
job_config = QueryJobConfig()
@@ -1120,6 +1120,22 @@ def test_fill_from_default(self):
11201120
self.assertTrue(final_job_config.use_query_cache)
11211121
self.assertEqual(final_job_config.maximum_bytes_billed, 1000)
11221122

1123+
def test_fill_load_job_from_default(self):
1124+
from google.cloud.bigquery import LoadJobConfig
1125+
1126+
job_config = LoadJobConfig()
1127+
job_config.create_session = True
1128+
job_config.encoding = "UTF-8"
1129+
1130+
default_job_config = LoadJobConfig()
1131+
default_job_config.ignore_unknown_values = True
1132+
default_job_config.encoding = "ISO-8859-1"
1133+
1134+
final_job_config = job_config._fill_from_default(default_job_config)
1135+
self.assertTrue(final_job_config.create_session)
1136+
self.assertTrue(final_job_config.ignore_unknown_values)
1137+
self.assertEqual(final_job_config.encoding, "UTF-8")
1138+
11231139
def test_fill_from_default_conflict(self):
11241140
from google.cloud.bigquery import QueryJobConfig
11251141

@@ -1132,6 +1148,17 @@ def test_fill_from_default_conflict(self):
11321148
with self.assertRaises(TypeError):
11331149
basic_job_config._fill_from_default(conflicting_job_config)
11341150

1151+
def test_fill_from_empty_default_conflict(self):
1152+
from google.cloud.bigquery import QueryJobConfig
1153+
1154+
job_config = QueryJobConfig()
1155+
job_config.dry_run = True
1156+
job_config.maximum_bytes_billed = 1000
1157+
1158+
final_job_config = job_config._fill_from_default(default_job_config=None)
1159+
self.assertTrue(final_job_config.dry_run)
1160+
self.assertEqual(final_job_config.maximum_bytes_billed, 1000)
1161+
11351162
@mock.patch("google.cloud.bigquery._helpers._get_sub_prop")
11361163
def test__get_sub_prop_wo_default(self, _get_sub_prop):
11371164
job_config = self._make_one()

0 commit comments

Comments
 (0)