@@ -210,6 +210,9 @@ class Client(ClientWithProject):
210
210
default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
211
211
Default ``QueryJobConfig``.
212
212
Will be merged into job configs passed into the ``query`` method.
213
+ default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
214
+ Default ``LoadJobConfig``.
215
+ Will be merged into job configs passed into the ``load_table_*`` methods.
213
216
client_info (Optional[google.api_core.client_info.ClientInfo]):
214
217
The client info used to send a user-agent string along with API
215
218
requests. If ``None``, then default info will be used. Generally,
@@ -235,6 +238,7 @@ def __init__(
235
238
_http = None ,
236
239
location = None ,
237
240
default_query_job_config = None ,
241
+ default_load_job_config = None ,
238
242
client_info = None ,
239
243
client_options = None ,
240
244
) -> None :
@@ -260,6 +264,7 @@ def __init__(
260
264
self ._connection = Connection (self , ** kw_args )
261
265
self ._location = location
262
266
self ._default_query_job_config = copy .deepcopy (default_query_job_config )
267
+ self ._default_load_job_config = copy .deepcopy (default_load_job_config )
263
268
264
269
@property
265
270
def location (self ):
@@ -277,6 +282,17 @@ def default_query_job_config(self):
277
282
def default_query_job_config (self , value : QueryJobConfig ):
278
283
self ._default_query_job_config = copy .deepcopy (value )
279
284
285
@property
def default_load_job_config(self):
    """Default ``LoadJobConfig``.

    Will be merged into job configs passed into the ``load_table_*``
    methods.
    """
    return self._default_load_job_config

@default_load_job_config.setter
def default_load_job_config(self, value: "LoadJobConfig"):
    # Deep-copy so that a caller mutating ``value`` after assignment
    # cannot silently change this client's stored defaults (mirrors
    # the behavior of the ``default_query_job_config`` setter).
    self._default_load_job_config = copy.deepcopy(value)
280
296
def close (self ):
281
297
"""Close the underlying transport objects, releasing system resources.
282
298
@@ -2330,8 +2346,8 @@ def load_table_from_uri(
2330
2346
2331
2347
Raises:
2332
2348
TypeError:
2333
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
2334
- class.
2349
+ If ``job_config`` is not an instance of
2350
+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2335
2351
"""
2336
2352
job_id = _make_job_id (job_id , job_id_prefix )
2337
2353
@@ -2348,11 +2364,14 @@ def load_table_from_uri(
2348
2364
2349
2365
destination = _table_arg_to_table_ref (destination , default_project = self .project )
2350
2366
2351
- if job_config :
2352
- job_config = copy .deepcopy (job_config )
2353
- _verify_job_config_type (job_config , google .cloud .bigquery .job .LoadJobConfig )
2367
+ if job_config is not None :
2368
+ _verify_job_config_type (job_config , LoadJobConfig )
2369
+ else :
2370
+ job_config = job .LoadJobConfig ()
2354
2371
2355
- load_job = job .LoadJob (job_ref , source_uris , destination , self , job_config )
2372
+ new_job_config = job_config ._fill_from_default (self ._default_load_job_config )
2373
+
2374
+ load_job = job .LoadJob (job_ref , source_uris , destination , self , new_job_config )
2356
2375
load_job ._begin (retry = retry , timeout = timeout )
2357
2376
2358
2377
return load_job
@@ -2424,8 +2443,8 @@ def load_table_from_file(
2424
2443
mode.
2425
2444
2426
2445
TypeError:
2427
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
2428
- class.
2446
+ If ``job_config`` is not an instance of
2447
+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2429
2448
"""
2430
2449
job_id = _make_job_id (job_id , job_id_prefix )
2431
2450
@@ -2437,10 +2456,15 @@ def load_table_from_file(
2437
2456
2438
2457
destination = _table_arg_to_table_ref (destination , default_project = self .project )
2439
2458
job_ref = job ._JobReference (job_id , project = project , location = location )
2440
- if job_config :
2441
- job_config = copy .deepcopy (job_config )
2442
- _verify_job_config_type (job_config , google .cloud .bigquery .job .LoadJobConfig )
2443
- load_job = job .LoadJob (job_ref , None , destination , self , job_config )
2459
+
2460
+ if job_config is not None :
2461
+ _verify_job_config_type (job_config , LoadJobConfig )
2462
+ else :
2463
+ job_config = job .LoadJobConfig ()
2464
+
2465
+ new_job_config = job_config ._fill_from_default (self ._default_load_job_config )
2466
+
2467
+ load_job = job .LoadJob (job_ref , None , destination , self , new_job_config )
2444
2468
job_resource = load_job .to_api_repr ()
2445
2469
2446
2470
if rewind :
@@ -2564,43 +2588,40 @@ def load_table_from_dataframe(
2564
2588
If a usable parquet engine cannot be found. This method
2565
2589
requires :mod:`pyarrow` to be installed.
2566
2590
TypeError:
2567
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
2568
- class.
2591
+ If ``job_config`` is not an instance of
2592
+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2569
2593
"""
2570
2594
job_id = _make_job_id (job_id , job_id_prefix )
2571
2595
2572
- if job_config :
2573
- _verify_job_config_type (job_config , google .cloud .bigquery .job .LoadJobConfig )
2574
- # Make a copy so that the job config isn't modified in-place.
2575
- job_config_properties = copy .deepcopy (job_config ._properties )
2576
- job_config = job .LoadJobConfig ()
2577
- job_config ._properties = job_config_properties
2578
-
2596
+ if job_config is not None :
2597
+ _verify_job_config_type (job_config , LoadJobConfig )
2579
2598
else :
2580
2599
job_config = job .LoadJobConfig ()
2581
2600
2601
+ new_job_config = job_config ._fill_from_default (self ._default_load_job_config )
2602
+
2582
2603
supported_formats = {job .SourceFormat .CSV , job .SourceFormat .PARQUET }
2583
- if job_config .source_format is None :
2604
+ if new_job_config .source_format is None :
2584
2605
# default value
2585
- job_config .source_format = job .SourceFormat .PARQUET
2606
+ new_job_config .source_format = job .SourceFormat .PARQUET
2586
2607
2587
2608
if (
2588
- job_config .source_format == job .SourceFormat .PARQUET
2589
- and job_config .parquet_options is None
2609
+ new_job_config .source_format == job .SourceFormat .PARQUET
2610
+ and new_job_config .parquet_options is None
2590
2611
):
2591
2612
parquet_options = ParquetOptions ()
2592
2613
# default value
2593
2614
parquet_options .enable_list_inference = True
2594
- job_config .parquet_options = parquet_options
2615
+ new_job_config .parquet_options = parquet_options
2595
2616
2596
- if job_config .source_format not in supported_formats :
2617
+ if new_job_config .source_format not in supported_formats :
2597
2618
raise ValueError (
2598
2619
"Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported" .format (
2599
- job_config .source_format
2620
+ new_job_config .source_format
2600
2621
)
2601
2622
)
2602
2623
2603
- if pyarrow is None and job_config .source_format == job .SourceFormat .PARQUET :
2624
+ if pyarrow is None and new_job_config .source_format == job .SourceFormat .PARQUET :
2604
2625
# pyarrow is now the only supported parquet engine.
2605
2626
raise ValueError ("This method requires pyarrow to be installed" )
2606
2627
@@ -2611,8 +2632,8 @@ def load_table_from_dataframe(
2611
2632
# schema, and check if dataframe schema is compatible with it - except
2612
2633
# for WRITE_TRUNCATE jobs, the existing schema does not matter then.
2613
2634
if (
2614
- not job_config .schema
2615
- and job_config .write_disposition != job .WriteDisposition .WRITE_TRUNCATE
2635
+ not new_job_config .schema
2636
+ and new_job_config .write_disposition != job .WriteDisposition .WRITE_TRUNCATE
2616
2637
):
2617
2638
try :
2618
2639
table = self .get_table (destination )
@@ -2623,7 +2644,7 @@ def load_table_from_dataframe(
2623
2644
name
2624
2645
for name , _ in _pandas_helpers .list_columns_and_indexes (dataframe )
2625
2646
)
2626
- job_config .schema = [
2647
+ new_job_config .schema = [
2627
2648
# Field description and policy tags are not needed to
2628
2649
# serialize a data frame.
2629
2650
SchemaField (
@@ -2637,11 +2658,11 @@ def load_table_from_dataframe(
2637
2658
if field .name in columns_and_indexes
2638
2659
]
2639
2660
2640
- job_config .schema = _pandas_helpers .dataframe_to_bq_schema (
2641
- dataframe , job_config .schema
2661
+ new_job_config .schema = _pandas_helpers .dataframe_to_bq_schema (
2662
+ dataframe , new_job_config .schema
2642
2663
)
2643
2664
2644
- if not job_config .schema :
2665
+ if not new_job_config .schema :
2645
2666
# the schema could not be fully detected
2646
2667
warnings .warn (
2647
2668
"Schema could not be detected for all columns. Loading from a "
@@ -2652,13 +2673,13 @@ def load_table_from_dataframe(
2652
2673
)
2653
2674
2654
2675
tmpfd , tmppath = tempfile .mkstemp (
2655
- suffix = "_job_{}.{}" .format (job_id [:8 ], job_config .source_format .lower ())
2676
+ suffix = "_job_{}.{}" .format (job_id [:8 ], new_job_config .source_format .lower ())
2656
2677
)
2657
2678
os .close (tmpfd )
2658
2679
2659
2680
try :
2660
2681
2661
- if job_config .source_format == job .SourceFormat .PARQUET :
2682
+ if new_job_config .source_format == job .SourceFormat .PARQUET :
2662
2683
if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS :
2663
2684
msg = (
2664
2685
"Loading dataframe data in PARQUET format with pyarrow "
@@ -2669,13 +2690,13 @@ def load_table_from_dataframe(
2669
2690
)
2670
2691
warnings .warn (msg , category = RuntimeWarning )
2671
2692
2672
- if job_config .schema :
2693
+ if new_job_config .schema :
2673
2694
if parquet_compression == "snappy" : # adjust the default value
2674
2695
parquet_compression = parquet_compression .upper ()
2675
2696
2676
2697
_pandas_helpers .dataframe_to_parquet (
2677
2698
dataframe ,
2678
- job_config .schema ,
2699
+ new_job_config .schema ,
2679
2700
tmppath ,
2680
2701
parquet_compression = parquet_compression ,
2681
2702
parquet_use_compliant_nested_type = True ,
@@ -2715,7 +2736,7 @@ def load_table_from_dataframe(
2715
2736
job_id_prefix = job_id_prefix ,
2716
2737
location = location ,
2717
2738
project = project ,
2718
- job_config = job_config ,
2739
+ job_config = new_job_config ,
2719
2740
timeout = timeout ,
2720
2741
)
2721
2742
@@ -2791,22 +2812,22 @@ def load_table_from_json(
2791
2812
2792
2813
Raises:
2793
2814
TypeError:
2794
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
2795
- class.
2815
+ If ``job_config`` is not an instance of
2816
+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2796
2817
"""
2797
2818
job_id = _make_job_id (job_id , job_id_prefix )
2798
2819
2799
- if job_config :
2800
- _verify_job_config_type (job_config , google .cloud .bigquery .job .LoadJobConfig )
2801
- # Make a copy so that the job config isn't modified in-place.
2802
- job_config = copy .deepcopy (job_config )
2820
+ if job_config is not None :
2821
+ _verify_job_config_type (job_config , LoadJobConfig )
2803
2822
else :
2804
2823
job_config = job .LoadJobConfig ()
2805
2824
2806
- job_config .source_format = job .SourceFormat .NEWLINE_DELIMITED_JSON
2825
+ new_job_config = job_config ._fill_from_default (self ._default_load_job_config )
2826
+
2827
+ new_job_config .source_format = job .SourceFormat .NEWLINE_DELIMITED_JSON
2807
2828
2808
- if job_config .schema is None :
2809
- job_config .autodetect = True
2829
+ if new_job_config .schema is None :
2830
+ new_job_config .autodetect = True
2810
2831
2811
2832
if project is None :
2812
2833
project = self .project
@@ -2828,7 +2849,7 @@ def load_table_from_json(
2828
2849
job_id_prefix = job_id_prefix ,
2829
2850
location = location ,
2830
2851
project = project ,
2831
- job_config = job_config ,
2852
+ job_config = new_job_config ,
2832
2853
timeout = timeout ,
2833
2854
)
2834
2855
0 commit comments