Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

insert_rows_from_dataframe doesn't support nullable boolean or nullable Int64 dtypes #1815

Closed
tswast opened this issue Feb 9, 2024 · 0 comments · Fixed by #1816
Closed
Assignees
Labels
api: bigquery Issues related to the googleapis/python-bigquery API. type: feature request ‘Nice-to-have’ improvement, new feature or different behavior or design.

Comments

@tswast
Copy link
Contributor

tswast commented Feb 9, 2024

Is your feature request related to a problem? Please describe.

I'm working on an alternative loading mechanism in BigQuery DataFrames based on the streaming API to avoid quota limits for load jobs.

Describe the solution you'd like

import pandas
import google.cloud.bigquery

df = pandas.DataFrame(
  {
    "bool_col": pandas.Series([True, False, True, pandas.NA, False, False, True, True, False], dtype="boolean"),
  }
)

bqclient = google.cloud.bigquery.Client()
bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)

This results in the following error:

In [11]: bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [11], in <cell line: 1>()
----> 1 bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)

File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3662, in Client.insert_rows_from_dataframe(self, table, dataframe, selected_fields, chunk_size, **kwargs)
   3660 for _ in range(chunk_count):
   3661     rows_chunk = itertools.islice(rows_iter, chunk_size)
-> 3662     result = self.insert_rows(table, rows_chunk, selected_fields, **kwargs)
   3663     insert_results.append(result)
   3665 return insert_results

File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3605, in Client.insert_rows(self, table, rows, selected_fields, **kwargs)
   3596     raise ValueError(
   3597         (
   3598             "Could not determine schema for table '{}'. Call client.get_table() "
   3599             "or pass in a list of schema fields to the selected_fields argument."
   3600         ).format(table)
   3601     )
   3603 json_rows = [_record_field_to_json(schema, row) for row in rows]
-> 3605 return self.insert_rows_json(table, json_rows, **kwargs)

File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3801, in Client.insert_rows_json(self, table, json_rows, row_ids, skip_invalid_rows, ignore_unknown_values, template_suffix, retry, timeout)
   3799 # We can always retry, because every row has an insert ID.
   3800 span_attributes = {"path": path}
-> 3801 response = self._call_api(
   3802     retry,
   3803     span_name="BigQuery.insertRowsJson",
   3804     span_attributes=span_attributes,
   3805     method="POST",
   3806     path=path,
   3807     data=data,
   3808     timeout=timeout,
   3809 )
   3810 errors = []
   3812 for error in response.get("insertErrors", ()):

File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:827, in Client._call_api(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)
    823 if span_name is not None:
    824     with create_span(
    825         name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
    826     ):
--> 827         return call()
    829 return call()

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/api_core/retry.py:349, in Retry.__call__.<locals>.retry_wrapped_func(*args, **kwargs)
    345 target = functools.partial(func, *args, **kwargs)
    346 sleep_generator = exponential_sleep_generator(
    347     self._initial, self._maximum, multiplier=self._multiplier
    348 )
--> 349 return retry_target(
    350     target,
    351     self._predicate,
    352     sleep_generator,
    353     self._timeout,
    354     on_error=on_error,
    355 )

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/api_core/retry.py:191, in retry_target(target, predicate, sleep_generator, timeout, on_error, **kwargs)
    189 for sleep in sleep_generator:
    190     try:
--> 191         return target()
    193     # pylint: disable=broad-except
    194     # This function explicitly must deal with broad exceptions.
    195     except Exception as exc:

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/cloud/_http/__init__.py:479, in JSONConnection.api_request(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)
    476 # Making the executive decision that any dictionary
    477 # data will be sent properly as JSON.
    478 if data and isinstance(data, dict):
--> 479     data = json.dumps(data)
    480     content_type = "application/json"
    482 response = self._make_request(
    483     method=method,
    484     url=url,
   (...)
    490     extra_api_info=extra_api_info,
    491 )

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/__init__.py:231, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    226 # cached encoder
    227 if (not skipkeys and ensure_ascii and
    228     check_circular and allow_nan and
    229     cls is None and indent is None and separators is None and
    230     default is None and not sort_keys and not kw):
--> 231     return _default_encoder.encode(obj)
    232 if cls is None:
    233     cls = JSONEncoder

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:199, in JSONEncoder.encode(self, o)
    195         return encode_basestring(o)
    196 # This doesn't pass the iterator directly to ''.join() because the
    197 # exceptions aren't as detailed.  The list call should be roughly
    198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
    200 if not isinstance(chunks, (list, tuple)):
    201     chunks = list(chunks)

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:257, in JSONEncoder.iterencode(self, o, _one_shot)
    252 else:
    253     _iterencode = _make_iterencode(
    254         markers, self.default, _encoder, self.indent, floatstr,
    255         self.key_separator, self.item_separator, self.sort_keys,
    256         self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:179, in JSONEncoder.default(self, o)
    160 def default(self, o):
    161     """Implement this method in a subclass such that it returns
    162     a serializable object for ``o``, or calls the base implementation
    163     (to raise a ``TypeError``).
   (...)
    177 
    178     """
--> 179     raise TypeError(f'Object of type {o.__class__.__name__} '
    180                     f'is not JSON serializable')

TypeError: Object of type bool_ is not JSON serializable

Describe alternatives you've considered

  • Load jobs, but these have quota limitations and I'd like to provide an alternative.
  • Pulling even more of this logic into the BigQuery DataFrames third_party directory, but I'd rather contribute this fix here instead.
@tswast tswast added the type: feature request ‘Nice-to-have’ improvement, new feature or different behavior or design. label Feb 9, 2024
@tswast tswast self-assigned this Feb 9, 2024
@product-auto-label product-auto-label bot added the api: bigquery Issues related to the googleapis/python-bigquery API. label Feb 9, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
api: bigquery Issues related to the googleapis/python-bigquery API. type: feature request ‘Nice-to-have’ improvement, new feature or different behavior or design.
Projects
None yet
1 participant