Commit

Merge branch 'main' into range_sql2

Linchin committed Feb 27, 2024
1 parent 0307017 commit 13d9250
Showing 14 changed files with 148 additions and 54 deletions.
4 changes: 2 additions & 2 deletions .github/.OwlBot.lock.yaml
@@ -13,5 +13,5 @@
# limitations under the License.
docker:
image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
-  digest: sha256:5ea6d0ab82c956b50962f91d94e206d3921537ae5fe1549ec5326381d8905cfa
-# created: 2024-01-15T16:32:08.142785673Z
+  digest: sha256:a0c4463fcfd9893fc172a3b3db2b6ac0c7b94ec6ad458c7dcea12d9693615ac3
+# created: 2024-02-17T12:21:23.177926195Z
57 changes: 33 additions & 24 deletions .kokoro/requirements.txt
@@ -93,30 +93,39 @@ colorlog==6.7.0 \
# via
# gcp-docuploader
# nox
-cryptography==41.0.6 \
-    --hash=sha256:068bc551698c234742c40049e46840843f3d98ad7ce265fd2bd4ec0d11306596 \
-    --hash=sha256:0f27acb55a4e77b9be8d550d762b0513ef3fc658cd3eb15110ebbcbd626db12c \
-    --hash=sha256:2132d5865eea673fe6712c2ed5fb4fa49dba10768bb4cc798345748380ee3660 \
-    --hash=sha256:3288acccef021e3c3c10d58933f44e8602cf04dba96d9796d70d537bb2f4bbc4 \
-    --hash=sha256:35f3f288e83c3f6f10752467c48919a7a94b7d88cc00b0668372a0d2ad4f8ead \
-    --hash=sha256:398ae1fc711b5eb78e977daa3cbf47cec20f2c08c5da129b7a296055fbb22aed \
-    --hash=sha256:422e3e31d63743855e43e5a6fcc8b4acab860f560f9321b0ee6269cc7ed70cc3 \
-    --hash=sha256:48783b7e2bef51224020efb61b42704207dde583d7e371ef8fc2a5fb6c0aabc7 \
-    --hash=sha256:4d03186af98b1c01a4eda396b137f29e4e3fb0173e30f885e27acec8823c1b09 \
-    --hash=sha256:5daeb18e7886a358064a68dbcaf441c036cbdb7da52ae744e7b9207b04d3908c \
-    --hash=sha256:60e746b11b937911dc70d164060d28d273e31853bb359e2b2033c9e93e6f3c43 \
-    --hash=sha256:742ae5e9a2310e9dade7932f9576606836ed174da3c7d26bc3d3ab4bd49b9f65 \
-    --hash=sha256:7e00fb556bda398b99b0da289ce7053639d33b572847181d6483ad89835115f6 \
-    --hash=sha256:85abd057699b98fce40b41737afb234fef05c67e116f6f3650782c10862c43da \
-    --hash=sha256:8efb2af8d4ba9dbc9c9dd8f04d19a7abb5b49eab1f3694e7b5a16a5fc2856f5c \
-    --hash=sha256:ae236bb8760c1e55b7a39b6d4d32d2279bc6c7c8500b7d5a13b6fb9fc97be35b \
-    --hash=sha256:afda76d84b053923c27ede5edc1ed7d53e3c9f475ebaf63c68e69f1403c405a8 \
-    --hash=sha256:b27a7fd4229abef715e064269d98a7e2909ebf92eb6912a9603c7e14c181928c \
-    --hash=sha256:b648fe2a45e426aaee684ddca2632f62ec4613ef362f4d681a9a6283d10e079d \
-    --hash=sha256:c5a550dc7a3b50b116323e3d376241829fd326ac47bc195e04eb33a8170902a9 \
-    --hash=sha256:da46e2b5df770070412c46f87bac0849b8d685c5f2679771de277a422c7d0b86 \
-    --hash=sha256:f39812f70fc5c71a15aa3c97b2bbe213c3f2a460b79bd21c40d033bb34a9bf36 \
-    --hash=sha256:ff369dd19e8fe0528b02e8df9f2aeb2479f89b1270d90f96a63500afe9af5cae
+cryptography==42.0.2 \
+    --hash=sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380 \
+    --hash=sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589 \
+    --hash=sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea \
+    --hash=sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65 \
+    --hash=sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a \
+    --hash=sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3 \
+    --hash=sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008 \
+    --hash=sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1 \
+    --hash=sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2 \
+    --hash=sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635 \
+    --hash=sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2 \
+    --hash=sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90 \
+    --hash=sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee \
+    --hash=sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a \
+    --hash=sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242 \
+    --hash=sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12 \
+    --hash=sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2 \
+    --hash=sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d \
+    --hash=sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be \
+    --hash=sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee \
+    --hash=sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6 \
+    --hash=sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529 \
+    --hash=sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929 \
+    --hash=sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1 \
+    --hash=sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6 \
+    --hash=sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a \
+    --hash=sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446 \
+    --hash=sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9 \
+    --hash=sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888 \
+    --hash=sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4 \
+    --hash=sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33 \
+    --hash=sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f
# via
# gcp-releasetool
# secretstorage
19 changes: 19 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,25 @@
[1]: https://pypi.org/project/google-cloud-bigquery/#history


+## [3.17.2](https://github.com/googleapis/python-bigquery/compare/v3.17.1...v3.17.2) (2024-01-30)
+
+
+### Bug Fixes
+
+* Change load_table_from_json autodetect logic ([#1804](https://github.com/googleapis/python-bigquery/issues/1804)) ([6249032](https://github.com/googleapis/python-bigquery/commit/62490325f64e5d66303d9218992e28ac5f21cb3f))
+
+
+### Documentation
+
+* Update to use API ([#1781](https://github.com/googleapis/python-bigquery/issues/1781)) ([81563b0](https://github.com/googleapis/python-bigquery/commit/81563b06298fe3a64be6a89b583c3d64758ca12a))
+* Update `client_query_destination_table.py` sample to use `query_and_wait` ([#1783](https://github.com/googleapis/python-bigquery/issues/1783)) ([68ebbe1](https://github.com/googleapis/python-bigquery/commit/68ebbe12d455ce8e9b1784fb11787c2fb842ef22))
+* Update query_external_sheets_permanent_table.py to use query_and_wait API ([#1778](https://github.com/googleapis/python-bigquery/issues/1778)) ([a7be88a](https://github.com/googleapis/python-bigquery/commit/a7be88adf8a480ee61aa79789cb53df1b79bb091))
+* Update sample for query_to_arrow to use query_and_wait API ([#1776](https://github.com/googleapis/python-bigquery/issues/1776)) ([dbf10de](https://github.com/googleapis/python-bigquery/commit/dbf10dee51a7635e9b98658f205ded2de087a06f))
+* Update the query destination table legacy file to use query_and_wait API ([#1775](https://github.com/googleapis/python-bigquery/issues/1775)) ([ef89f9e](https://github.com/googleapis/python-bigquery/commit/ef89f9e58c22b3af5a7757b69daa030116012350))
+* Update to use `query_and_wait` in `client_query_w_positional_params.py` ([#1786](https://github.com/googleapis/python-bigquery/issues/1786)) ([410f71e](https://github.com/googleapis/python-bigquery/commit/410f71e6b6e755928e363ed89c1044e14b0db9cc))
+* Update to use `query_and_wait` in `samples/client_query_w_timestamp_params.py` ([#1785](https://github.com/googleapis/python-bigquery/issues/1785)) ([ba36948](https://github.com/googleapis/python-bigquery/commit/ba3694852c13c8a29fe0f9d923353e82acfd4278))
+* Update to_geodataframe to use query_and_wait functionality ([#1800](https://github.com/googleapis/python-bigquery/issues/1800)) ([1298594](https://github.com/googleapis/python-bigquery/commit/12985942942b8f205ecd261fcdf620df9a640460))
+
## [3.17.1](https://github.com/googleapis/python-bigquery/compare/v3.17.0...v3.17.1) (2024-01-24)


2 changes: 1 addition & 1 deletion google/cloud/bigquery/_job_helpers.py
@@ -54,7 +54,7 @@


# The purpose of _TIMEOUT_BUFFER_MILLIS is to allow the server-side timeout to
-# happen before the client-side timeout. This is not strictly neccessary, as the
+# happen before the client-side timeout. This is not strictly necessary, as the
# client retries client-side timeouts, but the hope by making the server-side
# timeout slightly shorter is that it can save the server from some unncessary
# processing time.
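The buffer's purpose is easier to see in a sketch. This is a minimal illustration of the comment's reasoning, not this module's actual code: the helper name and the buffer value below are assumptions for the example.

```python
# Illustrative only; not the library's implementation.
_TIMEOUT_BUFFER_MILLIS = 100  # value chosen for the example


def server_timeout_millis(client_timeout_seconds: float) -> int:
    """Return a server-side timeout slightly shorter than the client's.

    The server then gives up just before the client would retry, so it
    does not keep computing a result that no caller will ever read.
    """
    return max(0, int(client_timeout_seconds * 1000) - _TIMEOUT_BUFFER_MILLIS)


print(server_timeout_millis(10.0))  # 9900
```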
19 changes: 19 additions & 0 deletions google/cloud/bigquery/_pandas_helpers.py
@@ -958,6 +958,25 @@ def dataframe_to_json_generator(dataframe):
# considered a NaN, however.
if isinstance(is_nan, bool) and is_nan:
continue

+        # Convert numpy types to corresponding Python types.
+        # https://stackoverflow.com/a/60441783/101923
+        if isinstance(value, numpy.bool_):
+            value = bool(value)
+        elif isinstance(
+            value,
+            (
+                numpy.int64,
+                numpy.int32,
+                numpy.int16,
+                numpy.int8,
+                numpy.uint64,
+                numpy.uint32,
+                numpy.uint16,
+                numpy.uint8,
+            ),
+        ):
+            value = int(value)
output[column] = value

yield output
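The block added here exists because the standard-library JSON encoder rejects numpy scalars. A quick demonstration of the failure mode the conversion avoids; `numpy.integer` is used below as shorthand for the explicit tuple of integer types the diff spells out:

```python
import json

import numpy

row = {"flag": numpy.bool_(True), "count": numpy.int64(7)}

try:
    json.dumps(row)
except TypeError as exc:
    print(exc)  # e.g. "Object of type bool_ is not JSON serializable"

# After converting numpy scalars to their Python equivalents, the same
# row serializes cleanly.
converted = {
    key: bool(val)
    if isinstance(val, numpy.bool_)
    else int(val)
    if isinstance(val, numpy.integer)
    else val
    for key, val in row.items()
}
print(json.dumps(converted))  # {"flag": true, "count": 7}
```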
2 changes: 1 addition & 1 deletion google/cloud/bigquery/magics/magics.py
@@ -288,7 +288,7 @@ def _handle_error(error, destination_var=None):
Args:
error (Exception):
-            An exception that ocurred during the query execution.
+            An exception that occurred during the query execution.
destination_var (Optional[str]):
The name of the IPython session variable to store the query job.
"""
2 changes: 1 addition & 1 deletion google/cloud/bigquery/opentelemetry_tracing.py
@@ -90,7 +90,7 @@ def _get_final_span_attributes(attributes=None, client=None, job_ref=None):
"""Compiles attributes from: client, job_ref, user-provided attributes.
Attributes from all of these sources are merged together. Note the
-    attributes are added sequentially based on perceived order of precendence:
+    attributes are added sequentially based on perceived order of precedence:
i.e. attributes added last may overwrite attributes added earlier.
Args:
2 changes: 1 addition & 1 deletion google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "3.17.1"
__version__ = "3.17.2"
6 changes: 4 additions & 2 deletions samples/client_query_w_named_params.py
@@ -33,8 +33,10 @@ def client_query_w_named_params() -> None:
bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
]
)
-    query_job = client.query(query, job_config=job_config)  # Make an API request.
+    results = client.query_and_wait(
+        query, job_config=job_config
+    )  # Make an API request.

-    for row in query_job:
+    for row in results:
print("{}: \t{}".format(row.word, row.word_count))
# [END bigquery_query_params_named]
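The change swaps `client.query()` for `client.query_and_wait()`, which starts the query, blocks until it finishes, and returns the rows directly, with no intermediate `QueryJob` to poll. A minimal sketch of the new pattern in isolation, assuming application default credentials and a configured project:

```python
from google.cloud import bigquery

client = bigquery.Client()

rows = client.query_and_wait(
    "SELECT word, word_count"
    " FROM `bigquery-public-data.samples.shakespeare`"
    " ORDER BY word_count DESC LIMIT 3"
)  # Make an API request and wait for the results.

for row in rows:
    print("{}: \t{}".format(row.word, row.word_count))
```

As the diff above shows, `query_and_wait()` accepts the same `job_config` argument as `query()`, so parameterized queries carry over unchanged.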
13 changes: 12 additions & 1 deletion tests/system/test_pandas.py
@@ -835,7 +835,9 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id):
schema = [
SF("float_col", "FLOAT", mode="REQUIRED"),
SF("int_col", "INTEGER", mode="REQUIRED"),
SF("int64_col", "INTEGER", mode="NULLABLE"),
SF("bool_col", "BOOLEAN", mode="REQUIRED"),
SF("boolean_col", "BOOLEAN", mode="NULLABLE"),
SF("string_col", "STRING", mode="NULLABLE"),
SF("date_col", "DATE", mode="NULLABLE"),
SF("time_col", "TIME", mode="NULLABLE"),
@@ -898,6 +900,15 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id):
dataframe["date_col"] = dataframe["date_col"].astype("dbdate")
dataframe["time_col"] = dataframe["time_col"].astype("dbtime")

+    # Support nullable integer and boolean dtypes.
+    # https://github.com/googleapis/python-bigquery/issues/1815
+    dataframe["int64_col"] = pandas.Series(
+        [-11, -22, pandas.NA, -44, -55, -66], dtype="Int64"
+    )
+    dataframe["boolean_col"] = pandas.Series(
+        [True, False, True, pandas.NA, True, False], dtype="boolean"
+    )

table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe"
table_arg = bigquery.Table(table_id, schema=schema)
table = helpers.retry_403(bigquery_client.create_table)(table_arg)
@@ -910,7 +921,7 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id):
expected = [
# Pandas often represents NULL values as NaN. Convert to None for
# easier comparison.
-        tuple(None if col != col else col for col in data_row)
+        tuple(None if pandas.isna(col) else col for col in data_row)
for data_row in dataframe.itertuples(index=False)
]

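The one-line fix above replaces the `col != col` idiom, which relies on NaN being the only value unequal to itself. That trick breaks once nullable dtypes introduce `pandas.NA`, whose comparisons propagate NA instead of returning a bool. A short demonstration:

```python
import pandas

nan = float("nan")
print(nan != nan)  # True -- the old trick only ever caught NaN

na = pandas.NA
print(na != na)  # <NA> -- comparisons with NA propagate NA

try:
    bool(na != na)  # what `None if col != col else col` ends up evaluating
except TypeError as exc:
    print(exc)  # boolean value of NA is ambiguous

# pandas.isna treats NaN, NaT, None, and pandas.NA uniformly.
print(pandas.isna(nan), pandas.isna(pandas.NaT), pandas.isna(None), pandas.isna(na))
```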
2 changes: 1 addition & 1 deletion tests/system/test_query.py
@@ -477,7 +477,7 @@ def test_query_error_w_api_method_default(bigquery_client: bigquery.Client):
"""Test that an exception is not thrown until fetching the results.
For backwards compatibility, jobs.insert is the default API method. With
-    jobs.insert, a failed query job is "sucessfully" created. An exception is
+    jobs.insert, a failed query job is "successfully" created. An exception is
thrown when fetching the results.
"""

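A sketch of the behavior this docstring describes, assuming a query that only fails at execution time; the exact exception type depends on the failure:

```python
from google.cloud import bigquery

client = bigquery.Client()

# With jobs.insert (the default API method), creating the job succeeds
# even though the query is doomed...
job = client.query(
    "SELECT no_such_column FROM `bigquery-public-data.samples.shakespeare`"
)

# ...and the failure only surfaces when the results are fetched.
try:
    list(job.result())
except Exception as exc:
    print(exc)
```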
6 changes: 3 additions & 3 deletions tests/unit/test__job_helpers.py
@@ -711,7 +711,7 @@ def test_query_and_wait_caches_completed_query_results_one_page():
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
],
-            # Even though totalRows > len(rows), we should use the presense of a
+            # Even though totalRows > len(rows), we should use the presence of a
# next page token to decide if there are any more pages.
"totalRows": 8,
}
@@ -828,7 +828,7 @@ def test_query_and_wait_caches_completed_query_results_more_pages():
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
],
-            # Even though totalRows <= len(rows), we should use the presense of a
+            # Even though totalRows <= len(rows), we should use the presence of a
# next page token to decide if there are any more pages.
"totalRows": 2,
"pageToken": "page-2",
@@ -981,7 +981,7 @@ def test_query_and_wait_incomplete_query():
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
],
-            # Even though totalRows <= len(rows), we should use the presense of a
+            # Even though totalRows <= len(rows), we should use the presence of a
# next page token to decide if there are any more pages.
"totalRows": 2,
"pageToken": "page-2",
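All three fixtures in this file encode the same rule: `totalRows` may be stale or an estimate, so a client should keep paging exactly as long as a `pageToken` is present. A minimal sketch of that loop, with illustrative names and data:

```python
def fetch_all_rows(get_page):
    """Drain pages; `get_page(token)` returns dicts shaped like the fixtures."""
    rows, token = [], None
    while True:
        response = get_page(token)
        rows.extend(response.get("rows", []))
        token = response.get("pageToken")
        if not token:  # no token means no more pages, whatever totalRows says
            return rows


pages = {
    None: {"rows": [1, 2], "totalRows": 2, "pageToken": "page-2"},
    "page-2": {"rows": [3], "totalRows": 2},
}
print(fetch_all_rows(lambda token: pages[token]))  # [1, 2, 3]
```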
65 changes: 48 additions & 17 deletions tests/unit/test__pandas_helpers.py
@@ -808,29 +808,60 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name(
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_dataframe_to_json_generator(module_under_test):
utcnow = datetime.datetime.utcnow()
-    df_data = collections.OrderedDict(
-        [
-            ("a_series", [pandas.NA, 2, 3, 4]),
-            ("b_series", [0.1, float("NaN"), 0.3, 0.4]),
-            ("c_series", ["a", "b", pandas.NA, "d"]),
-            ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]),
-            ("e_series", [True, False, True, None]),
-        ]
-    )
dataframe = pandas.DataFrame(
-        df_data, index=pandas.Index([4, 5, 6, 7], name="a_index")
+        {
+            "a_series": [1, 2, 3, 4],
+            "b_series": [0.1, float("NaN"), 0.3, 0.4],
+            "c_series": ["a", "b", pandas.NA, "d"],
+            "d_series": [utcnow, utcnow, utcnow, pandas.NaT],
+            "e_series": [True, False, True, None],
+            # Support nullable dtypes.
+            # https://github.com/googleapis/python-bigquery/issues/1815
+            "boolean_series": pandas.Series(
+                [True, False, pandas.NA, False], dtype="boolean"
+            ),
+            "int64_series": pandas.Series([-1, pandas.NA, -3, -4], dtype="Int64"),
+        }
)

+    dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()})
+    # Index is not included, even if it is not the default and has a name.
+    dataframe = dataframe.rename(index=lambda idx: idx + 4)
+    dataframe.index.name = "a_index"

-    rows = module_under_test.dataframe_to_json_generator(dataframe)
+    rows = list(module_under_test.dataframe_to_json_generator(dataframe))
expected = [
{"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True},
{"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False},
{"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True},
{"a_series": 4, "b_series": 0.4, "c_series": "d"},
+        {
+            "a_series": 1,
+            "b_series": 0.1,
+            "c_series": "a",
+            "d_series": utcnow,
+            "e_series": True,
+            "boolean_series": True,
+            "int64_series": -1,
+        },
+        {
+            "a_series": 2,
+            "c_series": "b",
+            "d_series": utcnow,
+            "e_series": False,
+            "boolean_series": False,
+        },
+        {
+            "a_series": 3,
+            "b_series": 0.3,
+            "d_series": utcnow,
+            "e_series": True,
+            "int64_series": -3,
+        },
+        {
+            "a_series": 4,
+            "b_series": 0.4,
+            "c_series": "d",
+            "boolean_series": False,
+            "int64_series": -4,
+        },
]
-    assert list(rows) == expected
+    assert rows == expected


@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
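One detail the reshaped `expected` list encodes: cells that are NA (from object columns, NaT, or the new nullable `Int64`/`boolean` dtypes) are dropped from that row's payload rather than emitted as None, which is why some expected dicts simply lack keys. A small approximation of that behavior, using `to_dict` in place of the generator itself:

```python
import pandas

df = pandas.DataFrame(
    {
        "int64_col": pandas.Series([1, pandas.NA], dtype="Int64"),
        "boolean_col": pandas.Series([pandas.NA, False], dtype="boolean"),
    }
)

# NA cells are omitted from each row entirely, mirroring the expected
# dicts above.
rows = [
    {col: cell for col, cell in record.items() if not pandas.isna(cell)}
    for record in df.to_dict(orient="records")
]
print(rows)  # [{'int64_col': 1}, {'boolean_col': False}] (scalar reprs may vary)
```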
3 changes: 3 additions & 0 deletions tests/unit/test_table.py
@@ -3285,6 +3285,9 @@ def test_to_dataframe_iterable_w_bqstorage(self):
# Don't close the client if it was passed in.
bqstorage_client._transport.grpc_channel.close.assert_not_called()

+    @unittest.skipIf(
+        bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
+    )
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self):
from google.cloud.bigquery import schema
