Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: make pyarrow an optional dependency post-3.20.0 yanked release #1879

Merged
merged 4 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 3 additions & 4 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@
db_dtypes_import_exception = exc
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype

pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)
from pyarrow import ArrowTypeError # type: ignore # noqa: E402
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()

_BIGNUMERIC_SUPPORT = False
if pyarrow is not None: # pragma: NO COVER
Expand Down Expand Up @@ -309,10 +308,10 @@ def bq_to_arrow_array(series, bq_field):
if field_type_upper in schema._STRUCT_TYPES:
return pyarrow.StructArray.from_pandas(series, type=arrow_type)
return pyarrow.Array.from_pandas(series, type=arrow_type)
except ArrowTypeError: # pragma: NO COVER
except pyarrow.ArrowTypeError: # pragma: NO COVER
leahecole marked this conversation as resolved.
Show resolved Hide resolved
msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
_LOGGER.error(msg)
raise ArrowTypeError(msg)
raise pyarrow.ArrowTypeError(msg)


def get_column_or_index(dataframe, name):
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/bigquery/_pyarrow_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def pyarrow_timestamp():
_BQ_TO_ARROW_SCALARS = {}
_ARROW_SCALAR_IDS_TO_BQ = {}

if pyarrow: # pragma: NO COVER
if pyarrow:
# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py
# When modifying it be sure to update it there as well.
# Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py
Expand Down
15 changes: 8 additions & 7 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def default(session, install_extras=True):
install_target = ".[all]"
else:
install_target = "."
session.install("-e", install_target)
session.install("-e", install_target, "-c", constraints_path)
session.run("python", "-m", "pip", "freeze")

# Run py.test against the unit tests.
Expand Down Expand Up @@ -115,14 +115,15 @@ def unit(session):
def unit_noextras(session):
"""Run the unit test suite."""

# Install optional dependencies that are out-of-date.
# Install optional dependencies that are out-of-date to see that
# we fail gracefully.
# https://github.com/googleapis/python-bigquery/issues/933
# There is no pyarrow 1.0.0 package for Python 3.9.

#
# We only install this extra package on one of the two Python versions
# so that it continues to be an optional dependency.
# https://github.com/googleapis/python-bigquery/issues/1877
if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
session.install("pyarrow>=3.0.0")
elif session.python == UNIT_TEST_PYTHON_VERSIONS[-1]:
session.install("pyarrow")
session.install("pyarrow==1.0.0")

default(session, install_extras=False)

Expand Down
1 change: 0 additions & 1 deletion samples/desktopapp/requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@ google-cloud-testutils==1.4.0
pytest===7.4.4; python_version == '3.7'
pytest==8.1.1; python_version >= '3.8'
mock==5.1.0
pyarrow>=3.0.0
2 changes: 1 addition & 1 deletion samples/snippets/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# samples/snippets should be runnable with no "extras"
google-cloud-testutils==1.4.0
pytest===7.4.4; python_version == '3.7'
pytest==8.1.1; python_version >= '3.8'
mock==5.1.0
pyarrow>=3.0.0
3 changes: 2 additions & 1 deletion samples/snippets/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
google-cloud-bigquery==3.19.0
# samples/snippets should be runnable with no "extras"
google-cloud-bigquery==3.19.0
1 change: 0 additions & 1 deletion testing/constraints-3.11.txt
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
pyarrow>=3.0.0
1 change: 0 additions & 1 deletion testing/constraints-3.12.txt
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
pyarrow>=3.0.0
4 changes: 2 additions & 2 deletions testing/constraints-3.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ packaging==20.0.0
pandas==1.1.0
proto-plus==1.22.0
protobuf==3.19.5
pyarrow>=3.0.0
pyarrow==3.0.0
python-dateutil==2.7.3
requests==2.21.0
Shapely==1.8.4
six==1.13.0
tqdm==4.7.4
tqdm==4.7.4
7 changes: 3 additions & 4 deletions tests/unit/test__pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@
if pyarrow:
import pyarrow.parquet
import pyarrow.types
from pyarrow import ArrowTypeError # type: ignore # noqa: E402
else: # pragma: NO COVER
else:
# Mock out pyarrow when missing, because methods from pyarrow.types are
# used in test parameterization.
pyarrow = mock.Mock()
Expand Down Expand Up @@ -572,9 +571,9 @@ def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO C
series = pandas.Series(rows, name="test_col", dtype="object")
bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED")
exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
with pytest.raises(ArrowTypeError, match=exc_msg):
with pytest.raises(pyarrow.ArrowTypeError, match=exc_msg):
module_under_test.bq_to_arrow_array(series, bq_field)
raise ArrowTypeError(exc_msg)
raise pyarrow.ArrowTypeError(exc_msg)


@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"])
Expand Down
4 changes: 3 additions & 1 deletion tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3408,6 +3408,7 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self):

def test_to_dataframe_progress_bar(self):
pytest.importorskip("pandas")
pytest.importorskip("pyarrow")
pytest.importorskip("tqdm")

from google.cloud.bigquery.schema import SchemaField
Expand Down Expand Up @@ -3447,6 +3448,7 @@ def test_to_dataframe_progress_bar(self):
@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None)
def test_to_dataframe_no_tqdm_no_progress_bar(self):
pytest.importorskip("pandas")
pytest.importorskip("pyarrow")
from google.cloud.bigquery.schema import SchemaField

schema = [
Expand Down Expand Up @@ -3711,7 +3713,7 @@ def test_to_dataframe_w_dtypes_mapper(self):
if hasattr(pandas, "Float64Dtype"):
self.assertEqual(list(df.miles), [1.77, 6.66, 2.0])
self.assertEqual(df.miles.dtype.name, "Float64")
else: # pragma: NO COVER
else:
self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"])
self.assertEqual(df.miles.dtype.name, "string")

Expand Down