
fix: make pyarrow an optional dependency post-3.20.0 yanked release #1879

Merged
merged 4 commits on Mar 28, 2024
Changes from 1 commit
remove many pragma: NO COVERs
tswast committed Mar 28, 2024
commit c62144bde4ecef97614bcfcffc2b8717debd5337
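For context on what the removed pragmas did: coverage.py excludes any line whose text matches an exclude_lines pattern, so marking an import fallback with "# pragma: NO COVER" hides whether that branch ever executed. A minimal sketch of the difference, assuming coverage.py's standard pragma handling rather than anything specific to this repo:

# With the pragma, coverage.py never reports this branch as missed, even if
# no test session actually runs without pyarrow installed.
try:
    import pyarrow  # type: ignore
except ImportError:  # pragma: NO COVER
    pyarrow = None

# Without the pragma, as in this commit, an unexecuted fallback shows up as a
# missed line, forcing at least one session to exercise it.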
13 changes: 5 additions & 8 deletions google/cloud/bigquery/_pandas_helpers.py
@@ -32,7 +32,7 @@
import pandas # type: ignore

pandas_import_exception = None
except ImportError as exc: # pragma: NO COVER
except ImportError as exc:
pandas = None
pandas_import_exception = exc
else:
@@ -44,24 +44,21 @@
date_dtype_name = db_dtypes.DateDtype.name
time_dtype_name = db_dtypes.TimeDtype.name
db_dtypes_import_exception = None
except ImportError as exc: # pragma: NO COVER
except ImportError as exc:
db_dtypes = None
db_dtypes_import_exception = exc
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype

pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()

_BIGNUMERIC_SUPPORT = False
if pyarrow is not None: # pragma: NO COVER
_BIGNUMERIC_SUPPORT = True

try:
# _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array`
from shapely.geometry.base import BaseGeometry as _BaseGeometry # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
# No shapely, use NoneType for _BaseGeometry as a placeholder.
_BaseGeometry = type(None)
else:
# We don't have any unit test sessions that install shapely but not pandas.
if pandas is not None: # pragma: NO COVER

def _to_wkb():
@@ -308,7 +305,7 @@ def bq_to_arrow_array(series, bq_field):
if field_type_upper in schema._STRUCT_TYPES:
return pyarrow.StructArray.from_pandas(series, type=arrow_type)
return pyarrow.Array.from_pandas(series, type=arrow_type)
except pyarrow.ArrowTypeError: # pragma: NO COVER
except pyarrow.ArrowTypeError:
msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
_LOGGER.error(msg)
raise pyarrow.ArrowTypeError(msg)
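The hunk above keeps the import fallback but stores the original exception so it can be re-raised later with context. A minimal sketch of that deferred-error pattern; the require_pandas helper is hypothetical, not part of this PR:

pandas_import_exception = None
try:
    import pandas  # type: ignore
except ImportError as exc:
    pandas = None
    pandas_import_exception = exc  # keep the original error for later


def require_pandas():
    # Hypothetical helper: fail at call time rather than import time, so
    # users who never touch DataFrame features can skip the dependency.
    if pandas is None:
        raise ValueError(
            "pandas is required for this feature; run: pip install pandas"
        ) from pandas_import_exception
    return pandas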
2 changes: 1 addition & 1 deletion google/cloud/bigquery/_pyarrow_helpers.py
@@ -20,7 +20,7 @@

try:
import pyarrow # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pyarrow = None


4 changes: 2 additions & 2 deletions google/cloud/bigquery/_versions_helpers.py
@@ -73,7 +73,7 @@ def try_import(self, raise_if_error: bool = False) -> Any:
"""
try:
import pyarrow
except ImportError as exc: # pragma: NO COVER
except ImportError as exc:
if raise_if_error:
raise exceptions.LegacyPyarrowError(
"pyarrow package not found. Install pyarrow version >="
@@ -212,7 +212,7 @@ def try_import(self, raise_if_error: bool = False) -> Any:
"""
try:
import pandas
except ImportError as exc: # pragma: NO COVER
except ImportError as exc:
if raise_if_error:
raise exceptions.LegacyPandasError(
"pandas package not found. Install pandas version >="
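Both hunks above exercise the same try_import contract: return the module, return None, or raise when raise_if_error=True. A condensed, illustrative stand-in; the real PyarrowVersions also enforces a minimum version and raises exceptions.LegacyPyarrowError:

from typing import Any, Optional


class PyarrowVersionsSketch:
    """Illustrative stand-in for _versions_helpers.PyarrowVersions."""

    def try_import(self, raise_if_error: bool = False) -> Optional[Any]:
        try:
            import pyarrow
        except ImportError as exc:
            if raise_if_error:
                # The real helper raises exceptions.LegacyPyarrowError here.
                raise ImportError("pyarrow package not found.") from exc
            return None
        return pyarrow


pyarrow = PyarrowVersionsSketch().try_import()  # None when pyarrow is absent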
7 changes: 1 addition & 6 deletions google/cloud/bigquery/job/query.py
@@ -56,14 +56,9 @@

try:
import pandas # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pandas = None

try:
import db_dtypes # type: ignore
except ImportError: # pragma: NO COVER
db_dtypes = None

if typing.TYPE_CHECKING: # pragma: NO COVER
# Assumption: type checks are only used by library developers and CI environments
# that have all optional dependencies installed, thus no conditional imports.
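The TYPE_CHECKING block keeps its pragma because it only runs under static analysis, never at runtime. A small sketch of that pattern; rows_to_dataframe is a hypothetical function for illustration:

import typing

if typing.TYPE_CHECKING:  # pragma: NO COVER
    # Evaluated only by type checkers, never at runtime, so optional
    # dependencies can be imported here unconditionally.
    import pandas


def rows_to_dataframe(rows) -> "pandas.DataFrame":
    # The string annotation resolves for the type checker even though
    # pandas was never imported at module load time.
    import pandas  # deferred import at the point of actual use

    return pandas.DataFrame(rows)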
2 changes: 1 addition & 1 deletion google/cloud/bigquery/magics/magics.py
@@ -95,7 +95,7 @@
import IPython # type: ignore
from IPython import display # type: ignore
from IPython.core import magic_arguments # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
raise ImportError("This module can only be loaded in IPython.")

from google.api_core import client_info
6 changes: 3 additions & 3 deletions google/cloud/bigquery/table.py
@@ -26,17 +26,17 @@

try:
import pandas # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pandas = None

try:
import pyarrow # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pyarrow = None

try:
import db_dtypes # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
db_dtypes = None

try:
40 changes: 14 additions & 26 deletions tests/unit/job/test_query_pandas.py
@@ -19,53 +19,38 @@

import pytest

from ..helpers import make_connection
from .helpers import _make_client
from .helpers import _make_job_resource

try:
from google.cloud import bigquery_storage
import google.cloud.bigquery_storage_v1.reader
import google.cloud.bigquery_storage_v1.services.big_query_read.client
except (ImportError, AttributeError): # pragma: NO COVER
except (ImportError, AttributeError):
bigquery_storage = None

try:
import pandas
except (ImportError, AttributeError): # pragma: NO COVER
pandas = None
try:
import shapely
except (ImportError, AttributeError): # pragma: NO COVER
except (ImportError, AttributeError):
shapely = None
try:
import geopandas
except (ImportError, AttributeError): # pragma: NO COVER
except (ImportError, AttributeError):
geopandas = None
try:
import tqdm
except (ImportError, AttributeError): # pragma: NO COVER
except (ImportError, AttributeError):
tqdm = None

try:
import importlib.metadata as metadata
except ImportError:
import importlib_metadata as metadata

from ..helpers import make_connection
from .helpers import _make_client
from .helpers import _make_job_resource

if pandas is not None:
PANDAS_INSTALLED_VERSION = metadata.version("pandas")
else:
PANDAS_INSTALLED_VERSION = "0.0.0"

pandas = pytest.importorskip("pandas")

try:
import pyarrow
import pyarrow.types
except ImportError: # pragma: NO COVER
except ImportError:
pyarrow = None

pandas = pytest.importorskip("pandas")


@pytest.fixture
def table_read_options_kwarg():
@@ -660,7 +645,10 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression():
)


@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="")
@pytest.mark.skipif(
pandas.__version__.startswith("2."),
reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those",
)
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_to_dataframe_column_dtypes():
from google.cloud.bigquery.job import QueryJob as target_class
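The rewrite above replaces metadata-based version probing with pytest.importorskip plus a version-prefix skipif. A minimal sketch of how the two mechanisms interact; the test body is illustrative:

import pytest

# Skips the whole module at collection time when pandas is missing;
# on success, returns the imported module.
pandas = pytest.importorskip("pandas")


@pytest.mark.skipif(
    pandas.__version__.startswith("2."),
    reason="default dtypes changed in pandas 2.0",
)
def test_int_column_defaults_to_int64():
    series = pandas.Series([1, 2, 3])
    assert series.dtype == "int64"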
8 changes: 5 additions & 3 deletions tests/unit/test__pandas_helpers.py
@@ -30,12 +30,12 @@
import pandas
import pandas.api.types
import pandas.testing
except ImportError: # pragma: NO COVER
except ImportError:
pandas = None

try:
import geopandas
except ImportError: # pragma: NO COVER
except ImportError:
geopandas = None

import pytest
@@ -46,17 +46,19 @@
from google.cloud.bigquery import _pyarrow_helpers
from google.cloud.bigquery import _versions_helpers
from google.cloud.bigquery import schema
from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT

pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()

if pyarrow:
import pyarrow.parquet
import pyarrow.types

_BIGNUMERIC_SUPPORT = True
else:
# Mock out pyarrow when missing, because methods from pyarrow.types are
# used in test parameterization.
pyarrow = mock.Mock()
_BIGNUMERIC_SUPPORT = False

bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import()

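The mock.Mock() fallback above exists because module-level parametrize lists reference pyarrow.types functions at import time. A simplified sketch of why that works, using a plain try/except where the module itself uses _versions_helpers.PYARROW_VERSIONS.try_import():

from unittest import mock

try:
    import pyarrow
    import pyarrow.types
except ImportError:
    # Attribute access on a Mock always succeeds, so parametrize lists such
    # as [pyarrow.types.is_int64] still evaluate at import time; skipif
    # markers keep the mocked attributes from ever being called.
    pyarrow = mock.Mock()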
33 changes: 25 additions & 8 deletions tests/unit/test__versions_helpers.py
@@ -18,17 +18,17 @@

try:
import pyarrow # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pyarrow = None

try:
from google.cloud import bigquery_storage # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
bigquery_storage = None

try:
import pandas # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
pandas = None

from google.cloud.bigquery import _versions_helpers
@@ -39,11 +39,8 @@
def test_try_import_raises_no_error_w_recent_pyarrow():
versions = _versions_helpers.PyarrowVersions()
with mock.patch("pyarrow.__version__", new="5.0.0"):
try:
pyarrow = versions.try_import(raise_if_error=True)
assert pyarrow is not None
except exceptions.LegacyPyarrowError: # pragma: NO COVER
raise ("Legacy error raised with a non-legacy dependency version.")
pyarrow = versions.try_import(raise_if_error=True)
assert pyarrow is not None


@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed")
@@ -62,6 +59,16 @@ def test_try_import_raises_error_w_legacy_pyarrow():
versions.try_import(raise_if_error=True)


@pytest.mark.skipif(
pyarrow is not None,
reason="pyarrow is installed, but this test needs it not to be",
)
def test_try_import_raises_error_w_no_pyarrow():
Contributor:

Is there a way of asserting that a test like this was run at least once across multiple sessions? Without that, we could still potentially swallow this signal if the precondition is never met.

Contributor Author:

Coverage should catch that. We check coverage on test files as well as the core library.

Contributor Author:

See:

"--cov=tests/unit",

versions = _versions_helpers.PyarrowVersions()
with pytest.raises(exceptions.LegacyPyarrowError):
versions.try_import(raise_if_error=True)


@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed")
def test_installed_pyarrow_version_returns_cached():
versions = _versions_helpers.PyarrowVersions()
@@ -208,6 +215,16 @@ def test_try_import_raises_error_w_legacy_pandas():
versions.try_import(raise_if_error=True)


@pytest.mark.skipif(
pandas is not None,
reason="pandas is installed, but this test needs it not to be",
)
def test_try_import_raises_error_w_no_pandas():
versions = _versions_helpers.PandasVersions()
with pytest.raises(exceptions.LegacyPandasError):
versions.try_import(raise_if_error=True)


@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_installed_pandas_version_returns_cached():
versions = _versions_helpers.PandasVersions()
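Per the review thread above, measuring coverage over the test files themselves is what guards these environment-dependent tests: a skipif-guarded test body that never runs in any session surfaces as uncovered lines. A hedged sketch of that kind of setup; the session name and flags are illustrative, not copied from this repo's noxfile:

import nox


@nox.session
def unit_noextras(session):
    session.install("pytest", "pytest-cov", ".")  # install without extras
    session.run(
        "pytest",
        "--cov=google.cloud.bigquery",
        "--cov=tests/unit",  # measure the test files themselves
        "--cov-fail-under=100",  # unexecuted test bodies then fail the build
        "tests/unit",
    )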
2 changes: 1 addition & 1 deletion tests/unit/test_legacy_types.py
@@ -19,7 +19,7 @@

try:
import proto # type: ignore
except ImportError: # pragma: NO COVER
except ImportError:
proto = None


2 changes: 1 addition & 1 deletion tests/unit/test_opentelemetry_tracing.py
@@ -19,7 +19,7 @@

try:
import opentelemetry
except ImportError: # pragma: NO COVER
except ImportError:
opentelemetry = None

if opentelemetry is not None:
15 changes: 4 additions & 11 deletions tests/unit/test_table_pandas.py
@@ -16,11 +16,6 @@
import decimal
from unittest import mock

try:
import importlib.metadata as metadata
except ImportError:
import importlib_metadata as metadata

import pytest

from google.cloud import bigquery
@@ -31,11 +26,6 @@

TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data"

if pandas is not None: # pragma: NO COVER
PANDAS_INSTALLED_VERSION = metadata.version("pandas")
else: # pragma: NO COVER
PANDAS_INSTALLED_VERSION = "0.0.0"


@pytest.fixture
def class_under_test():
@@ -44,7 +34,10 @@ def class_under_test():
return RowIterator


@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="")
@pytest.mark.skipif(
pandas.__version__.startswith("2."),
reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those",
)
def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test):
# See tests/system/test_arrow.py for the actual types we get from the API.
arrow_schema = pyarrow.schema(