Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ Bug Fixes
- Fix a major performance regression in :py:meth:`Coordinates.to_index` (and
consequently :py:meth:`Dataset.to_dataframe`) caused by converting the cached
code ndarrays into Python lists (:issue:`11305`).
- Fix :py:func:`concat` failing with ``TypeError: Cannot interpret
'<StringDtype...>' as a data type`` when a ``pandas.Index`` with a
``StringDtype`` is used as the new concat dimension and another input has a
numpy string-dtype coord (:issue:`11317`).


Documentation
Expand Down
7 changes: 7 additions & 0 deletions xarray/structure/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,13 @@ def _calc_concat_dim_index(
else:
(dim,) = dim_or_data.dims
coord_dtype = getattr(dim_or_data, "dtype", None)
# pandas may produce a StringDtype-backed Index, which xarray does
# not treat as an allowed extension array dtype for coords. Let
# PandasIndex compute a valid numpy coord_dtype in that case so a
# subsequent concat against a numpy-string-dtype coord does not fail
# in ``np.result_type``. See GH#11317.
if isinstance(coord_dtype, pd.StringDtype):
coord_dtype = None
index = PandasIndex(dim_or_data, dim, coord_dtype=coord_dtype)

return dim, index
Expand Down
13 changes: 13 additions & 0 deletions xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1491,6 +1491,19 @@ def test_concat_index_not_same_dim() -> None:
concat([ds1, ds2], dim="x")


def test_concat_pandas_index_string_dtype() -> None:
    # Regression test for GH#11317: a ``pandas.Index`` used as the new concat
    # dimension may be StringDtype-backed (e.g. when ``future.infer_string``
    # is enabled). Concatenating the result with an object whose coord has a
    # numpy string dtype must still succeed.
    with pd.option_context("future.infer_string", True):
        first = DataArray([0], dims=["dim_a"], coords={"dim_a": ["a"]})
        second = DataArray([0], dims=["dim_b"], coords={"dim_b": ["b"]})

        # Step 1: stack ``second`` along a new "dim_a" dimension described by
        # a pandas Index built while the option is active.
        new_dim = pd.Index(["b"], name="dim_a")
        stacked = concat([second], new_dim)

        # Step 2: the concat that exercises the regression.
        combined = concat([first, stacked], dim="dim_a")

        labels = list(combined["dim_a"].values)
        assert labels == ["a", "b"]


class TestNewDefaults:
def test_concat_second_empty_with_scalar_data_var_only_on_first(self) -> None:
ds1 = Dataset(data_vars={"a": ("y", [0.1]), "b": 0.1}, coords={"x": 0.1})
Expand Down
Loading