From 28601fd298d074826387c3340443a77cad7729cb Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 16 Jan 2026 17:10:51 -0500 Subject: [PATCH 1/7] Better handling of `StringDType` --- xarray/backends/zarr.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 410a6a49a7b..93e4b6daf93 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -206,6 +206,10 @@ def __init__(self, zarr_array): not _zarr_v3() and self._array.filters is not None and any(filt.codec_id == "vlen-utf8" for filt in self._array.filters) + ) or ( + _zarr_v3() + and self._array.serializer + and self._array.serializer.to_dict()["name"] == "vlen-utf8" ): dtype = coding.strings.create_vlen_dtype(str) else: From e61951e2ac9555f12d5871d70899e36a378f8390 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 21 Jan 2026 17:40:24 -0500 Subject: [PATCH 2/7] Allow for np.dtypes.StringDType() in `isnull` --- xarray/core/duck_array_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 6739f2118ee..2e0432ceec2 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -26,6 +26,7 @@ from xarray.compat import dask_array_compat, dask_array_ops from xarray.compat.array_api_compat import get_array_namespace +from xarray.compat.npcompat import HAS_STRING_DTYPE from xarray.core import dtypes, nputils from xarray.core.extension_array import ( PandasExtensionArray, @@ -175,9 +176,10 @@ def isnull(data): # note: must check timedelta64 before integers, because currently # timedelta64 inherits from np.integer return isnat(data) + elif HAS_STRING_DTYPE and isinstance(scalar_type, np.dtypes.StringDType): + return xp.isnan(data) elif dtypes.isdtype(scalar_type, ("real floating", "complex floating"), xp=xp): # float types use NaN for null - xp = get_array_namespace(data) return xp.isnan(data) elif dtypes.isdtype(scalar_type, ("bool", "integral"), xp=xp) or ( isinstance(scalar_type, np.dtype) From 88481299ada7bfba2e1c81cf628fc0707400e7db Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 22 Jan 2026 09:18:00 -0500 Subject: [PATCH 3/7] Fix failing tests --- xarray/tests/test_backends.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ed86979c73e..3d694d7791a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -58,6 +58,7 @@ from xarray.coding.cftime_offsets import date_range from xarray.coding.strings import check_vlen_dtype, create_vlen_dtype from xarray.coding.variables import SerializationWarning +from xarray.compat.npcompat import HAS_STRING_DTYPE from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing from xarray.core.common import _contains_cftime_datetimes @@ -1088,8 +1089,9 @@ def test_roundtrip_empty_vlen_string_array(self) -> None: # eg. NETCDF3 based backends do not roundtrip metadata if actual["a"].dtype.metadata is not None: assert check_vlen_dtype(actual["a"].dtype) is str + elif HAS_STRING_DTYPE: + assert np.issubdtype(actual["a"].dtype, np.dtypes.StringDType()) else: - # zarr v3 sends back " Date: Thu, 22 Jan 2026 09:23:33 -0500 Subject: [PATCH 4/7] Update what's new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4b50f51cf61..1d51f483eed 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,8 @@ New Features By `Matthew Willson `_. - Better ordering of coordinates when displaying Xarray objects. (:pull:`11098`). By `Ian Hunt-Isaak `_, `Julia Signell `_. +- Use ``np.dtypes.StringDType`` when reading Zarr string variables (:pull:`11097`). + By `Julia Signell `_. Breaking Changes ~~~~~~~~~~~~~~~~ From 5248023cb0bcf10cb94eadb211c357b8fde8df60 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 27 Jan 2026 15:55:49 -0500 Subject: [PATCH 5/7] Add test for `na_object` --- xarray/tests/test_duck_array_ops.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 645a975a15a..bbd46f9ac33 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -13,6 +13,7 @@ from xarray import DataArray, Dataset, concat, date_range from xarray.coding.times import _NS_PER_TIME_DELTA +from xarray.compat.npcompat import HAS_STRING_DTYPE from xarray.core import dtypes, duck_array_ops from xarray.core.duck_array_ops import ( array_notnull_equiv, @@ -763,6 +764,33 @@ def test_isnull_with_dask(): assert_equal(da.isnull().load(), da.load().isnull()) +@pytest.mark.skipif(not HAS_STRING_DTYPE, reason="requires StringDType to exist") +@pytest.mark.parametrize( + ["array", "expected"], + [ + ( + np.array(["a", None, "c"], dtype=np.dtypes.StringDType(na_object=None)), + np.array([False, True, False]), + ), + ( + np.array(["a", "", "c"], dtype=np.dtypes.StringDType(na_object="")), + np.array([False, True, False]), + ), + ( + np.array(["a", np.nan, "c"], dtype=np.dtypes.StringDType(na_object=np.nan)), + np.array([False, True, False]), + ), + ( + np.array(["a", np.nan, "c"], dtype=np.dtypes.StringDType()), + np.array([False, False, False]), + ), + ], +) +def test_isnull_with_StringDType(array, expected): + actual = duck_array_ops.isnull(array) + np.testing.assert_equal(actual, expected) + + @pytest.mark.skipif(not has_dask, reason="This is for dask.") @pytest.mark.parametrize("axis", [0, -1, 1]) @pytest.mark.parametrize("edge_order", [1, 2]) From 6e812c4e82900c1378edf2cced772ef40f166726 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 27 Jan 2026 15:58:07 -0500 Subject: [PATCH 6/7] Handle `na_object` --- xarray/core/duck_array_ops.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 2e0432ceec2..2377e6b01d0 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -177,7 +177,12 @@ def isnull(data): # timedelta64 inherits from np.integer return isnat(data) elif HAS_STRING_DTYPE and isinstance(scalar_type, np.dtypes.StringDType): - return xp.isnan(data) + # na is settable, but it defaults to an empty string + na_object = getattr(scalar_type, "na_object", "") + if isna(na_object): + return xp.isnan(data) + else: + return data == na_object elif dtypes.isdtype(scalar_type, ("real floating", "complex floating"), xp=xp): # float types use NaN for null return xp.isnan(data) From eb94ed498a525254cca4ca4a130fdc1070476593 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 28 Jan 2026 09:35:51 -0500 Subject: [PATCH 7/7] Fix skipif with parametrize --- xarray/tests/test_duck_array_ops.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index bbd46f9ac33..a9a2d377449 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -766,27 +766,37 @@ def test_isnull_with_dask(): @pytest.mark.skipif(not HAS_STRING_DTYPE, reason="requires StringDType to exist") @pytest.mark.parametrize( - ["array", "expected"], + ["input", "na_object", "expected"], [ ( - np.array(["a", None, "c"], dtype=np.dtypes.StringDType(na_object=None)), + ["a", None, "c"], + None, np.array([False, True, False]), ), ( - np.array(["a", "", "c"], dtype=np.dtypes.StringDType(na_object="")), + ["a", "", "c"], + "", np.array([False, True, False]), ), ( - np.array(["a", np.nan, "c"], dtype=np.dtypes.StringDType(na_object=np.nan)), + ["a", np.nan, "c"], + np.nan, np.array([False, True, False]), ), - ( - np.array(["a", np.nan, "c"], dtype=np.dtypes.StringDType()), - np.array([False, False, False]), - ), ], ) -def test_isnull_with_StringDType(array, expected): +def test_isnull_with_different_StringDType_na_objects(input, na_object, expected): + dtype = np.dtypes.StringDType(na_object=na_object) + array = np.array(input, dtype=dtype) + actual = duck_array_ops.isnull(array) + np.testing.assert_equal(actual, expected) + + +@pytest.mark.skipif(not HAS_STRING_DTYPE, reason="requires StringDType to exist") +def test_isnull_with_default_StringDType(): + dtype = np.dtypes.StringDType() + array = np.array(["a", np.nan, "c"], dtype=dtype) + expected = np.array([False, False, False]) actual = duck_array_ops.isnull(array) np.testing.assert_equal(actual, expected)