Skip to content

Commit 8813563

Browse files
Restore -1 Zarr chunk sentinel handling
Translate xarray's documented -1 chunk sentinel into explicit full-dimension chunk sizes before passing chunk metadata to zarr, and cover the regression with a roundtrip test.
1 parent c68b25b commit 8813563

3 files changed

Lines changed: 20 additions & 1 deletion

File tree

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ Bug Fixes
120120
By `Emmanuel Ferdman <https://github.com/emmanuel-ferdman>`_.
121121
- :func:`combine_by_coords` no longer returns an empty dataset when a generator is passed as ``data_objects`` (:issue:`10114`, :pull:`11265`).
122122
By `Amartya Anand <https://github.com/SurfyPenguin>`_.
123+
- Restore support for ``-1`` chunk sizes in Zarr encoding, mapping them to the
124+
full length of the corresponding dimension (:issue:`11288`).
125+
By `Sarthak <https://github.com/Sarthak160>`_.
123126
- Fix h5netcdf backend module detection and ros3 tests (:issue:`11243`, :pull:`11274`).
124127
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
125128

xarray/backends/zarr.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ async def async_getitem(self, key):
333333
)
334334

335335

336-
def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
336+
def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, shape):
337337
"""
338338
Given encoding chunks (possibly None or []) and variable chunks
339339
(possibly None or []).
@@ -389,6 +389,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
389389
var_chunks,
390390
ndim,
391391
name,
392+
shape,
392393
)
393394

394395
for x in enc_chunks_tuple:
@@ -400,6 +401,13 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
400401
f"for variable named {name!r}."
401402
)
402403

404+
# Preserve xarray's documented convention that -1 means the full length
405+
# of a dimension when encoding chunk sizes for zarr.
406+
enc_chunks_tuple = tuple(
407+
dim_size if chunk == -1 else chunk
408+
for chunk, dim_size in zip(enc_chunks_tuple, shape, strict=True)
409+
)
410+
403411
# if there are chunks in encoding and the variable data is a numpy array,
404412
# we use the specified chunks
405413
if not var_chunks:
@@ -532,6 +540,7 @@ def extract_zarr_variable_encoding(
532540
var_chunks=variable.chunks,
533541
ndim=variable.ndim,
534542
name=name,
543+
shape=variable.shape,
535544
)
536545
if _zarr_v3() and chunks is None:
537546
chunks = "auto"

xarray/tests/test_backends.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2902,6 +2902,13 @@ def test_chunk_encoding(self) -> None:
29022902
with self.roundtrip(data) as actual:
29032903
pass
29042904

2905+
def test_chunk_encoding_full_dimension_sentinel(self) -> None:
2906+
data = create_test_data()
2907+
data["var2"].encoding.update({"chunks": (5, -1)})
2908+
2909+
with self.roundtrip(data) as actual:
2910+
assert actual["var2"].encoding["chunks"] == (5, data["var2"].shape[1])
2911+
29052912
def test_shard_encoding(self) -> None:
29062913
# These datasets have no dask chunks. All chunking/sharding specified in
29072914
# encoding

0 commit comments

Comments
 (0)