Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ Bug Fixes
By `Emmanuel Ferdman <https://github.com/emmanuel-ferdman>`_.
- :func:`combine_by_coords` no longer returns an empty dataset when a generator is passed as ``data_objects`` (:issue:`10114`, :pull:`11265`).
By `Amartya Anand <https://github.com/SurfyPenguin>`_.
- Restore support for ``-1`` chunk sizes in Zarr encoding, mapping them to the
full length of each written dimension (:issue:`11288`).
By `Sarthak <https://github.com/Sarthak160>`_.
- Fix h5netcdf backend module detection and ros3 tests (:issue:`11243`, :pull:`11274`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.

Expand Down
11 changes: 10 additions & 1 deletion xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ async def async_getitem(self, key):
)


def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, shape):
"""
Given encoding chunks (possibly None or []) and variable chunks
(possibly None or []).
Expand Down Expand Up @@ -389,6 +389,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
var_chunks,
ndim,
name,
shape,
)

for x in enc_chunks_tuple:
Expand All @@ -400,6 +401,13 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
f"for variable named {name!r}."
)

# Preserve xarray's documented convention that -1 means the full length
# of a dimension when encoding chunk sizes for zarr.
enc_chunks_tuple = tuple(
dim_size if chunk == -1 else chunk
for chunk, dim_size in zip(enc_chunks_tuple, shape, strict=True)
)

# if there are chunks in encoding and the variable data is a numpy array,
# we use the specified chunks
if not var_chunks:
Expand Down Expand Up @@ -532,6 +540,7 @@ def extract_zarr_variable_encoding(
var_chunks=variable.chunks,
ndim=variable.ndim,
name=name,
shape=variable.shape,
)
if _zarr_v3() and chunks is None:
chunks = "auto"
Expand Down
8 changes: 8 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2902,6 +2902,14 @@ def test_chunk_encoding(self) -> None:
with self.roundtrip(data) as actual:
pass

def test_chunk_encoding_full_dimension_sentinel(self) -> None:
data = create_test_data()
data["var2"].encoding.update({"chunks": (5, -1)})

with self.roundtrip(data) as actual:
assert actual["var2"].encoding["chunks"] == (5, data["var2"].shape[1])
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a bit to this test asserting that the encoding dict on the original data object still contains {"chunks": (5, -1)}?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this assertion in dad7145 so the test now verifies the original data object keeps encoding["chunks"] as (5, -1).

assert data["var2"].encoding["chunks"] == (5, -1)

def test_shard_encoding(self) -> None:
# These datasets have no dask chunks. All chunking/sharding specified in
# encoding
Expand Down
Loading