From be6a39267b5e15769b37bf37beb9b0b53b612699 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 10:36:30 -0500 Subject: [PATCH 01/11] init impl --- xarray/core/indexing.py | 8 ++++++-- xarray/tests/test_dataset.py | 24 ++++++++++++++++++++++++ xarray/tests/test_indexing.py | 21 +++++++++++++++++++-- 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index eead0a1b8af..8ee136c39bc 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -138,18 +138,22 @@ def group_indexers_by_index( options: Mapping[str, Any], ) -> list[tuple[Index, dict[Any, Any]]]: """Returns a list of unique indexes and their corresponding indexers.""" + from xarray.core.indexes import PandasIndex + unique_indexes = {} grouped_indexers: Mapping[int | None, dict] = defaultdict(dict) for key, label in indexers.items(): index: Index = obj.xindexes.get(key, None) + if (key in obj.coords) and (index is None): + index = PandasIndex.from_variables( + {key: obj.coords[key].variable}, options={} + ) if index is not None: index_id = id(index) unique_indexes[index_id] = index grouped_indexers[index_id][key] = label - elif key in obj.coords: - raise KeyError(f"no index found for coordinate {key!r}") elif key not in obj.dims: raise KeyError( f"{key!r} is not a valid dimension or coordinate for " diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 17d3e25b642..c8111fe002d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3167,6 +3167,30 @@ def test_drop_indexes(self) -> None: with pytest.raises(ValueError, match=r".*would corrupt the following index.*"): ds.drop_indexes("a") + def test_sel_on_unindexed_coordinate(self) -> None: + # Test that .sel() works on coordinates without an index by creating + # a PandasIndex on the fly + ds = Dataset( + {"data": (["x", "y"], np.arange(6).reshape(2, 3))}, + coords={"x": [0, 1], "y": [10, 20, 30]}, + ) + # Drop the index on y to create an unindexed coordinate + ds = ds.drop_indexes("y") + assert "y" not in ds.xindexes + assert "y" in ds.coords + + # .sel() should still work by creating a PandasIndex on the fly + result = ds.sel(y=20) + expected = ds.isel(y=1) + assert_identical(result, expected) + + # Also test with slice - compare data values directly since the result + # has no index on y (which triggers internal invariant checks) + result_slice = ds.sel(y=slice(10, 20)) + expected_slice = ds.isel(y=slice(0, 2)) + assert_array_equal(result_slice["data"].values, expected_slice["data"].values) + assert_array_equal(result_slice["y"].values, expected_slice["y"].values) + def test_drop_dims(self) -> None: data = xr.Dataset( { diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 6b564c6f032..dfb3283a16e 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -109,8 +109,6 @@ def test_group_indexers_by_index(self) -> None: assert indexers == {"y": 0} assert len(grouped_indexers) == 3 - with pytest.raises(KeyError, match=r"no index found for coordinate 'y2'"): - indexing.group_indexers_by_index(data, {"y2": 2.0}, {}) with pytest.raises( KeyError, match=r"'w' is not a valid dimension or coordinate" ): @@ -118,6 +116,25 @@ def test_group_indexers_by_index(self) -> None: with pytest.raises(ValueError, match=r"cannot supply.*"): indexing.group_indexers_by_index(data, {"z": 1}, {"method": "nearest"}) + def test_group_indexers_by_index_creates_index_for_unindexed_coord(self) -> None: + # Test that selecting on a coordinate without an index creates a PandasIndex on the fly + data = DataArray( + np.zeros((2, 3)), coords={"x": [0, 1], "y": [10, 20, 30]}, dims=("x", "y") + ) + data.coords["y2"] = ("y", [2.0, 3.0, 4.0]) + + # y2 is a coordinate but has no index + assert "y2" in data.coords + assert "y2" not in data.xindexes + + # group_indexers_by_index should create a PandasIndex on the fly + grouped_indexers = indexing.group_indexers_by_index(data, {"y2": 3.0}, {}) + + assert len(grouped_indexers) == 1 + idx, indexers = grouped_indexers[0] + assert isinstance(idx, PandasIndex) + assert indexers == {"y2": 3.0} + def test_map_index_queries(self) -> None: def create_sel_results( x_indexer, From 69c008362b26decae5b883d7c0bea2fcecaf45fa Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 13:39:21 -0500 Subject: [PATCH 02/11] one more test --- xarray/tests/test_dataarray.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index df9d29843ff..f7d5ca27d7e 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1190,11 +1190,14 @@ def test_sel_float_multiindex(self) -> None: assert_equal(actual, expected) def test_sel_no_index(self) -> None: - array = DataArray(np.arange(10), dims="x") + array = DataArray(np.arange(10), dims="x").assign_coords( + {"x_meta": ("x", np.linspace(0.1, 1, 10))} + ) assert_identical(array[0], array.sel(x=0)) assert_identical(array[:5], array.sel(x=slice(5))) assert_identical(array[[0, -1]], array.sel(x=[0, -1])) assert_identical(array[array < 5], array.sel(x=(array < 5))) + assert_identical(array[1], array.sel(x_meta=0.2)) def test_sel_method(self) -> None: data = DataArray(np.random.randn(3, 4), [("x", [0, 1, 2]), ("y", list("abcd"))]) From 5ed9b56efe51df91abe790e4a87e277463ea723a Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 13:42:40 -0500 Subject: [PATCH 03/11] whats new entry --- doc/whats-new.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1c463e885fc..95b71019fb3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -18,6 +18,12 @@ New Features automatically replace any existing index being set instead of erroring or needing needing to call :py:meth:`drop_indexes` first (:pull:`11008`). By `Ian Hunt-Isaak `_. +- Calling :py:meth:`Dataset.sel` or :py:meth:`DataArray.sel` on a coordinate + without an index will now automatically create a temporary + :py:class:`~xarray.indexes.PandasIndex` to perform the selection + (:issue:`9703`, :pull:`XXXX`). + By `Ian Hunt-Isaak `_. + Breaking Changes ~~~~~~~~~~~~~~~~ From b8f77c491e623219270af262fcbd3fe46930d260 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 13:43:25 -0500 Subject: [PATCH 04/11] improve tests --- xarray/tests/test_dataset.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c8111fe002d..bd817cd5603 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3172,11 +3172,13 @@ def test_sel_on_unindexed_coordinate(self) -> None: # a PandasIndex on the fly ds = Dataset( {"data": (["x", "y"], np.arange(6).reshape(2, 3))}, - coords={"x": [0, 1], "y": [10, 20, 30]}, + coords={"x": [0, 1], "y": [10, 20, 30], "y_meta": ["a", "b", "c"]}, ) - # Drop the index on y to create an unindexed coordinate + # Drop the index on y to create an unindexed dim coord + # also check that coord y_meta works despite not being on a data var ds = ds.drop_indexes("y") assert "y" not in ds.xindexes + assert "y_meta" not in ds.xindexes assert "y" in ds.coords # .sel() should still work by creating a PandasIndex on the fly @@ -3184,6 +3186,10 @@ def test_sel_on_unindexed_coordinate(self) -> None: expected = ds.isel(y=1) assert_identical(result, expected) + result = ds.sel(y_meta="b") + expected = ds.isel(y=1) + assert_identical(result, expected) + # Also test with slice - compare data values directly since the result # has no index on y (which triggers internal invariant checks) result_slice = ds.sel(y=slice(10, 20)) From 104d34cd5eef51f4cb11d37df5a078a1b1084450 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 13:44:45 -0500 Subject: [PATCH 05/11] add todo note --- xarray/core/indexing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 8ee136c39bc..8154c47b410 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -146,6 +146,9 @@ def group_indexers_by_index( for key, label in indexers.items(): index: Index = obj.xindexes.get(key, None) if (key in obj.coords) and (index is None): + # TODO: should we raise a more informative error + # here if the index creation fails? I.e. if a 2D coord + # or something else that PandasIndex cannot support. index = PandasIndex.from_variables( {key: obj.coords[key].variable}, options={} ) From 58951106a96c3bc0120fbc3872b7b797719c7074 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 13:45:46 -0500 Subject: [PATCH 06/11] add note about import location --- xarray/core/indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 8154c47b410..facd9eb42ae 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -138,6 +138,7 @@ def group_indexers_by_index( options: Mapping[str, Any], ) -> list[tuple[Index, dict[Any, Any]]]: """Returns a list of unique indexes and their corresponding indexers.""" + # import here instead of at top to guard against circular imports from xarray.core.indexes import PandasIndex unique_indexes = {} From cfb9fa2941a1aa7625cf7cd539f3fb2192c30181 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 13:51:50 -0500 Subject: [PATCH 07/11] pr number --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 95b71019fb3..e1a846a90e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,7 @@ New Features - Calling :py:meth:`Dataset.sel` or :py:meth:`DataArray.sel` on a coordinate without an index will now automatically create a temporary :py:class:`~xarray.indexes.PandasIndex` to perform the selection - (:issue:`9703`, :pull:`XXXX`). + (:issue:`9703`, :pull:`11029`). By `Ian Hunt-Isaak `_. From c3cd6ad3ff578cfa4df7ef472e78f9b4595aba0a Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 17:10:54 -0500 Subject: [PATCH 08/11] fix test --- xarray/tests/test_dataset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index bd817cd5603..ba8599bb581 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3172,10 +3172,10 @@ def test_sel_on_unindexed_coordinate(self) -> None: # a PandasIndex on the fly ds = Dataset( {"data": (["x", "y"], np.arange(6).reshape(2, 3))}, - coords={"x": [0, 1], "y": [10, 20, 30], "y_meta": ["a", "b", "c"]}, + coords={"x": [0, 1], "y": [10, 20, 30], "y_meta": ("y", ["a", "b", "c"])}, ) # Drop the index on y to create an unindexed dim coord - # also check that coord y_meta works despite not being on a data var + # also check that coord y_meta works despite not being a dim coord ds = ds.drop_indexes("y") assert "y" not in ds.xindexes assert "y_meta" not in ds.xindexes @@ -3184,11 +3184,11 @@ def test_sel_on_unindexed_coordinate(self) -> None: # .sel() should still work by creating a PandasIndex on the fly result = ds.sel(y=20) expected = ds.isel(y=1) - assert_identical(result, expected) + assert_identical(result, expected, check_default_indexes=False) result = ds.sel(y_meta="b") expected = ds.isel(y=1) - assert_identical(result, expected) + assert_identical(result, expected, check_default_indexes=False) # Also test with slice - compare data values directly since the result # has no index on y (which triggers internal invariant checks) From 515e293a79b87cb1b8f26951880adb528c2233d3 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 17:15:39 -0500 Subject: [PATCH 09/11] address review comments --- xarray/core/indexing.py | 12 ++++++++---- xarray/tests/test_dataset.py | 5 +++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index facd9eb42ae..c34efe325c7 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -146,10 +146,14 @@ def group_indexers_by_index( for key, label in indexers.items(): index: Index = obj.xindexes.get(key, None) - if (key in obj.coords) and (index is None): - # TODO: should we raise a more informative error - # here if the index creation fails? I.e. if a 2D coord - # or something else that PandasIndex cannot support. + if index is None and key in obj.coords: + coord = obj.coords[key] + if coord.ndim != 1: + raise ValueError( + "Could not automatically create PandasIndex for " + f"coord {key!r} with {coord.ndim} dimensions. Please explicitly " + "set the index using `set_xindex`." + ) index = PandasIndex.from_variables( {key: obj.coords[key].variable}, options={} ) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index ba8599bb581..3d5855c111d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3190,6 +3190,11 @@ def test_sel_on_unindexed_coordinate(self) -> None: expected = ds.isel(y=1) assert_identical(result, expected, check_default_indexes=False) + # check that our auto-created indexes are ephemeral + assert "y" not in ds.xindexes + assert "y_meta" not in ds.xindexes + assert "y" in ds.coords + # Also test with slice - compare data values directly since the result # has no index on y (which triggers internal invariant checks) result_slice = ds.sel(y=slice(10, 20)) From fca0418c9c55876bff19d2679a4cf7556e235ec5 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 17:17:31 -0500 Subject: [PATCH 10/11] more review comments --- xarray/tests/test_dataset.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3d5855c111d..46c36875954 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3195,12 +3195,14 @@ def test_sel_on_unindexed_coordinate(self) -> None: assert "y_meta" not in ds.xindexes assert "y" in ds.coords - # Also test with slice - compare data values directly since the result - # has no index on y (which triggers internal invariant checks) result_slice = ds.sel(y=slice(10, 20)) expected_slice = ds.isel(y=slice(0, 2)) - assert_array_equal(result_slice["data"].values, expected_slice["data"].values) - assert_array_equal(result_slice["y"].values, expected_slice["y"].values) + assert_identical( + result_slice["data"], expected_slice["data"], check_default_indexes=False + ) + assert_identical( + result_slice["y"], expected_slice["y"], check_default_indexes=False + ) def test_drop_dims(self) -> None: data = xr.Dataset( From 57b86a53c217ae175d5219532b8433c1d5fe32d4 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 17:18:10 -0500 Subject: [PATCH 11/11] review comment --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e1a846a90e5..28c47d61e11 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -18,7 +18,7 @@ New Features automatically replace any existing index being set instead of erroring or needing needing to call :py:meth:`drop_indexes` first (:pull:`11008`). By `Ian Hunt-Isaak `_. -- Calling :py:meth:`Dataset.sel` or :py:meth:`DataArray.sel` on a coordinate +- Calling :py:meth:`Dataset.sel` or :py:meth:`DataArray.sel` on a 1-dimensional coordinate without an index will now automatically create a temporary :py:class:`~xarray.indexes.PandasIndex` to perform the selection (:issue:`9703`, :pull:`11029`).