Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ Deprecations

Bug Fixes
~~~~~~~~~

- The NetCDF4 backend will now claim to be able to read any URL except for one that contains
the substring ``zarr`` (matched case-insensitively). This restores backward compatibility after
:pull:`10804` broke workflows that relied on ``xr.open_dataset("http://...")``
(:pull:`10931`).
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.
- Always normalize slices when indexing ``LazilyIndexedArray`` instances (:issue:`10941`, :pull:`10948`).
By `Justus Magin <https://github.com/keewis>`_.

Expand Down
34 changes: 34 additions & 0 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,3 +849,37 @@ def open_groups_as_dict(

# mapping of engine name to (module name, BackendEntrypoint Class)
BACKEND_ENTRYPOINTS: dict[str, tuple[str | None, type[BackendEntrypoint]]] = {}


def _is_likely_dap_url(url: str) -> bool:
"""
Determines if a URL is likely an OPeNDAP (DAP) endpoint based on
known protocols, server software path patterns, and file extensions.

Parameters
----------
url : str

Returns
-------
True if the URL matches common DAP patterns, False otherwise.
"""
if not url:
return False

url_lower = url.lower()

# For remote URIs, check for DAP server software path patterns
if is_remote_uri(url_lower):
dap_path_patterns = (
"/dodsc/", # THREDDS Data Server (TDS) DAP endpoint (case-insensitive)
"/dods/", # GrADS Data Server (GDS) DAP endpoint
"/opendap/", # Generic OPeNDAP/Hyrax server
"/erddap/", # ERDDAP data server
"/dap2/", # Explicit DAP2 version in path
"/dap4/", # Explicit DAP4 version in path
"/dap/",
)
return any(pattern in url_lower for pattern in dap_path_patterns)

return False
17 changes: 13 additions & 4 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,10 +715,19 @@ def _has_netcdf_ext(path: str | os.PathLike, is_remote: bool = False) -> bool:
_, ext = os.path.splitext(path)
return ext in {".nc", ".nc4", ".cdf"}

if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
# For remote URIs, check extension (accounting for query params/fragments)
# Remote netcdf-c can handle both regular URLs and DAP URLs
return _has_netcdf_ext(filename_or_obj, is_remote=True)
if isinstance(filename_or_obj, str):
if is_remote_uri(filename_or_obj):
# For remote URIs, check extension (accounting for query params/fragments)
# Remote netcdf-c can handle both regular URLs and DAP URLs
if _has_netcdf_ext(filename_or_obj, is_remote=True):
return True
elif "zarr" in filename_or_obj.lower():
return False
# return true for non-zarr URLs so we don't have a breaking change for people relying on this
# netcdf backend guessing true for all remote sources.
# TODO: emit a warning here about deprecation of this behavior
# https://github.com/pydata/xarray/pull/10931
return True

if isinstance(filename_or_obj, str | os.PathLike):
# For local paths, check magic number first, then extension
Expand Down
20 changes: 2 additions & 18 deletions xarray/backends/pydap_.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BackendArray,
BackendEntrypoint,
T_PathFileOrDataStore,
_is_likely_dap_url,
_normalize_path,
datatree_from_dict_with_io_cleanup,
robust_getitem,
Expand All @@ -22,7 +23,6 @@
Frozen,
FrozenDict,
close_on_error,
is_remote_uri,
)
from xarray.core.variable import Variable
from xarray.namedarray.pycompat import integer_types
Expand Down Expand Up @@ -252,23 +252,7 @@ class PydapBackendEntrypoint(BackendEntrypoint):
def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
if not isinstance(filename_or_obj, str):
return False

# Check for explicit DAP protocol indicators:
# 1. DAP scheme: dap2:// or dap4:// (case-insensitive, may not be recognized by is_remote_uri)
# 2. Remote URI with /dap2/ or /dap4/ in URL path (case-insensitive)
# Note: We intentionally do NOT check for .dap suffix as that would match
# file extensions like .dap which trigger downloads of binary data
url_lower = filename_or_obj.lower()
if url_lower.startswith(("dap2://", "dap4://")):
return True

# For standard remote URIs, check for DAP indicators in path
if is_remote_uri(filename_or_obj):
return (
"/dap2/" in url_lower or "/dap4/" in url_lower or "/dodsC/" in url_lower
)

return False
return _is_likely_dap_url(filename_or_obj)

def open_dataset(
self,
Expand Down
40 changes: 21 additions & 19 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@
has_h5netcdf_1_4_0_or_above,
has_netCDF4,
has_numpy_2,
has_pydap,
has_scipy,
has_zarr,
has_zarr_v3,
Expand Down Expand Up @@ -7294,9 +7293,9 @@ def test_netcdf4_entrypoint(tmp_path: Path) -> None:
_check_guess_can_open_and_open(entrypoint, path, engine="netcdf4", expected=ds)
_check_guess_can_open_and_open(entrypoint, str(path), engine="netcdf4", expected=ds)

# Remote URLs without extensions are no longer claimed (stricter detection)
assert not entrypoint.guess_can_open("http://something/remote")
# Remote URLs with netCDF extensions are claimed
# Remote URLs without extensions return True (backward compatibility)
assert entrypoint.guess_can_open("http://something/remote")
# Remote URLs with netCDF extensions are also claimed
assert entrypoint.guess_can_open("http://something/remote.nc")
assert entrypoint.guess_can_open("something-local.nc")
assert entrypoint.guess_can_open("something-local.nc4")
Expand Down Expand Up @@ -7440,15 +7439,22 @@ def test_remote_url_backend_auto_detection() -> None:
f"URL {url!r} should select {expected_backend!r} but got {engine!r}"
)

# DAP URLs without extensions - pydap wins if available, netcdf4 otherwise
# When pydap is not installed, netCDF4 should handle these DAP URLs
expected_dap_backend = "pydap" if has_pydap else "netcdf4"
# DAP URLs - netcdf4 should handle these (it comes first in backend order)
# Both netcdf4 and pydap can open DAP URLs, but netcdf4 has priority
expected_dap_backend = "netcdf4"
dap_urls = [
# Explicit DAP protocol schemes
"dap2://opendap.earthdata.nasa.gov/collections/dataset",
"dap4://opendap.earthdata.nasa.gov/collections/dataset",
"dap://example.com/dataset",
"DAP2://example.com/dataset", # uppercase scheme
"DAP4://example.com/dataset", # uppercase scheme
# DAP path indicators
"https://example.com/services/DAP2/dataset", # uppercase in path
"http://test.opendap.org/opendap/data/nc/file.nc", # /opendap/ path
"https://coastwatch.pfeg.noaa.gov/erddap/griddap/erdMH1chla8day", # ERDDAP
"http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/", # THREDDS dodsC
"https://disc2.gesdisc.eosdis.nasa.gov/dods/TRMM_3B42", # GrADS /dods/
]

for url in dap_urls:
Expand All @@ -7457,20 +7463,16 @@ def test_remote_url_backend_auto_detection() -> None:
f"URL {url!r} should select {expected_dap_backend!r} but got {engine!r}"
)

# URLs that should raise ValueError (no backend can open them)
invalid_urls = [
"http://test.opendap.org/opendap/data/nc/coads_climatology.nc.dap", # .dap suffix
"https://example.com/data.dap", # .dap suffix
"http://opendap.example.com/data", # no extension, no DAP indicators
"https://test.opendap.org/dataset", # no extension, no DAP indicators
# URLs with .dap suffix are claimed by netcdf4 (backward compatibility fallback)
# Note: .dap suffix is intentionally NOT recognized as a DAP dataset URL
fallback_urls = [
("http://test.opendap.org/opendap/data/nc/coads_climatology.nc.dap", "netcdf4"),
("https://example.com/data.dap", "netcdf4"),
]

for url in invalid_urls:
with pytest.raises(
ValueError,
match=r"did not find a match in any of xarray's currently installed IO backends",
):
guess_engine(url)
for url, expected_backend in fallback_urls:
engine = guess_engine(url)
assert engine == expected_backend


@requires_netCDF4
Expand Down
Loading