Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,3 +849,39 @@ def open_groups_as_dict(

# mapping of engine name to (module name, BackendEntrypoint Class)
BACKEND_ENTRYPOINTS: dict[str, tuple[str | None, type[BackendEntrypoint]]] = {}


def _is_likely_dap_url(url: str) -> bool:
"""
Determines if a URL is likely an OPeNDAP (DAP) endpoint based on
known protocols, server software path patterns, and file extensions.

Args:
url: The URL to inspect.

Returns:
True if the URL matches common DAP patterns, False otherwise.
"""
if not url:
return False

url_lower = url.lower()

# Check for explicit DAP protocol schemes - these definitively indicate a DAP service
if url_lower.startswith(("dap2://", "dap4://", "dap://")):
return True

# For remote URIs, check for DAP server software path patterns
if is_remote_uri(url_lower):
dap_path_patterns = (
"/dodsc/", # THREDDS Data Server (TDS) DAP endpoint (case-insensitive)
"/dods/", # GrADS Data Server (GDS) DAP endpoint
"/opendap/", # Generic OPeNDAP/Hyrax server
"/erddap/", # ERDDAP data server
"/dap2/", # Explicit DAP2 version in path
"/dap4/", # Explicit DAP4 version in path
"/dap/",
)
return any(pattern in url_lower for pattern in dap_path_patterns)

return False
44 changes: 40 additions & 4 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from xarray.core.utils import (
FrozenDict,
close_on_error,
emit_user_level_warning,
is_remote_uri,
strip_uri_params,
try_read_magic_number_from_path,
Expand Down Expand Up @@ -463,6 +464,15 @@ def open(
if isinstance(filename, os.PathLike):
filename = os.fspath(filename)

# Replace DAP protocol prefixes with https:// - netCDF4 library can't handle them
# These prefixes may be added by users to explicitly indicate DAP protocol
# Following pydap's convention, we convert to https://
# See: https://github.com/pydap/pydap/blob/0a2b0892611abaf0a9762ffd4f2f082cb8e497c2/src/pydap/handlers/dap.py#L103-L107
if isinstance(filename, str):
filename_lower = filename.lower()
if filename_lower.startswith(("dap2://", "dap4://")):
filename = "https://" + filename[7:]

Comment thread
ianhi marked this conversation as resolved.
Outdated
if isinstance(filename, IOBase):
raise TypeError(
f"file objects are not supported by the netCDF4 backend: {filename}"
Expand Down Expand Up @@ -715,10 +725,36 @@ def _has_netcdf_ext(path: str | os.PathLike, is_remote: bool = False) -> bool:
_, ext = os.path.splitext(path)
return ext in {".nc", ".nc4", ".cdf"}

if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
# For remote URIs, check extension (accounting for query params/fragments)
# Remote netcdf-c can handle both regular URLs and DAP URLs
return _has_netcdf_ext(filename_or_obj, is_remote=True)
# Check for explicit DAP protocol indicators:
# 1. DAP scheme: dap2:// or dap4:// (case-insensitive, may not be recognized by is_remote_uri)
# 2. Remote URI with /dap2/ or /dap4/ in URL path (case-insensitive)
# Note: We intentionally do NOT check for .dap suffix as that would match
# file extensions like .dap which trigger downloads of binary data
if isinstance(filename_or_obj, str):
url_lower = filename_or_obj.lower()
from xarray.backends.common import _is_likely_dap_url

if _is_likely_dap_url(url_lower):
return True
elif is_remote_uri(filename_or_obj):
Comment thread
ianhi marked this conversation as resolved.
Outdated
# For remote URIs, check extension (accounting for query params/fragments)
# Remote netcdf-c can handle both regular URLs and DAP URLs
if _has_netcdf_ext(filename_or_obj, is_remote=True):
return True
elif "zarr" in url_lower:
return False
Comment thread
ianhi marked this conversation as resolved.
else:
# returning true so we don't have a breaking change for people relying on this
# netcdf backend guessing true for all remote sources. But emitting a deprecation
# warning that this behavior will go away as it presents problems for other backends
emit_user_level_warning(
f"The NetCDF4 backend is guessing that {filename_or_obj!r} is a NetCDF file. "
"In the future, xarray will require remote URLs to either have a .nc, .nc4, or .cdf "
"extension, use a DAP protocol (dap2://, dap4://, or /dap/ in the path), or specify "
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't strictly true, but the full truth is, I think, too verbose for this already long warning.

"the backend explicitly using the 'engine' parameter (e.g., engine='netcdf4').",
FutureWarning,
Comment thread
ianhi marked this conversation as resolved.
Outdated
)
return True

if isinstance(filename_or_obj, str | os.PathLike):
# For local paths, check magic number first, then extension
Expand Down
19 changes: 2 additions & 17 deletions xarray/backends/pydap_.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BackendArray,
BackendEntrypoint,
T_PathFileOrDataStore,
_is_likely_dap_url,
_normalize_path,
datatree_from_dict_with_io_cleanup,
robust_getitem,
Expand All @@ -22,7 +23,6 @@
Frozen,
FrozenDict,
close_on_error,
is_remote_uri,
)
from xarray.core.variable import Variable
from xarray.namedarray.pycompat import integer_types
Expand Down Expand Up @@ -213,22 +213,7 @@ def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
if not isinstance(filename_or_obj, str):
return False

# Check for explicit DAP protocol indicators:
# 1. DAP scheme: dap2:// or dap4:// (case-insensitive, may not be recognized by is_remote_uri)
# 2. Remote URI with /dap2/ or /dap4/ in URL path (case-insensitive)
# Note: We intentionally do NOT check for .dap suffix as that would match
# file extensions like .dap which trigger downloads of binary data
url_lower = filename_or_obj.lower()
if url_lower.startswith(("dap2://", "dap4://")):
return True

# For standard remote URIs, check for DAP indicators in path
if is_remote_uri(filename_or_obj):
return (
"/dap2/" in url_lower or "/dap4/" in url_lower or "/dodsC/" in url_lower
)

return False
return _is_likely_dap_url(filename_or_obj)

def open_dataset(
self,
Expand Down
76 changes: 57 additions & 19 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@
has_h5netcdf_1_4_0_or_above,
has_netCDF4,
has_numpy_2,
has_pydap,
has_scipy,
has_zarr,
has_zarr_v3,
Expand Down Expand Up @@ -7254,9 +7253,11 @@ def test_netcdf4_entrypoint(tmp_path: Path) -> None:
_check_guess_can_open_and_open(entrypoint, path, engine="netcdf4", expected=ds)
_check_guess_can_open_and_open(entrypoint, str(path), engine="netcdf4", expected=ds)

# Remote URLs without extensions are no longer claimed (stricter detection)
assert not entrypoint.guess_can_open("http://something/remote")
# Remote URLs with netCDF extensions are claimed
# Remote URLs without extensions emit a deprecation warning but still return True
# for backward compatibility
with pytest.warns(FutureWarning, match="NetCDF4 backend is guessing"):
assert entrypoint.guess_can_open("http://something/remote")
# Remote URLs with netCDF extensions are claimed without warning
assert entrypoint.guess_can_open("http://something/remote.nc")
assert entrypoint.guess_can_open("something-local.nc")
assert entrypoint.guess_can_open("something-local.nc4")
Expand Down Expand Up @@ -7400,15 +7401,22 @@ def test_remote_url_backend_auto_detection() -> None:
f"URL {url!r} should select {expected_backend!r} but got {engine!r}"
)

# DAP URLs without extensions - pydap wins if available, netcdf4 otherwise
# When pydap is not installed, netCDF4 should handle these DAP URLs
expected_dap_backend = "pydap" if has_pydap else "netcdf4"
# DAP URLs - netcdf4 should handle these (it comes first in backend order)
# Both netcdf4 and pydap can open DAP URLs, but netcdf4 has priority
expected_dap_backend = "netcdf4"
dap_urls = [
# Explicit DAP protocol schemes
"dap2://opendap.earthdata.nasa.gov/collections/dataset",
"dap4://opendap.earthdata.nasa.gov/collections/dataset",
"dap://example.com/dataset",
"DAP2://example.com/dataset", # uppercase scheme
"DAP4://example.com/dataset", # uppercase scheme
# DAP path indicators
"https://example.com/services/DAP2/dataset", # uppercase in path
"http://test.opendap.org/opendap/data/nc/file.nc", # /opendap/ path
"https://coastwatch.pfeg.noaa.gov/erddap/griddap/erdMH1chla8day", # ERDDAP
"http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/", # THREDDS dodsC
"https://disc2.gesdisc.eosdis.nasa.gov/dods/TRMM_3B42", # GrADS /dods/
]

for url in dap_urls:
Expand All @@ -7417,20 +7425,50 @@ def test_remote_url_backend_auto_detection() -> None:
f"URL {url!r} should select {expected_dap_backend!r} but got {engine!r}"
)

# URLs that should raise ValueError (no backend can open them)
invalid_urls = [
"http://test.opendap.org/opendap/data/nc/coads_climatology.nc.dap", # .dap suffix
"https://example.com/data.dap", # .dap suffix
"http://opendap.example.com/data", # no extension, no DAP indicators
"https://test.opendap.org/dataset", # no extension, no DAP indicators
# URLs with .dap suffix are claimed by netcdf4 (backward compatibility fallback)
# Note: .dap suffix is intentionally NOT recognized as a DAP dataset URL
# These will emit a deprecation warning (tested in test_netcdf4_remote_url_deprecation_warning)
import warnings

fallback_urls = [
("http://test.opendap.org/opendap/data/nc/coads_climatology.nc.dap", "netcdf4"),
("https://example.com/data.dap", "netcdf4"),
]

for url in invalid_urls:
with pytest.raises(
ValueError,
match=r"did not find a match in any of xarray's currently installed IO backends",
):
guess_engine(url)
for url, expected_backend in fallback_urls:
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
engine = guess_engine(url)
assert engine == expected_backend


@requires_netCDF4
def test_netcdf4_remote_url_deprecation_warning() -> None:
    """
    Test that NetCDF4 backend emits a deprecation warning for ambiguous remote URLs.

    Remote URLs without .nc extension or DAP indicators should trigger a FutureWarning
    since in the future they won't be automatically claimed by the netCDF4 backend.
    URLs with a clear indicator must be claimed without any FutureWarning.
    """
    import warnings

    from xarray.backends.netCDF4_ import NetCDF4BackendEntrypoint

    entrypoint = NetCDF4BackendEntrypoint()

    # Remote URLs without extension or DAP indicators should emit warning
    with pytest.warns(FutureWarning, match="NetCDF4 backend is guessing"):
        result = entrypoint.guess_can_open("http://example.com/data")
    assert result is True  # Still returns True for backward compatibility

    # These have clear indicators (netCDF extension, DAP scheme, DAP server path)
    unambiguous_urls = (
        "http://example.com/data.nc",
        "dap2://example.com/data",
        "http://example.com/dodsC/data",
        "http://example.com/erddap/griddap/data",
    )

    # Escalate FutureWarning to an error so any emitted warning fails the test,
    # and check the result so a silent "False" is caught as well.
    with warnings.catch_warnings():
        warnings.simplefilter("error", FutureWarning)
        for url in unambiguous_urls:
            assert entrypoint.guess_can_open(url), url


@requires_netCDF4
Expand Down
Loading