Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ The following environmental variables can also be set and picked up for authenti

The filesystem can be instantiated for different use cases based on a variety of `storage_options` combinations. The following list describes some common use cases utilizing `AzureBlobFileSystem`, i.e. protocols `abfs` or `az`. Note that all cases require the `account_name` argument to be provided:
1. Anonymous connection to public container: `storage_options={'account_name': ACCOUNT_NAME, 'anon': True}` will assume the `ACCOUNT_NAME` points to a public container, and attempt to use an anonymous login. Note, the default value for `anon` is True.
2. Auto credential solving using Azure's DefaultAzureCredential() library: `storage_options={'account_name': ACCOUNT_NAME, 'anon': False}` will use [`DefaultAzureCredential`](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python) to get valid credentials to the container `ACCOUNT_NAME`. `DefaultAzureCredential` attempts to authenticate via the [mechanisms and order visualized here](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python#defaultazurecredential).
2. Auto credential solving using Azure's DefaultAzureCredential() library: `storage_options={'account_name': ACCOUNT_NAME, 'anon': False}` will use [`DefaultAzureCredential`](https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.aio.defaultazurecredential?view=azure-python) to get valid credentials to the container `ACCOUNT_NAME`. `DefaultAzureCredential` attempts to authenticate via the [mechanisms and order visualized here](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python#defaultazurecredential).
3. Auto credential solving without requiring `storage_options`: Set `AZURE_STORAGE_ANON` to `false`, resulting in automatic credential resolution. Useful for compatibility with fsspec.
4. Azure ServicePrincipal: `tenant_id`, `client_id`, and `client_secret` are all used as credentials for an Azure ServicePrincipal: e.g. `storage_options={'account_name': ACCOUNT_NAME, 'tenant_id': TENANT_ID, 'client_id': CLIENT_ID, 'client_secret': CLIENT_SECRET}`.

Expand Down
48 changes: 42 additions & 6 deletions adlfs/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from glob import has_magic
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Union
from uuid import uuid4

from azure.core.credentials import (
AzureNamedKeyCredential,
AzureSasCredential,
TokenCredential,
)
from azure.core.credentials_async import AsyncTokenCredential
from azure.core.exceptions import (
HttpResponseError,
ResourceExistsError,
Expand All @@ -37,9 +43,6 @@
from fsspec.spec import AbstractBufferedFile
from fsspec.utils import infer_storage_options

if TYPE_CHECKING:
from azure.core.credentials_async import AsyncTokenCredential

from .utils import (
__version__,
close_container_client,
Expand All @@ -52,6 +55,14 @@

logger = logging.getLogger(__name__)

# Type alias for the values accepted by the ``credential`` argument of
# ``_create_aio_blob_service_client`` and ``AzureBlobFileSystem.__init__``.
# The members are: a plain string (the docs show a SAS token or an account
# key being passed this way), a ``dict[str, str]`` (presumably the
# account-name/account-key mapping form -- TODO confirm against callers),
# the Azure named-key and SAS credential objects, and an async token
# credential.
# NOTE: this expression is evaluated at import time; the ``|`` operator
# between classes (PEP 604) requires Python 3.10 or newer.
CredentialType = (
str
| dict[str, str]
| AzureNamedKeyCredential
| AzureSasCredential
| AsyncTokenCredential
)

FORWARDED_BLOB_PROPERTIES = [
"metadata",
"creation_time",
Expand Down Expand Up @@ -127,7 +138,7 @@ def _coalesce_version_id(*args) -> Optional[str]:
def _create_aio_blob_service_client(
account_url: str,
location_mode: Optional[str] = None,
credential: Optional[Union[str, AsyncTokenCredential]] = None,
credential: CredentialType | None = None,
) -> AIOBlobServiceClient:
service_client_kwargs = {
"account_url": account_url,
Expand Down Expand Up @@ -179,6 +190,7 @@ class AzureBlobFileSystem(AsyncFileSystem):
credential: azure.core.credentials_async.AsyncTokenCredential or SAS token
The credentials with which to authenticate. Optional if the account URL already has a SAS token.
Can include an instance of TokenCredential class from azure.identity.aio.
In most cases, prefer ``anon=False`` to let adlfs resolve credentials automatically.
blocksize: int
The block size to use for download/upload operations. Defaults to 50 MiB
client_id: str
Expand Down Expand Up @@ -270,7 +282,7 @@ def __init__(
account_name: str = None,
account_key: str = None,
connection_string: str = None,
credential: Optional[Union[str, AsyncTokenCredential]] = None,
credential: CredentialType | None = None,
sas_token: str = None,
request_session=None,
socket_timeout=_SOCKET_TIMEOUT_DEFAULT,
Expand Down Expand Up @@ -336,6 +348,7 @@ def __init__(
)
self.location_mode = location_mode
self.credential = credential
self._warn_sync_credential()
if account_host:
self.account_host = account_host
self.request_session = request_session
Expand Down Expand Up @@ -394,6 +407,29 @@ def __init__(
max_concurrency = batch_size
self.max_concurrency = max_concurrency

def _warn_sync_credential(self):
    """Warn when ``self.credential`` is a synchronous token credential.

    The check is purely advisory: the credential is left untouched and no
    exception is raised. A ``FutureWarning`` is issued when the credential
    satisfies ``TokenCredential`` without also satisfying
    ``AsyncTokenCredential`` (e.g. ``azure.identity.DefaultAzureCredential``
    rather than ``azure.identity.aio.DefaultAzureCredential``).

    ``None``, ``str`` and ``dict`` credentials are skipped without any
    further inspection.
    """
    cred = self.credential

    # Missing credential, or a plain string / mapping: nothing to inspect.
    if cred is None:
        return
    if isinstance(cred, (str, dict)):
        return

    # Sync-only means: looks like a token credential, but not an async one.
    sync_only = isinstance(cred, TokenCredential) and not isinstance(
        cred, AsyncTokenCredential
    )
    if not sync_only:
        return

    message = (
        "Passing synchronous credentials (e.g. from azure.identity) is "
        "not supported. Use async credentials from azure.identity.aio "
        "instead. Synchronous credentials may experience unexpected failures "
        "in future versions of adlfs."
    )
    # NOTE(review): stacklevel=4 presumably points the warning at the code
    # that instantiated the filesystem rather than at adlfs internals --
    # confirm against the actual call chain into __init__.
    warnings.warn(message, category=FutureWarning, stacklevel=4)

@classmethod
def _strip_protocol(cls, path: str):
"""
Expand Down
35 changes: 35 additions & 0 deletions adlfs/tests/test_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import numpy as np
import pandas as pd
import pytest
from azure.identity import DefaultAzureCredential
from packaging.version import parse as parse_version
from pandas.testing import assert_frame_equal

Expand Down Expand Up @@ -2436,3 +2437,37 @@ def test_exists_kwargs(storage):
)

assert fs.exists("data/top_file.txt", test_kwarg="test")


def test_sync_credential_warning():
    """A synchronous ``azure.identity`` credential triggers the FutureWarning.

    The filesystem is still constructed and keeps the very same credential
    object that was passed in.
    """
    credential = DefaultAzureCredential()
    fs_kwargs = {
        "account_name": "fakeaccount",
        "credential": credential,
        "anon": True,
        "skip_instance_cache": True,
    }

    with pytest.warns(FutureWarning, match="synchronous credentials"):
        filesystem = AzureBlobFileSystem(**fs_kwargs)

    assert filesystem.credential is credential


@pytest.mark.parametrize(
    "credential",
    [
        pytest.param("sv=2021-06-08&ss=b&srt=sco&sp=rl", id="sas_token"),
        pytest.param(None, id="none"),
    ],
)
def test_no_sync_credential_warning(credential):
    """String and missing credentials must not raise the FutureWarning.

    FutureWarning is promoted to an error for the duration of the
    constructor call, so any such warning fails the test immediately.
    """
    fs_kwargs = {
        "account_name": "fakeaccount",
        "credential": credential,
        "anon": True,
        "skip_instance_cache": True,
    }

    with warnings.catch_warnings():
        warnings.simplefilter("error", FutureWarning)
        filesystem = AzureBlobFileSystem(**fs_kwargs)

    assert filesystem.credential is credential
6 changes: 4 additions & 2 deletions adlfs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,7 @@ async def close_credential(file_obj):
Implements asynchronous closure of credentials for
AzureBlobFile objects
"""
if not isinstance(file_obj.credential, (type(None), str)):
await file_obj.credential.close()
if not isinstance(file_obj.credential, (type(None), str, dict)):
result = file_obj.credential.close()
if result is not None:
await result
20 changes: 14 additions & 6 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,28 @@ For anonymous authentication, simply provide the storage account name:

For operations to succeed, the storage container must allow anonymous access.

For authenticated access, you have several options:
For authenticated access, the preferred approach is to set `anon=False` and let adlfs resolve
credentials automatically using Azure's `DefaultAzureCredential`:

1. Using a `SAS_TOKEN`
2. Using an account key
3. Using a managed identity
```{code-block} python
>>> fs = adlfs.AzureBlobFileSystem(account_name="ai4edataeuwest", anon=False)
```

Regardless of the method you're using, you provide the values using the `credential` argument.
You can also authenticate with a SAS token or account key via the `credential` argument:

```{code-block} python
>>> fs = adlfs.AzureBlobFileSystem(account_name="ai4edataeuwest", credential=SAS_TOKEN)
>>> fs = adlfs.AzureBlobFileSystem(account_name="ai4edataeuwest", credential=ACCOUNT_KEY)
```

If you need to pass a credential object directly, use an **async** credential from
`azure.identity.aio`:

```{code-block} python
>>> from azure.identity.aio import DefaultAzureCredential
>>> fs = adlfs.AzureBlobFileSystem(
... account_name="ai4edataeuwest",
... credential=azure.identity.DefaultAzureCredential()
... credential=DefaultAzureCredential()
... )
```

Expand Down