diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md index 3cc2b39de100..8494c6ad9a2f 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md @@ -9,6 +9,8 @@ ### Breaking Changes ### Bugs Fixed +- Exempt specific GenAI attribute values from custom properties truncation + ([#45596](https://github.com/Azure/azure-sdk-for-python/pull/45596)) - Fix attributes override in Rate Limited Sampler ([#45592](https://github.com/Azure/azure-sdk-for-python/pull/45592)) - Add environment variable to disable/enable custom properties truncation diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/README.md b/sdk/monitor/azure-monitor-opentelemetry-exporter/README.md index b4bb06afad30..720a1e5bb027 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/README.md +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/README.md @@ -139,10 +139,6 @@ All configuration options can be passed through the constructors of exporters th * `storage_directory`: Storage directory in which to store retry files. Defaults to `/Microsoft/AzureMonitor/opentelemetry-python-`. * `credential`: Token credential, such as ManagedIdentityCredential or ClientSecretCredential, used for [Azure Active Directory (AAD) authentication][aad_for_ai_docs]. Defaults to None. See [samples][exporter_samples] for examples. The credential will be automatically created from the `APPLICATIONINSIGHTS_AUTHENTICATION_STRING` environment variable if not explicitly passed in. See [documentation][aad_env_var_docs] for more. -## Environment Variables - -* Set `AZURE_MONITOR_DISABLE_CUSTOM_DIMENSIONS_LIMIT` to `True` to remove the 64kb truncation limit on custom dimensions. Defaults to `False`. 
- ## Examples ### Logging (experimental) diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py index 6ebf38321c64..c16c7f662ddc 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_constants.py @@ -355,7 +355,15 @@ class _RP_Names(Enum): # Resource attribute applicationId _APPLICATION_ID_RESOURCE_KEY = "microsoft.applicationId" -# Custom dimensions limit truncation toggle -AZURE_MONITOR_DISABLE_CUSTOM_DIMENSIONS_LIMIT = "AZURE_MONITOR_DISABLE_CUSTOM_DIMENSIONS_LIMIT" +# Gen AI attributes whose value should be exempt from truncation +_GEN_AI_ATTRIBUTES = ( + "gen_ai.input.messages", + "gen_ai.output.messages", + "gen_ai.system_instructions", + "gen_ai.tool.definitions", + "gen_ai.tool.call.arguments", + "gen_ai.tool.call.result", + "gen_ai.evaluation.explanation", +) # cSpell:disable diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py index d1a272d30615..5b8d016eaf76 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py @@ -29,7 +29,7 @@ _KUBERNETES_SERVICE_HOST, _PYTHON_APPLICATIONINSIGHTS_ENABLE_TELEMETRY, _WEBSITE_SITE_NAME, - AZURE_MONITOR_DISABLE_CUSTOM_DIMENSIONS_LIMIT, + _GEN_AI_ATTRIBUTES, ) from azure.monitor.opentelemetry.exporter._constants import ( _TYPE_MAP, @@ -351,9 +351,7 @@ def _is_any_synthetic_source(properties: Optional[Any]) -> bool: # pylint: disable=W0622 def _filter_custom_properties(properties: Attributes, filter=None) -> 
Dict[str, str]: - disable_custom_dimensions_limit = ( - environ.get(AZURE_MONITOR_DISABLE_CUSTOM_DIMENSIONS_LIMIT, "").strip().lower() == "true" - ) + max_length = 64 * 1024 processed_properties: Dict[str, str] = {} if not properties: return processed_properties @@ -362,14 +360,13 @@ def _filter_custom_properties(properties: Attributes, filter=None) -> Dict[str, if filter is not None: if not filter(key, val): continue - # Apply truncation/filtering rules - # Max key length is 150 + # Apply truncation rules + # Max key length is 150, value is 64 * 1024 if not key or len(key) > 150 or val is None: continue - if disable_custom_dimensions_limit: + if key in _GEN_AI_ATTRIBUTES: processed_properties[key] = str(val) else: - max_length = 64 * 1024 processed_properties[key] = str(val)[:max_length] return processed_properties diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py index 232e7414588e..3101992e8f25 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py @@ -10,6 +10,7 @@ from opentelemetry.sdk.resources import Resource from azure.monitor.opentelemetry.exporter import _utils from azure.monitor.opentelemetry.exporter._generated.exporter.models import TelemetryItem +from azure.monitor.opentelemetry.exporter._constants import _GEN_AI_ATTRIBUTES from opentelemetry.sdk.resources import Resource from unittest.mock import patch @@ -53,40 +54,51 @@ def test_filter_custom_properties_drops_invalid_entries(self): self.assertEqual(filtered["short"], "ok") self.assertNotIn("k" * 151, filtered) - def test_filter_custom_properties_preserves_large_values_after_disable_limit(self): - # Ensure values larger than 64KiB are not truncated when the env variable is set to true - enable_values = ["true", "True", "TRUE", "TrUE", " true "] - large_value = "x" * (64 * 1024 + 1000) - properties 
= {"large_key": large_value} - - for env_value in enable_values: - with self.subTest(env_value=env_value): - with patch.dict( - "azure.monitor.opentelemetry.exporter._utils.environ", - {"AZURE_MONITOR_DISABLE_CUSTOM_DIMENSIONS_LIMIT": env_value}, - ): - filtered = _utils._filter_custom_properties(properties) - self.assertIn("large_key", filtered) - self.assertEqual(filtered["large_key"], large_value) - self.assertEqual(len(filtered["large_key"]), 64 * 1024 + 1000) - - def test_filter_custom_properties_preserves_large_values_after_enable_limit(self): - # Ensure values larger than 64KiB are not truncated when the env variable is set to false/empty/invalid - disable_values = ["", "False", "truthy", "89", "fALSE", " "] + def test_custom_properties_gen_ai_attributes_not_truncated(self): + # All values in _GEN_AI_ATTRIBUTES should not be truncated even when > 64KiB large_value = "x" * (64 * 1024 + 1000) + properties = {key: large_value for key in _GEN_AI_ATTRIBUTES} + filtered = _utils._filter_custom_properties(properties) + for key in _GEN_AI_ATTRIBUTES: + with self.subTest(key=key): + self.assertIn(key, filtered) + self.assertEqual(len(filtered[key]), 64 * 1024 + 1000) + + def test_filter_custom_properties_non_gen_ai_truncated_at_64kb(self): + # Regular properties exceeding 64KiB should be truncated max_length = 64 * 1024 - properties = {"large_key": large_value} - - for env_value in disable_values: - with self.subTest(env_value=env_value): - with patch.dict( - "azure.monitor.opentelemetry.exporter._utils.environ", - {"AZURE_MONITOR_DISABLE_CUSTOM_DIMENSIONS_LIMIT": env_value}, - ): - filtered = _utils._filter_custom_properties(properties) - self.assertIn("large_key", filtered) - self.assertEqual(filtered["large_key"], "x" * max_length) - self.assertEqual(len(filtered["large_key"]), max_length) + large_value = "y" * (max_length + 2000) + properties = { + "span_kind": large_value, + "gen_ai.agent.version": large_value, + "http.method": large_value, + "custom.attribute": 
large_value, + } + filtered = _utils._filter_custom_properties(properties) + for key in properties: + with self.subTest(key=key): + self.assertIn(key, filtered) + self.assertEqual(len(filtered[key]), max_length) + + def test_filter_custom_properties_mixed_gen_ai_and_regular(self): + # Gen AI attributes keep full value, regular ones are truncated + max_length = 64 * 1024 + large_value = "z" * (max_length + 3000) + properties = { + "gen_ai.input.messages": large_value, + "gen_ai.output.messages": large_value, + "gen_ai.agent.version": large_value, + "span_kind": large_value, + "db.statement": large_value, + } + filtered = _utils._filter_custom_properties(properties) + # Gen AI attributes — not truncated + self.assertEqual(len(filtered["gen_ai.input.messages"]), max_length + 3000) + self.assertEqual(len(filtered["gen_ai.output.messages"]), max_length + 3000) + # Regular attributes — truncated + self.assertEqual(len(filtered["gen_ai.agent.version"]), max_length) + self.assertEqual(len(filtered["span_kind"]), max_length) + self.assertEqual(len(filtered["db.statement"]), max_length) def test_nanoseconds_to_duration(self): ns_to_duration = _utils.ns_to_duration