diff --git a/src/sentry/dynamic_sampling/per_org/tasks/configuration.py b/src/sentry/dynamic_sampling/per_org/tasks/configuration.py index 17bb7932f4ac18..cd430149603cb5 100644 --- a/src/sentry/dynamic_sampling/per_org/tasks/configuration.py +++ b/src/sentry/dynamic_sampling/per_org/tasks/configuration.py @@ -2,16 +2,20 @@ from abc import ABC, abstractmethod from collections.abc import Mapping +from datetime import timedelta from django.core.exceptions import ObjectDoesNotExist from sentry import options, quotas from sentry.constants import SAMPLING_MODE_DEFAULT, TARGET_SAMPLE_RATE_DEFAULT, ObjectStatus +from sentry.dynamic_sampling.per_org.tasks.queries import get_eap_organization_volume from sentry.dynamic_sampling.per_org.tasks.telemetry import ( DynamicSamplingException, DynamicSamplingStatus, ) from sentry.dynamic_sampling.rules.utils import ProjectId +from sentry.dynamic_sampling.tasks.common import compute_sliding_window_sample_rate +from sentry.dynamic_sampling.tasks.helpers.sliding_window import FALLBACK_SLIDING_WINDOW_SIZE from sentry.dynamic_sampling.types import DynamicSamplingMode, SamplingMeasure from sentry.dynamic_sampling.utils import has_custom_dynamic_sampling from sentry.models.options.project_option import ProjectOption @@ -49,12 +53,17 @@ class BaseDynamicSamplingConfiguration(ABC): def __init__(self, organization: Organization) -> None: self.organization = organization + self.sliding_window_sample_rate: TargetSampleRate = None @property @abstractmethod def is_enabled(self) -> bool: raise NotImplementedError + @abstractmethod + def get_sample_rate(self) -> TargetSampleRate: + raise NotImplementedError + @property def is_span_based(self) -> bool: return self.measure == SamplingMeasure.SPANS @@ -78,12 +87,15 @@ def _get_projects(self) -> list[Project]: class NoDynamicSamplingConfiguration(BaseDynamicSamplingConfiguration): def __init__(self) -> None: - pass + self.sliding_window_sample_rate: TargetSampleRate = None @property def is_enabled(self) -> bool: return False + def get_sample_rate(self) -> TargetSampleRate: + return None + class AutomaticDynamicSamplingConfiguration(BaseDynamicSamplingConfiguration): sample_rate: TargetSampleRate @@ -98,11 +110,34 @@ def __init__(self, organization: Organization) -> None: except ObjectDoesNotExist as exc: raise DynamicSamplingException(DynamicSamplingStatus.NO_SUBSCRIPTION) from exc self.projects = self._get_projects() + self.sliding_window_sample_rate = self._get_sliding_window_sample_rate() @property def is_enabled(self) -> bool: return self.sample_rate is not None + def get_sample_rate(self) -> TargetSampleRate: + if self.sliding_window_sample_rate is not None: + return self.sliding_window_sample_rate + return self.sample_rate + + def _get_sliding_window_sample_rate(self) -> TargetSampleRate: + if not self.projects: + return None + + org_volume_24h = get_eap_organization_volume( + self, time_interval=timedelta(hours=FALLBACK_SLIDING_WINDOW_SIZE) + ) + if org_volume_24h is None: + return None + + return compute_sliding_window_sample_rate( + org_id=self.organization.id, + project_id=None, + total_root_count=org_volume_24h.total, + window_size=FALLBACK_SLIDING_WINDOW_SIZE, + ) + class CustomDynamicSamplingOrganizationConfiguration(BaseDynamicSamplingConfiguration): sample_rate: TargetSampleRate @@ -120,6 +155,9 @@ def __init__(self, organization: Organization) -> None: def is_enabled(self) -> bool: return True + def get_sample_rate(self) -> TargetSampleRate: + return self.sample_rate + class CustomDynamicSamplingProjectConfiguration(BaseDynamicSamplingConfiguration): project_target_sample_rates: ProjectTargetSampleRates @@ -136,6 +174,9 @@ def is_enabled(self) -> bool: sample_rate is not None for sample_rate in self.project_target_sample_rates.values() ) + def get_sample_rate(self) -> TargetSampleRate: + return None + def _get_project_target_sample_rates(self) -> ProjectTargetSampleRates: project_sample_rates = ProjectOption.objects.get_value_bulk( self.projects, "sentry:target_sample_rate" diff --git a/src/sentry/dynamic_sampling/per_org/tasks/queries.py b/src/sentry/dynamic_sampling/per_org/tasks/queries.py index 63f708279006d0..8290c37959a5f1 100644 --- a/src/sentry/dynamic_sampling/per_org/tasks/queries.py +++ b/src/sentry/dynamic_sampling/per_org/tasks/queries.py @@ -2,15 +2,16 @@ from collections.abc import Iterator, Mapping from datetime import UTC, datetime, timedelta -from typing import Any +from typing import Any, Protocol from sentry_protos.snuba.v1.trace_item_attribute_pb2 import ExtrapolationMode -from sentry.dynamic_sampling.per_org.tasks.configuration import BaseDynamicSamplingConfiguration from sentry.dynamic_sampling.tasks.common import ( ACTIVE_ORGS_VOLUMES_DEFAULT_TIME_INTERVAL, OrganizationDataVolume, ) +from sentry.models.organization import Organization +from sentry.models.project import Project from sentry.search.eap.constants import SAMPLING_MODE_HIGHEST_ACCURACY from sentry.search.eap.types import SearchResolverConfig from sentry.search.events.types import SnubaParams @@ -18,6 +19,11 @@ from sentry.snuba.spans_rpc import Spans +class OrganizationVolumeConfig(Protocol): + organization: Organization + projects: list[Project] + + def _get_aggregate_int(row: Mapping[str, Any], column: str) -> int: return int(row.get(column, 0)) @@ -46,7 +52,7 @@ def run_eap_spans_table_query_in_chunks( def get_eap_organization_volume( - config: BaseDynamicSamplingConfiguration, + config: OrganizationVolumeConfig, time_interval: timedelta = ACTIVE_ORGS_VOLUMES_DEFAULT_TIME_INTERVAL, ) -> OrganizationDataVolume | None: end_time = datetime.now(UTC) diff --git a/src/sentry/dynamic_sampling/per_org/tasks/scheduler.py b/src/sentry/dynamic_sampling/per_org/tasks/scheduler.py index 91924b6f53573a..f54411d378a24e 100644 --- a/src/sentry/dynamic_sampling/per_org/tasks/scheduler.py +++ b/src/sentry/dynamic_sampling/per_org/tasks/scheduler.py @@ -49,6 +49,7 @@ def _next_bucket_index() -> int: @track_dynamic_sampling def schedule_per_org_calculations() -> None: bucket_index = _next_bucket_index() + bucket_tag = {"bucket_index": str(bucket_index)} dispatched = 0 skipped = 0 diff --git a/tests/sentry/dynamic_sampling/per_org/tasks/test_configuration.py b/tests/sentry/dynamic_sampling/per_org/tasks/test_configuration.py index 3648da8ef7e3a1..b9d2c0d7e647fd 100644 --- a/tests/sentry/dynamic_sampling/per_org/tasks/test_configuration.py +++ b/tests/sentry/dynamic_sampling/per_org/tasks/test_configuration.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Callable +from datetime import timedelta from typing import NamedTuple from unittest.mock import patch @@ -18,6 +19,8 @@ DynamicSamplingException, DynamicSamplingStatus, ) +from sentry.dynamic_sampling.tasks.common import OrganizationDataVolume +from sentry.dynamic_sampling.tasks.helpers.sliding_window import FALLBACK_SLIDING_WINDOW_SIZE from sentry.dynamic_sampling.types import DynamicSamplingMode, SamplingMeasure from sentry.models.organization import Organization from sentry.testutils.cases import TestCase @@ -66,6 +69,66 @@ def test_subscription_backed_org_uses_blended_sample_rate(self) -> None: with pytest.raises(AttributeError): getattr(configuration, "project_target_sample_rates") get_blended_sample_rate.assert_called_once_with(organization_id=org.id) + assert configuration.get_sample_rate() == 0.5 + + def test_subscription_backed_org_uses_eap_sliding_window_sample_rate(self) -> None: + org = self.create_organization() + self.create_project(organization=org) + sliding_window_volume = OrganizationDataVolume(org_id=org.id, total=1000, indexed=250) + + with ( + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.quotas.backend.get_blended_sample_rate", + return_value=0.5, + ), + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.get_eap_organization_volume", + return_value=sliding_window_volume, + ) as get_volume, + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.compute_sliding_window_sample_rate", + return_value=0.25, + ) as compute_sample_rate, + ): + configuration = get_configuration(org.id) + + assert isinstance(configuration, AutomaticDynamicSamplingConfiguration) + assert configuration.get_sample_rate() == 0.25 + get_volume.assert_called_once() + assert get_volume.call_args.kwargs["time_interval"] == timedelta( + hours=FALLBACK_SLIDING_WINDOW_SIZE + ) + compute_sample_rate.assert_called_once_with( + org_id=org.id, + project_id=None, + total_root_count=1000, + window_size=FALLBACK_SLIDING_WINDOW_SIZE, + ) + + def test_subscription_backed_org_falls_back_to_blended_sample_rate_without_volume( + self, + ) -> None: + org = self.create_organization() + self.create_project(organization=org) + + with ( + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.quotas.backend.get_blended_sample_rate", + return_value=0.5, + ), + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.get_eap_organization_volume", + return_value=None, + ), + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.compute_sliding_window_sample_rate", + ) as compute_sample_rate, + ): + configuration = get_configuration(org.id) + + assert isinstance(configuration, AutomaticDynamicSamplingConfiguration) + assert configuration.get_sample_rate() == 0.5 + compute_sample_rate.assert_not_called() def test_subscription_backed_org_without_sample_rate_is_disabled(self) -> None: org = self.create_organization() @@ -142,6 +205,7 @@ def test_org_mode_custom_dynamic_sampling_uses_org_target_sample_rate(self) -> N measure_case.expected_measure == SamplingMeasure.SEGMENTS ) assert configuration.sample_rate == 0.3 + assert configuration.get_sample_rate() == 0.3 with pytest.raises(AttributeError): getattr(configuration, "project_target_sample_rates") get_blended_sample_rate.assert_not_called() @@ -182,6 +246,7 @@ def test_project_mode_custom_dynamic_sampling_stores_project_sample_rates(self) project.id: 0.2, project_without_rate.id: None, } + assert configuration.get_sample_rate() is None with pytest.raises(AttributeError): getattr(configuration, "sample_rate") get_blended_sample_rate.assert_not_called() diff --git a/tests/sentry/dynamic_sampling/per_org/tasks/test_queries.py b/tests/sentry/dynamic_sampling/per_org/tasks/test_queries.py index 5eee5b058cbc50..0e58a5956ce098 100644 --- a/tests/sentry/dynamic_sampling/per_org/tasks/test_queries.py +++ b/tests/sentry/dynamic_sampling/per_org/tasks/test_queries.py @@ -79,9 +79,15 @@ def test_iterates_query_data_in_offset_chunks(self) -> None: class EAPOrganizationVolumeTest(TestCase, SnubaTestCase, SpanTestCase): def get_config(self, organization: Organization) -> BaseDynamicSamplingConfiguration: - with patch( - "sentry.dynamic_sampling.per_org.tasks.configuration.quotas.backend.get_blended_sample_rate", - return_value=1.0, + with ( + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.quotas.backend.get_blended_sample_rate", + return_value=1.0, + ), + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.get_eap_organization_volume", + return_value=None, + ), ): return get_configuration(organization.id) diff --git a/tests/sentry/dynamic_sampling/per_org/tasks/test_scheduler.py b/tests/sentry/dynamic_sampling/per_org/tasks/test_scheduler.py index 403f0470d7218d..77959a8f74e936 100644 --- a/tests/sentry/dynamic_sampling/per_org/tasks/test_scheduler.py +++ b/tests/sentry/dynamic_sampling/per_org/tasks/test_scheduler.py @@ -133,6 +133,10 @@ def test_run_calculations_per_org_returns_no_volume_without_traffic(self) -> Non "sentry.dynamic_sampling.per_org.tasks.configuration.quotas.backend.get_blended_sample_rate", return_value=1.0, ), + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.get_eap_organization_volume", + return_value=None, + ), patch( "sentry.dynamic_sampling.per_org.tasks.scheduler.get_eap_organization_volume", return_value=None, @@ -154,6 +158,10 @@ def test_run_calculations_per_org_continues_with_traffic(self) -> None: "sentry.dynamic_sampling.per_org.tasks.configuration.quotas.backend.get_blended_sample_rate", return_value=1.0, ), + patch( + "sentry.dynamic_sampling.per_org.tasks.configuration.get_eap_organization_volume", + return_value=None, + ), patch( "sentry.dynamic_sampling.per_org.tasks.scheduler.get_eap_organization_volume", return_value=org_volume,