Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@
from sentry.api.bases import NoProjects, OrganizationEventsEndpointBase
from sentry.api.endpoints.organization_trace_item_attributes import adjust_start_end_window
from sentry.api.utils import handle_query_errors
from sentry.auth.staff import is_active_staff
from sentry.auth.superuser import is_active_superuser
from sentry.exceptions import InvalidSearchQuery
from sentry.models.organization import Organization
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.spans.attributes import SPANS_STATS_EXCLUDED_ATTRIBUTES
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
from sentry.search.eap.utils import can_expose_attribute, translate_internal_to_public_alias
from sentry.search.eap.utils import can_expose_attribute_to_api, translate_internal_to_public_alias
from sentry.search.events import fields
from sentry.seer.endpoints.compare import compare_distributions
from sentry.seer.signed_seer_api import SeerViewerContext
Expand Down Expand Up @@ -58,6 +60,7 @@ def get(self, request: Request, organization: Organization) -> Response:
above = request.GET.get("above") == "1"
query_1 = request.GET.get("query_1", "") # Suspect query
query_2 = request.GET.get("query_2", "") # Query for all the spans with the base query
include_internal = is_active_superuser(request) or is_active_staff(request)

if query_1 == query_2:
return Response({"rankedAttributes": []})
Expand All @@ -68,6 +71,7 @@ def get(self, request: Request, organization: Organization) -> Response:
above=above,
query_1=query_1,
query_2=query_2,
include_internal=include_internal,
)

cohort_2_distribution = distributions_result["cohort_2_distribution"]
Expand Down Expand Up @@ -131,7 +135,19 @@ def get(self, request: Request, organization: Organization) -> Response:
if public_alias is None:
public_alias = attr

if not public_alias.startswith("tags[") and (
if (
can_expose_attribute_to_api(
attr,
SupportedTraceItemType.SPANS,
include_internal=include_internal,
)
and can_expose_attribute_to_api(
public_alias,
SupportedTraceItemType.SPANS,
include_internal=include_internal,
)
and not public_alias.startswith("tags[")
) and (
not public_alias.startswith("sentry.")
or public_alias == "sentry.normalized_description"
):
Expand Down Expand Up @@ -175,6 +191,7 @@ def query_attribute_distributions(
above: bool,
query_1: str,
query_2: str,
include_internal: bool = False,
) -> AttributeDistributionsResponse:
"""
Query for and compute span attribute distributions for outlier (query_1) and baseline (query_2) cohorts.
Expand All @@ -190,7 +207,9 @@ def query_attribute_distributions(
AttributeDistributionsResponse with distribution data and total counts for both cohorts
"""
resolver_config = SearchResolverConfig(
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
api_attribute_visibility_item_type=SupportedTraceItemType.SPANS.value,
api_attribute_visibility_include_internal=include_internal,
)

resolver = SearchResolver(
Expand Down Expand Up @@ -261,6 +280,16 @@ def query_attribute_distributions(

cohort_1, _, _ = resolver.resolve_query(query_1)
cohort_2, _, _ = resolver.resolve_query(query_2)
if resolver.has_hidden_api_attributes():
return AttributeDistributionsResponse(
cohort_2_distribution=[],
cohort_2_distribution_map={},
total_cohort_2=0,
cohort_1_distribution=[],
cohort_1_distribution_map={},
total_cohort_1=0,
cohort_1_function_value=function_value,
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ranked resolver misses hidden function

Medium Severity

The ranked-attribute flow calls Spans.run_table_query without passing the shared SearchResolver, so hidden API fields referenced only in the percentile function are tracked on a throwaway resolver. has_hidden_api_attributes() on the main resolver stays false and the endpoint can still run full distribution analysis instead of returning empty ranked results.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit ab4a530. Configure here.


# Fetch attribute names for parallelization
adjusted_start_date, adjusted_end_date = adjust_start_end_window(
Expand Down Expand Up @@ -291,6 +320,12 @@ def query_attribute_distributions(
for i, attr_proto in enumerate(attrs_response.attributes):
if attr_proto.name in SPANS_STATS_EXCLUDED_ATTRIBUTES:
continue
if not can_expose_attribute_to_api(
attr_proto.name,
SupportedTraceItemType.SPANS,
include_internal=include_internal,
):
continue

chunked_attributes[i % PARALLELIZATION_FACTOR].append(
AttributeKey(name=attr_proto.name, type=AttributeKey.TYPE_STRING)
Expand Down Expand Up @@ -367,7 +402,11 @@ def run_table_query_with_error_handling(query_string):
processed_cohort_2_buckets = set()

for attribute in cohort_2_data:
if not can_expose_attribute(attribute.attribute_name, SupportedTraceItemType.SPANS):
if not can_expose_attribute_to_api(
attribute.attribute_name,
SupportedTraceItemType.SPANS,
include_internal=include_internal,
):
continue

for bucket in attribute.buckets:
Expand All @@ -376,7 +415,11 @@ def run_table_query_with_error_handling(query_string):
)

for attribute in cohort_1_data:
if not can_expose_attribute(attribute.attribute_name, SupportedTraceItemType.SPANS):
if not can_expose_attribute_to_api(
attribute.attribute_name,
SupportedTraceItemType.SPANS,
include_internal=include_internal,
):
continue
for bucket in attribute.buckets:
cohort_1_distribution.append((attribute.attribute_name, bucket.label, bucket.value))
Expand Down
33 changes: 29 additions & 4 deletions src/sentry/api/endpoints/organization_trace_item_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from sentry.api.event_search import translate_escape_sequences
from sentry.api.serializers.base import serialize
from sentry.api.utils import handle_query_errors
from sentry.auth.staff import is_active_staff
from sentry.auth.superuser import is_active_superuser
from sentry.models.organization import Organization
from sentry.search.eap.columns import ColumnDefinitions
from sentry.search.eap.constants import SUPPORTED_STATS_TYPES
Expand All @@ -31,7 +33,8 @@
SPANS_STATS_EXCLUDED_ATTRIBUTES_PUBLIC_ALIAS,
)
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
from sentry.search.eap.types import SearchResolverConfig
from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
from sentry.search.eap.utils import can_expose_attribute_to_api
from sentry.snuba import rpc_dataset_common
from sentry.snuba.occurrences_rpc import Occurrences
from sentry.snuba.referrer import Referrer
Expand Down Expand Up @@ -143,9 +146,15 @@ def get(self, request: Request, organization: Organization) -> Response:
return Response(serializer.errors, status=400)
serialized = serializer.validated_data

stats_config = get_trace_item_stats_config(serialized.get("itemType", "spans"))
item_type = serialized.get("itemType", "spans")
stats_config = get_trace_item_stats_config(item_type)
api_item_type = SupportedTraceItemType(item_type)
include_internal = is_active_superuser(request) or is_active_staff(request)

resolver_config = SearchResolverConfig()
resolver_config = SearchResolverConfig(
api_attribute_visibility_item_type=api_item_type.value,
api_attribute_visibility_include_internal=include_internal,
)
resolver = SearchResolver(
params=snuba_params, config=resolver_config, definitions=stats_config.definitions
)
Expand All @@ -159,7 +168,7 @@ def get_table_results():
with handle_query_errors():
return stats_config.rpc_class.run_table_query(
params=snuba_params,
config=SearchResolverConfig(),
config=resolver_config,
offset=0,
limit=serialized.get("traceItemsLimit", 1000),
sampling_mode=snuba_params.sampling_mode,
Expand All @@ -180,6 +189,7 @@ def run_stats_query_with_item_ids(item_id_filter):
search_resolver=resolver,
max_buckets=1,
skip_translate_internal_to_public_alias=True,
include_internal=include_internal,
)

def run_stats_query_with_error_handling(attributes):
Expand All @@ -192,6 +202,7 @@ def run_stats_query_with_error_handling(attributes):
config=resolver_config,
search_resolver=resolver,
attributes=attributes,
include_internal=include_internal,
)

def data_fn(offset: int, limit: int):
Expand Down Expand Up @@ -219,6 +230,20 @@ def data_fn(offset: int, limit: int):
if public_alias in stats_config.excluded_attributes:
continue

if not (
can_expose_attribute_to_api(
internal_name,
api_item_type,
include_internal=include_internal,
)
and can_expose_attribute_to_api(
public_alias,
api_item_type,
include_internal=include_internal,
)
):
continue

if value_substring_match:
if value_substring_match in public_alias:
sanitized_keys.append(internal_name)
Expand Down
45 changes: 38 additions & 7 deletions src/sentry/api/endpoints/project_trace_item_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
TraceItemAttribute,
)
from sentry.search.eap.utils import (
can_expose_attribute,
can_expose_attribute_to_api,
get_deprecated_source_internal_names,
is_sentry_convention_replacement_attribute,
translate_internal_to_public_alias,
Expand Down Expand Up @@ -118,7 +118,7 @@ def convert_rpc_attribute_to_json(
for attribute in attributes:
internal_name = attribute["name"]

if not can_expose_attribute(
if not can_expose_attribute_to_api(
internal_name, trace_item_type, include_internal=include_internal
):
continue
Expand Down Expand Up @@ -209,6 +209,7 @@ def convert_rpc_attribute_to_json(
def serialize_meta(
attributes: list[dict],
trace_item_type: SupportedTraceItemType,
include_internal: bool = False,
) -> dict:
internal_name = ""
attribute = {}
Expand Down Expand Up @@ -243,6 +244,12 @@ def extract_key(key: str) -> str | None:
result = json.loads(attribute["value"]["valStr"])
# Map the internal field key name back to its public name
if field_key in attribute_map:
if not can_expose_attribute_to_api(
field_key,
trace_item_type,
include_internal=include_internal,
):
continue
item_type: Literal["string", "number", "boolean"]
if (
"valInt" in attribute_map[field_key]
Expand Down Expand Up @@ -270,7 +277,11 @@ def extract_key(key: str) -> str | None:
return meta_result


def serialize_links(attributes: list[dict]) -> list[dict] | None:
def serialize_links(
attributes: list[dict],
trace_item_type: SupportedTraceItemType,
include_internal: bool = False,
) -> list[dict] | None:
"""Links are temporarily stored in `sentry.links` so lets parse that back out and return separately"""
link_attribute = None
for attribute in attributes:
Expand All @@ -285,15 +296,26 @@ def serialize_links(attributes: list[dict]) -> list[dict] | None:
value = link_attribute.get("value", {}).get("valStr", None)
if value is not None:
links = json.loads(value)
return [serialize_link(link) for link in links]
return [
serialize_link(
link,
trace_item_type=trace_item_type,
include_internal=include_internal,
)
for link in links
]
else:
return None
except Exception as e:
sentry_sdk.capture_exception(e)
return None


def serialize_link(link: dict) -> dict:
def serialize_link(
link: dict,
trace_item_type: SupportedTraceItemType,
include_internal: bool = False,
) -> dict:
clean_link = {
"itemId": link["span_id"],
"traceId": link["trace_id"],
Expand All @@ -307,6 +329,11 @@ def serialize_link(link: dict) -> dict:
{"name": k, "value": v, "type": infer_type(v)}
for k, v in attributes.items()
if infer_type(v) is not None
and can_expose_attribute_to_api(
k,
trace_item_type,
include_internal=include_internal,
)
]

return clean_link
Expand Down Expand Up @@ -428,8 +455,12 @@ def get(request: Request, project: Project, item_id: str) -> Response:
include_internal=include_internal,
include_arrays=include_arrays,
),
"meta": serialize_meta(resp["attributes"], item_type),
"links": serialize_links(resp["attributes"]),
"meta": serialize_meta(
resp["attributes"], item_type, include_internal=include_internal
),
"links": serialize_links(
resp["attributes"], item_type, include_internal=include_internal
),
}

return Response(resp_dict)
4 changes: 2 additions & 2 deletions src/sentry/data_export/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from sentry.search.eap.constants import PROTOBUF_TYPE_TO_SEARCH_TYPE
from sentry.search.eap.rpc_utils import anyvalue_to_python
from sentry.search.eap.types import SupportedTraceItemType
from sentry.search.eap.utils import can_expose_attribute, translate_internal_to_public_alias
from sentry.search.eap.utils import can_expose_attribute_to_api, translate_internal_to_public_alias
from sentry.search.events.constants import TIMEOUT_ERROR_MESSAGE
from sentry.snuba import discover
from sentry.utils import metrics, snuba
Expand Down Expand Up @@ -146,7 +146,7 @@ def trace_item_to_row(
_merge_trace_export_cell(row, "id", item.item_id.hex() if item.item_id else None)

for internal_key, av in item.attributes.items():
if not can_expose_attribute(internal_key, item_type, include_internal=False):
if not can_expose_attribute_to_api(internal_key, item_type, include_internal=False):
continue
which = av.WhichOneof("value")
value = anyvalue_to_python(av)
Expand Down
16 changes: 13 additions & 3 deletions src/sentry/snuba/occurrences_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,14 @@ def run_stats_query(
max_buckets: int = 75,
skip_translate_internal_to_public_alias: bool = False,
occurrence_category: OccurrenceCategory | None = None,
include_internal: bool = False,
) -> list[dict[str, Any]]:
search_resolver = search_resolver or cls.get_resolver(params, config)
stats_filter, _, _ = search_resolver.resolve_query(query_string)
if search_resolver.has_hidden_api_attributes():
# Hidden attributes here came from the query filter. Dropping them would
# broaden the query and return results for different search semantics.
return []

stats_filter = and_trace_item_filters(
stats_filter, cls._build_category_filter(occurrence_category)
Expand Down Expand Up @@ -315,16 +320,21 @@ def run_stats_query(
response = snuba_rpc.trace_item_stats_rpc(stats_request)
stats = []

from sentry.search.eap.utils import can_expose_attribute, translate_internal_to_public_alias
from sentry.search.eap.utils import (
can_expose_attribute_to_api,
translate_internal_to_public_alias,
)

for result in response.results:
if "attributeDistributions" in stats_types and result.HasField(
"attribute_distributions"
):
attrs: dict[str, list[dict[str, Any]]] = defaultdict(list)
for attribute in result.attribute_distributions.attributes:
if not can_expose_attribute(
attribute.attribute_name, SupportedTraceItemType.OCCURRENCES
if not can_expose_attribute_to_api(
attribute.attribute_name,
SupportedTraceItemType.OCCURRENCES,
include_internal=include_internal,
):
continue

Expand Down
2 changes: 2 additions & 0 deletions src/sentry/snuba/rpc_dataset_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ def _run_table_query(
) -> EAPResponse:
"""Run the query"""
table_request = cls.get_table_rpc_request(query)
if query.resolver.has_hidden_api_attributes():
return {"data": [], "meta": {"fields": {}}, "confidence": []}
rpc_request = table_request.rpc_request
try:
rpc_response = snuba_rpc.table_rpc([rpc_request])[0]
Expand Down
Loading
Loading