fix(spans): Support new attr format (getsentry#78148)
- Attrs written before Sep 23rd have the format `attr_str[something]`; after Sep 23rd, Sentry-defined attrs become `attr_str[sentry.something]`
- This means that until Dec 22nd, when the old data rotates out, we have to query both forms
- We can't just do `has(attr_str, "sentry.something")` to decide which form to query, since `attr_str` isn't a real column in ClickHouse; it's actually something like `attr_str_0`
- Instead, settling on a pseudo coalesce: check whether the attr map contains the unprefixed key and fall back to the `sentry.`-prefixed one otherwise (see the sketch below)
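For illustration, a minimal sketch of the pseudo coalesce as a `snuba_sdk` expression, mirroring what the spans builder below now emits; `key` here is a hypothetical attribute name, not something from the commit:

```python
# Minimal sketch of the pseudo coalesce: prefer the unprefixed map entry if it
# exists (pre-Sep 23rd data), otherwise fall back to the sentry.-prefixed one.
from snuba_sdk import Column, Function

key = "some_attr"  # hypothetical attribute name

coalesced = Function(
    "if",
    [
        Function("mapContains", [Column("attr_str"), key]),
        Column(f"attr_str[{key}]"),
        Column(f"attr_str[sentry.{key}]"),
    ],
)
```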
wmak authored Sep 26, 2024
1 parent 6d05f34 commit 729a0e9
Showing 5 changed files with 90 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/sentry/search/events/builder/base.py
@@ -1309,7 +1309,7 @@ def default_filter_converter(
         if search_filter.operator in ("=", "!=") and search_filter.value.value == "":
             if is_tag or is_attr or is_context or name in self.config.non_nullable_keys:
                 return Condition(lhs, Op(search_filter.operator), value)
-            else:
+            elif isinstance(lhs, Column):
                 # If not a tag, we can just check that the column is null.
                 return Condition(Function("isNull", [lhs]), Op(search_filter.operator), 1)

52 changes: 47 additions & 5 deletions src/sentry/search/events/builder/spans_indexed.py
@@ -89,25 +89,67 @@ def resolve_field(self, raw_field: str, alias: bool = False) -> Column:
             # attr field is less permissive than tags, we can't have - in them
             or "-" in field
         ):
-            return super().resolve_field(raw_field, alias)
+            # Temporary until at least after 22 Dec 2024 when old data rotates out, otherwise we should just call super
+            # here and return default_field without any extra work
+            default_field = super().resolve_field(raw_field, alias)
+            if (
+                isinstance(default_field, Column)
+                and default_field.subscriptable == "attr_str"
+                or isinstance(default_field, AliasedExpression)
+                and default_field.exp.subscriptable == "attr_str"
+            ):
+                key = (
+                    default_field.key
+                    if isinstance(default_field, Column)
+                    else default_field.exp.key
+                )
+                unprefixed_field = Column(f"attr_str[{key}]")
+                prefixed_field = Column(f"attr_str[sentry.{key}]")
+                return Function(
+                    "if",
+                    [
+                        Function("mapContains", [Column("attr_str"), key]),
+                        unprefixed_field,
+                        prefixed_field,
+                    ],
+                    raw_field if alias else None,
+                )
+            else:
+                return default_field
 
         if field_type not in ["number", "string"]:
             raise InvalidSearchQuery(
                 f"Unknown type for field {raw_field}, only string and number are supported"
             )
 
         if field_type == "string":
-            col = Column(f"attr_str[{field}]")
+            attr_type = "attr_str"
+            field_col = Column(f"attr_str[{field}]")
         else:
-            col = Column(f"attr_num[{field}]")
+            attr_type = "attr_num"
+            field_col = Column(f"attr_num[{field}]")
 
         if alias:
             field_alias = f"tags_{field}@{field_type}"
             self.typed_tag_to_alias_map[raw_field] = field_alias
             self.alias_to_typed_tag_map[field_alias] = raw_field
-            return AliasedExpression(col, field_alias)
         else:
-            return col
+            field_alias = None
+
+        # Temporary until at least after 22 Dec 2024 when old data rotates out
+        unprefixed_field = field_col
+        prefixed_field = Column(f"{attr_type}[sentry.{field}]")
+        col = Function(
+            "if",
+            [
+                Function("mapContains", [Column(attr_type), field]),
+                unprefixed_field,
+                prefixed_field,
+            ],
+            field_alias,
+        )
+
+        return col


class TimeseriesSpanIndexedQueryBuilder(SpansIndexedQueryBuilderMixin, TimeseriesQueryBuilder):
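To make the typed-tag path above concrete, a distilled sketch using a hypothetical helper (`coalesce_attr` is not part of the commit):

```python
# Hypothetical helper distilling the fallback above: prefer the unprefixed map
# entry, fall back to the sentry.-prefixed one, and attach the alias if any.
from snuba_sdk import Column, Function


def coalesce_attr(attr_type: str, field: str, alias: str | None = None) -> Function:
    return Function(
        "if",
        [
            Function("mapContains", [Column(attr_type), field]),
            Column(f"{attr_type}[{field}]"),
            Column(f"{attr_type}[sentry.{field}]"),
        ],
        alias,
    )


# Roughly what a numeric typed tag now resolves to:
numeric_tag = coalesce_attr("attr_num", "foo", "tags_foo@number")
```

As the in-code comments note, this fallback is temporary: once data written before Sep 23rd has rotated out (after 22 Dec 2024), the coalesce can be dropped and `resolve_field` can return the plain column or default field without the extra work.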
6 changes: 3 additions & 3 deletions src/sentry/search/events/datasets/field_aliases.py
@@ -93,11 +93,11 @@ def resolve_span_module(builder: BaseQueryBuilder, alias: str) -> SelectType:
     return Function(
         "if",
         [
-            Function("in", [builder.column("span.op"), list(OP_MAPPING.keys())]),
+            Function("in", [builder.resolve_field("span.op"), list(OP_MAPPING.keys())]),
             Function(
                 "transform",
                 [
-                    builder.column("span.op"),
+                    builder.resolve_field("span.op"),
                     list(OP_MAPPING.keys()),
                     list(OP_MAPPING.values()),
                     "other",
@@ -106,7 +106,7 @@ def resolve_span_module(builder: BaseQueryBuilder, alias: str) -> SelectType:
             Function(
                 "transform",
                 [
-                    builder.column("span.category"),
+                    builder.resolve_field("span.category"),
                     constants.SPAN_MODULE_CATEGORY_VALUES,
                     constants.SPAN_MODULE_CATEGORY_VALUES,
                     "other",
6 changes: 6 additions & 0 deletions src/sentry/utils/snuba.py
@@ -201,13 +201,19 @@ def log_snuba_info(content):
"trace": "trace_id",
"transaction": "segment_name",
"transaction.id": "segment_id",
"transaction.method": "attr_str[transaction.method]",
"is_transaction": "is_segment",
"segment.id": "segment_id",
# We should be able to delete origin.transaction and just use transaction
"origin.transaction": "segment_name",
# Copy paste, unsure if this is truth in production
"messaging.destination.name": "attr_str[messaging.destination.name]",
"messaging.message.id": "attr_str[messaging.message.id]",
"span.status_code": "attr_str[status_code]",
"replay.id": "attr_str[replay_id]",
"span.ai.pipeline.group": "attr_str[ai_pipeline_group]",
"trace.status": "attr_str[trace.status]",
"browser.name": "attr_str[browser.name]",
"ai.total_tokens.used": "attr_num[ai_total_tokens_used]",
"ai.total_cost": "attr_num[ai_total_cost]",
}
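An inferred illustration (not commit text) of how these mappings feed the builder change: a mapped attr such as `transaction.method` resolves to a subscriptable `Column`, whose `subscriptable`/`key` then drive the coalesce in `spans_indexed.py`:

```python
# Sketch: the column the map above yields for transaction.method, and the
# fallback expression the spans builder wraps it in during the transition.
from snuba_sdk import Column, Function

mapped = Column("attr_str[transaction.method]")  # value from the map above
# snuba_sdk splits the subscriptable name and key for us:
# mapped.subscriptable == "attr_str", mapped.key == "transaction.method"

resolved = Function(
    "if",
    [
        Function("mapContains", [Column(mapped.subscriptable), mapped.key]),
        mapped,
        Column(f"{mapped.subscriptable}[sentry.{mapped.key}]"),
    ],
)
```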
40 changes: 33 additions & 7 deletions tests/snuba/api/endpoints/test_organization_events_span_indexed.py
@@ -525,7 +525,9 @@ def test_query_for_missing_tag(self):
         assert response.data["data"] == [{"foo": "", "count()": 1}]
 
 
-@pytest.mark.xfail(reason="Snuba is prefixing keys, and Sentry wasn't updated first")
+@pytest.mark.xfail(
+    reason="Snuba is not stable for the EAP dataset, xfailing since its prone to failure"
+)
 class OrganizationEventsEAPSpanEndpointTest(OrganizationEventsSpanIndexedEndpointTest):
     is_eap = True

@@ -649,7 +651,11 @@ def test_numeric_attr_without_space(self):
         self.store_spans(
             [
                 self.create_span(
-                    {"description": "foo", "sentry_tags": {"status": "success", "foo": "five"}},
+                    {
+                        "description": "foo",
+                        "sentry_tags": {"status": "success"},
+                        "tags": {"foo": "five"},
+                    },
                     measurements={"foo": {"value": 5}},
                     start_ts=self.ten_mins_ago,
                 ),
@@ -678,7 +684,11 @@ def test_numeric_attr_with_spaces(self):
         self.store_spans(
             [
                 self.create_span(
-                    {"description": "foo", "sentry_tags": {"status": "success", "foo": "five"}},
+                    {
+                        "description": "foo",
+                        "sentry_tags": {"status": "success"},
+                        "tags": {"foo": "five"},
+                    },
                     measurements={"foo": {"value": 5}},
                     start_ts=self.ten_mins_ago,
                 ),
@@ -707,7 +717,11 @@ def test_numeric_attr_filtering(self):
         self.store_spans(
             [
                 self.create_span(
-                    {"description": "foo", "sentry_tags": {"status": "success", "foo": "five"}},
+                    {
+                        "description": "foo",
+                        "sentry_tags": {"status": "success"},
+                        "tags": {"foo": "five"},
+                    },
                     measurements={"foo": {"value": 5}},
                     start_ts=self.ten_mins_ago,
                 ),
@@ -754,17 +768,29 @@ def test_numeric_attr_orderby(self):
         self.store_spans(
             [
                 self.create_span(
-                    {"description": "baz", "sentry_tags": {"status": "success", "foo": "five"}},
+                    {
+                        "description": "baz",
+                        "sentry_tags": {"status": "success"},
+                        "tags": {"foo": "five"},
+                    },
                     measurements={"foo": {"value": 71}},
                     start_ts=self.ten_mins_ago,
                 ),
                 self.create_span(
-                    {"description": "foo", "sentry_tags": {"status": "success", "foo": "five"}},
+                    {
+                        "description": "foo",
+                        "sentry_tags": {"status": "success"},
+                        "tags": {"foo": "five"},
+                    },
                     measurements={"foo": {"value": 5}},
                     start_ts=self.ten_mins_ago,
                 ),
                 self.create_span(
-                    {"description": "bar", "sentry_tags": {"status": "success", "foo": "five"}},
+                    {
+                        "description": "bar",
+                        "sentry_tags": {"status": "success"},
+                        "tags": {"foo": "five"},
+                    },
                     measurements={"foo": {"value": 8}},
                     start_ts=self.ten_mins_ago,
                 ),