Skip to content

Commit

Permalink
ref(alerts): Updates for new Seer response (getsentry#77372)
Browse files Browse the repository at this point in the history
Update the places we call Seer to expect `success` as a bool and an
optional `message` which gets added if `success` is false.

Closes https://getsentry.atlassian.net/browse/ALRT-264
  • Loading branch information
ceorourke authored Sep 16, 2024
1 parent 3bcec2b commit 8259fe4
Show file tree
Hide file tree
Showing 18 changed files with 290 additions and 105 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from drf_spectacular.utils import extend_schema
from pydantic import BaseModel
from rest_framework.request import Request
from rest_framework.response import Response

Expand All @@ -25,11 +24,7 @@
from sentry.seer.anomaly_detection.get_historical_anomalies import (
get_historical_anomaly_data_from_seer,
)
from sentry.seer.anomaly_detection.types import TimeSeriesPoint


class DetectAnomaliesResponse(BaseModel):
timeseries: list[TimeSeriesPoint]
from sentry.seer.anomaly_detection.types import DetectAnomaliesResponse


@region_silo_endpoint
Expand Down
12 changes: 10 additions & 2 deletions src/sentry/incidents/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from django.db.models.signals import post_save
from django.forms import ValidationError
from django.utils import timezone as django_timezone
from parsimonious.exceptions import ParseError
from snuba_sdk import Column, Condition, Limit, Op
from urllib3.exceptions import MaxRetryError, TimeoutError

Expand Down Expand Up @@ -667,7 +668,10 @@ def create_alert_rule(
except (TimeoutError, MaxRetryError):
alert_rule.delete()
raise TimeoutError("Failed to send data to Seer - cannot create alert rule.")
except ValidationError:
except ParseError:
alert_rule.delete()
raise ParseError("Failed to parse Seer store data response")
except (ValidationError, Exception):
alert_rule.delete()
raise
else:
Expand Down Expand Up @@ -949,7 +953,11 @@ def update_alert_rule(
alert_rule.update(status=AlertRuleStatus.NOT_ENOUGH_DATA.value)
except (TimeoutError, MaxRetryError):
raise TimeoutError("Failed to send data to Seer - cannot update alert rule.")
except ValidationError:
except ParseError:
raise ParseError(
"Failed to parse Seer store data response - cannot update alert rule."
)
except (ValidationError, Exception):
# If there's no historical data available—something went wrong when querying snuba
raise ValidationError("Failed to send data to Seer - cannot update alert rule.")
else:
Expand Down
5 changes: 5 additions & 0 deletions src/sentry/incidents/serializers/alert_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from django.conf import settings
from django.db import router, transaction
from django.utils import timezone
from parsimonious.exceptions import ParseError
from rest_framework import serializers
from snuba_sdk import Column, Condition, Entity, Limit, Op
from urllib3.exceptions import MaxRetryError, TimeoutError
Expand Down Expand Up @@ -514,6 +515,8 @@ def create(self, validated_data):
)
except (TimeoutError, MaxRetryError):
raise RequestTimeout
except ParseError:
raise serializers.ValidationError("Failed to parse Seer store data response")
except forms.ValidationError as e:
# if we fail in create_metric_alert, then only one message is ever returned
raise serializers.ValidationError(e.error_list[0].message)
Expand Down Expand Up @@ -546,6 +549,8 @@ def update(self, instance, validated_data):
)
except (TimeoutError, MaxRetryError):
raise RequestTimeout
except ParseError:
raise serializers.ValidationError("Failed to parse Seer store data response")
except forms.ValidationError as e:
# if we fail in update_metric_alert, then only one message is ever returned
raise serializers.ValidationError(e.error_list[0].message)
Expand Down
88 changes: 57 additions & 31 deletions src/sentry/incidents/subscription_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
AnomalyDetectionConfig,
AnomalyType,
DetectAnomaliesRequest,
DetectAnomaliesResponse,
TimeSeriesPoint,
)
from sentry.seer.anomaly_detection.utils import translate_direction
Expand Down Expand Up @@ -681,7 +682,9 @@ def anomaly_has_confidence(self, anomaly: TimeSeriesPoint) -> bool:
anomaly_type = anomaly.get("anomaly", {}).get("anomaly_type")
return anomaly_type != AnomalyType.NO_DATA.value

def get_anomaly_data_from_seer(self, aggregation_value: float | None):
def get_anomaly_data_from_seer(
self, aggregation_value: float | None
) -> list[TimeSeriesPoint] | None:
anomaly_detection_config = AnomalyDetectionConfig(
time_period=int(self.alert_rule.snuba_query.time_window / 60),
sensitivity=self.alert_rule.sensitivity,
Expand All @@ -700,6 +703,13 @@ def get_anomaly_data_from_seer(self, aggregation_value: float | None):
config=anomaly_detection_config,
context=context,
)
extra_data = {
"subscription_id": self.subscription.id,
"dataset": self.subscription.snuba_query.dataset,
"organization_id": self.subscription.project.organization.id,
"project_id": self.subscription.project_id,
"alert_rule_id": self.alert_rule.id,
}
try:
response = make_signed_seer_api_request(
self.seer_anomaly_detection_connection_pool,
Expand All @@ -708,43 +718,22 @@ def get_anomaly_data_from_seer(self, aggregation_value: float | None):
)
except (TimeoutError, MaxRetryError):
logger.warning(
"Timeout error when hitting anomaly detection endpoint",
extra={
"subscription_id": self.subscription.id,
"dataset": self.subscription.snuba_query.dataset,
"organization_id": self.subscription.project.organization.id,
"project_id": self.subscription.project_id,
"alert_rule_id": self.alert_rule.id,
},
"Timeout error when hitting anomaly detection endpoint", extra=extra_data
)
return None

if response.status != 200:
if response.status > 400:
logger.error(
f"Received {response.status} when calling Seer endpoint {SEER_ANOMALY_DETECTION_ENDPOINT_URL}.", # noqa
extra={"response_data": response.data},
"Error when hitting Seer detect anomalies endpoint",
extra={
"response_data": response.data,
**extra_data,
},
)
return None

try:
results = json.loads(response.data.decode("utf-8")).get("timeseries")
if not results:
logger.warning(
"Seer anomaly detection response returned no potential anomalies",
extra={
"ad_config": anomaly_detection_config,
"context": context,
"response_data": response.data,
"reponse_code": response.status,
},
)
return None
return results
except (
AttributeError,
UnicodeError,
JSONDecodeError,
):
decoded_data = response.data.decode("utf-8")
except AttributeError:
logger.exception(
"Failed to parse Seer anomaly detection response",
extra={
Expand All @@ -756,6 +745,43 @@ def get_anomaly_data_from_seer(self, aggregation_value: float | None):
)
return None

try:
results: DetectAnomaliesResponse = json.loads(decoded_data)
except JSONDecodeError:
logger.exception(
"Failed to parse Seer anomaly detection response",
extra={
"ad_config": anomaly_detection_config,
"context": context,
"response_data": decoded_data,
"reponse_code": response.status,
},
)
return None

if not results.get("success"):
logger.error(
"Error when hitting Seer detect anomalies endpoint",
extra={
"error_message": results.get("message", ""),
**extra_data,
},
)
return None

ts = results.get("timeseries")
if not ts:
logger.warning(
"Seer anomaly detection response returned no potential anomalies",
extra={
"ad_config": anomaly_detection_config,
"context": context,
"response_data": results.get("message"),
},
)
return None
return ts

def calculate_event_date_from_update_date(self, update_date: datetime) -> datetime:
"""
Calculates the date that an event actually happened based on the date that we
Expand Down
55 changes: 54 additions & 1 deletion src/sentry/seer/anomaly_detection/store_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from django.conf import settings
from django.core.exceptions import ValidationError
from parsimonious.exceptions import ParseError
from urllib3.exceptions import MaxRetryError, TimeoutError

from sentry.conf.server import SEER_ANOMALY_DETECTION_STORE_DATA_URL
Expand All @@ -13,6 +14,7 @@
AlertInSeer,
AnomalyDetectionConfig,
StoreDataRequest,
StoreDataResponse,
TimeSeriesPoint,
)
from sentry.seer.anomaly_detection.utils import (
Expand All @@ -24,6 +26,7 @@
from sentry.snuba.models import SnubaQuery
from sentry.snuba.utils import get_dataset
from sentry.utils import json
from sentry.utils.json import JSONDecodeError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -68,6 +71,7 @@ def send_historical_data_to_seer(alert_rule: AlertRule, project: Project) -> Ale
formatted_data = format_historical_data(historical_data, dataset)
if not formatted_data:
raise ValidationError("Unable to get historical data for this alert.")

if (
not alert_rule.sensitivity
or not alert_rule.seasonality
Expand All @@ -92,7 +96,7 @@ def send_historical_data_to_seer(alert_rule: AlertRule, project: Project) -> Ale
timeseries=formatted_data,
)
try:
make_signed_seer_api_request(
response = make_signed_seer_api_request(
connection_pool=seer_anomaly_detection_connection_pool,
path=SEER_ANOMALY_DETECTION_STORE_DATA_URL,
body=json.dumps(body).encode("utf-8"),
Expand All @@ -108,6 +112,55 @@ def send_historical_data_to_seer(alert_rule: AlertRule, project: Project) -> Ale
)
raise TimeoutError

if response.status > 400:
logger.error(
"Error when hitting Seer detect anomalies endpoint",
extra={"response_code": response.status},
)
raise Exception("Error when hitting Seer detect anomalies endpoint")

try:
decoded_data = response.data.decode("utf-8")
except AttributeError:
data_format_error_string = "Seer store data response data is malformed"
logger.exception(
data_format_error_string,
extra={
"ad_config": anomaly_detection_config,
"alert": alert_rule.id,
"response_data": response.data,
"reponse_code": response.status,
},
)
raise AttributeError(data_format_error_string)

try:
results: StoreDataResponse = json.loads(decoded_data)
except JSONDecodeError:
parse_error_string = "Failed to parse Seer store data response"
logger.exception(
parse_error_string,
extra={
"ad_config": anomaly_detection_config,
"alert": alert_rule.id,
"response_data": response.data,
"reponse_code": response.status,
},
)
raise ParseError(parse_error_string)

if not results.get("success"):
message = results.get("message", "")
logger.error(
"Error when hitting Seer store data endpoint",
extra={
"rule_id": alert_rule.id,
"project_id": project.id,
"error_message": message,
},
)
raise Exception(message)

MIN_DAYS = 7
data_start_index, data_end_index = _get_start_and_end_indices(formatted_data)
if data_start_index == -1:
Expand Down
11 changes: 11 additions & 0 deletions src/sentry/seer/anomaly_detection/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,24 @@ class StoreDataRequest(TypedDict):
timeseries: list[TimeSeriesPoint]


class StoreDataResponse(TypedDict):
success: bool
message: NotRequired[str]


class DetectAnomaliesRequest(TypedDict):
organization_id: int
project_id: int
config: AnomalyDetectionConfig
context: AlertInSeer | list[TimeSeriesPoint]


class DetectAnomaliesResponse(TypedDict):
success: bool
message: NotRequired[str]
timeseries: list[TimeSeriesPoint]


class AnomalyType(Enum):
HIGH_CONFIDENCE = "anomaly_higher_confidence"
LOW_CONFIDENCE = "anomaly_lower_confidence"
Expand Down
6 changes: 5 additions & 1 deletion tests/sentry/incidents/action_handlers/test_email.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from functools import cached_property
from unittest.mock import patch

import orjson
import pytest
import responses
from django.conf import settings
Expand All @@ -26,6 +27,7 @@
from sentry.incidents.models.incident import INCIDENT_STATUS, IncidentStatus, TriggerStatus
from sentry.incidents.utils.types import AlertRuleActivationConditionType
from sentry.models.notificationsettingoption import NotificationSettingOption
from sentry.seer.anomaly_detection.types import StoreDataResponse
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.snuba.dataset import Dataset
Expand Down Expand Up @@ -351,7 +353,9 @@ def test_with_activated_alert(self):
"sentry.seer.anomaly_detection.store_data.seer_anomaly_detection_connection_pool.urlopen"
)
def test_dynamic_alert(self, mock_seer_request):
mock_seer_request.return_value = HTTPResponse(status=200)

seer_return_value: StoreDataResponse = {"success": True}
mock_seer_request.return_value = HTTPResponse(orjson.dumps(seer_return_value), status=200)
trigger_status = TriggerStatus.ACTIVE
alert_rule = self.create_alert_rule(
detection_type=AlertRuleDetectionType.DYNAMIC,
Expand Down
5 changes: 4 additions & 1 deletion tests/sentry/incidents/action_handlers/test_msteams.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import cast
from unittest.mock import patch

import orjson
import responses
from urllib3.response import HTTPResponse

Expand All @@ -22,6 +23,7 @@
TextBlock,
)
from sentry.integrations.msteams.spec import MsTeamsMessagingSpec
from sentry.seer.anomaly_detection.types import StoreDataResponse
from sentry.silo.base import SiloMode
from sentry.testutils.helpers.datetime import freeze_time
from sentry.testutils.helpers.features import with_feature
Expand Down Expand Up @@ -135,7 +137,8 @@ def test_build_incident_attachment_dynamic_alert(self, mock_seer_request):
build_incident_attachment,
)

mock_seer_request.return_value = HTTPResponse(status=200)
seer_return_value: StoreDataResponse = {"success": True}
mock_seer_request.return_value = HTTPResponse(orjson.dumps(seer_return_value), status=200)
alert_rule = self.create_alert_rule(
detection_type=AlertRuleDetectionType.DYNAMIC,
time_window=30,
Expand Down
Loading

0 comments on commit 8259fe4

Please sign in to comment.