forked from Hirni-Meshram2/pants
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Send anonymous telemetry to bugout (pantsbuild#11697)
Uses the `humbug` library to send anonymized telemetry data to bugout. Repos must opt in to telemetry. We log a warning if a repo has not opted in or out. See the code and also https://www.pantsbuild.org/v2.4/docs/anonymous-telemetry for more info on the data we send and how we preserve anonymity and prevent leakage of proprietary information. [ci skip-rust] [ci skip-build-wheels]
- Loading branch information
Showing
9 changed files
with
425 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md). | ||
# Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
from __future__ import annotations | ||
|
||
import json | ||
import logging | ||
import re | ||
import uuid | ||
from typing import cast | ||
|
||
from humbug.consent import HumbugConsent # type: ignore | ||
from humbug.report import Modes, Report, Reporter # type: ignore | ||
|
||
from pants.engine.internals.scheduler import Workunit | ||
from pants.engine.rules import collect_rules, rule | ||
from pants.engine.streaming_workunit_handler import ( | ||
StreamingWorkunitContext, | ||
WorkunitsCallback, | ||
WorkunitsCallbackFactory, | ||
WorkunitsCallbackFactoryRequest, | ||
) | ||
from pants.engine.unions import UnionRule | ||
from pants.option.subsystem import Subsystem | ||
from pants.util.docutil import docs_url | ||
|
||
logger = logging.getLogger(__name__)


# Identify the Bugout journal that telemetry reports are published to.
# NOTE(review): these values are committed in plain text; presumably the token is a
# restricted, report-only token as humbug expects -- confirm before reusing it elsewhere.
_bugout_access_token = "3ae76900-9a68-4a87-a127-7c9f179d7272"
_bugout_journal_id = "801e9b3c-6b03-40a7-870f-5b25d326da66"
# Shared documentation pointer appended to option help text and to log messages.
_telemetry_docs_url = docs_url("anonymous-telemetry")
_telemetry_docs_referral = f"See {_telemetry_docs_url} for details"
|
||
|
||
class AnonymousTelemetry(Subsystem):
    """Subsystem holding the opt-in flag and the repo ID used for anonymous telemetry."""

    options_scope = "anonymous-telemetry"
    help = "Options related to sending anonymous stats to the Pants project, to aid development."

    @classmethod
    def register_options(cls, register):
        """Register the `--enabled` and `--repo-id` options through `register`."""
        register(
            "--enabled",
            advanced=True,
            type=bool,
            # Telemetry is opt-in: nothing is sent unless a repo explicitly enables it.
            default=False,
            help=(
                "Whether to send anonymous telemetry to the Pants project.\nTelemetry is sent "
                "asynchronously, with silent failure, and does not impact build times or "
                f"outcomes.\n{_telemetry_docs_referral}."
            ),
        )
        register(
            "--repo-id",
            advanced=True,
            type=str,
            default=None,
            help=(
                "An anonymized ID representing this repo.\nFor private repos, you likely want the "
                "ID to not be derived from, or algorithmically convertible to, anything "
                "identifying the repo.\nFor public repos the ID may be visible in that repo's "
                "config file, so anonymity of the repo is not guaranteed (although user anonymity "
                f"is always guaranteed).\n{_telemetry_docs_referral}."
            ),
        )

    @property
    def enabled(self) -> bool:
        """The value of the `--enabled` option."""
        return cast(bool, self.options.enabled)

    @property
    def repo_id(self) -> str | None:
        """The value of the `--repo-id` option, or None when it is unset."""
        return cast("str | None", self.options.repo_id)
|
||
|
||
class AnonymousTelemetryCallback(WorkunitsCallback):
    """Workunits callback that publishes one anonymous telemetry report when a run finishes."""

    # A valid repo ID is 30-60 ASCII letters, digits, dashes or underscores. The pattern is
    # applied with `fullmatch`: `match` with a `$` anchor would also accept an ID that ends
    # in a newline, because `$` matches just before a trailing "\n".
    _repo_id_pattern = re.compile(r"[a-zA-Z0-9_-]{30,60}")

    def __init__(self, anonymous_telemetry: AnonymousTelemetry) -> None:
        super().__init__()
        self._anonymous_telemetry = anonymous_telemetry

    @property
    def can_finish_async(self) -> bool:
        # Declares that this callback may finish in the background.
        # NOTE(review): `__call__` does emit warnings/errors through `logger` when telemetry
        # is unconfigured or misconfigured -- confirm those messages still reach the user
        # when the callback finishes asynchronously.
        return True

    @staticmethod
    def validate_repo_id(repo_id: str) -> bool:
        """Return True if `repo_id` is well-formed; otherwise log an error and return False.

        Well-formed means 30 to 60 characters, each an ASCII letter, digit, dash or
        underscore, with nothing before or after (including no trailing newline).
        """
        is_valid = AnonymousTelemetryCallback._repo_id_pattern.fullmatch(repo_id) is not None
        if not is_valid:
            logger.error(
                "The repo_id must be between 30 and 60 characters long, and consist of only "
                "alphanumeric characters, dashes and underscores."
            )
        return is_valid

    def __call__(
        self,
        *,
        started_workunits: tuple[Workunit, ...],
        completed_workunits: tuple[Workunit, ...],
        finished: bool,
        context: StreamingWorkunitContext,
    ) -> None:
        """Send the telemetry report for this run, if the repo has opted in.

        Does nothing until `finished` is True. Warns when `enabled` was never set explicitly,
        logs an error when telemetry is enabled but `repo_id` is missing or malformed, and
        otherwise publishes a single report built from the run tracker's telemetry data.

        :param started_workunits: Unused.
        :param completed_workunits: Unused.
        :param finished: Whether this is the final invocation for the run.
        :param context: Provides the `run_tracker` that the telemetry data is read from.
        """
        if not finished:
            return

        if self._anonymous_telemetry.options.is_default("enabled"):
            logger.warning(
                "Please either set `enabled = true` in the [anonymous-telemetry] section of "
                "pants.toml to enable sending anonymous stats to the Pants project to aid "
                "development, or set `enabled = false` to disable it. No telemetry sent "
                "for this run. An explicit setting will get rid of this message. "
                f"{_telemetry_docs_referral}."
            )

        if not self._anonymous_telemetry.enabled:
            return

        repo_id = self._anonymous_telemetry.repo_id
        if repo_id is None:
            logger.error(
                'Please set `repo_id = "<uuid>"` in the [anonymous-telemetry] section '
                "of pants.toml, where `<uuid>` is some fixed random identifier, such as "
                "one generated by uuidgen. No telemetry sent for this run. "
                f"{_telemetry_docs_referral}."
            )
            return
        if not self.validate_repo_id(repo_id):
            # validate_repo_id() has already logged the reason.
            return

        # Assemble and send the telemetry.
        # Note that this method is called with finished=True only after the
        # StreamingWorkunitHandler context ends, i.e., after end_run() has been called,
        # so the RunTracker will have had a chance to finalize its state.
        telemetry_data = context.run_tracker.get_anonymous_telemetry_data(repo_id)
        # TODO: Add information about any errors that occurred.

        reporter = Reporter(
            name="pantsbuild/pants",
            # We've already established consent at this point.
            consent=HumbugConsent(True),
            # Fall back to a random session ID if the data carries no run ID.
            session_id=telemetry_data.get("run_id", str(uuid.uuid4())),
            bugout_token=_bugout_access_token,
            bugout_journal_id=_bugout_journal_id,
            timeout_seconds=5,
            # We don't want to spawn a thread in the engine, and we're
            # already running in a background thread in pantsd.
            mode=Modes.SYNCHRONOUS,
        )

        # This is copied from humbug code, to ensure that future changes to humbug
        # don't add tags that inadvertently violate our anonymity promise.
        system_info = reporter.system_information
        system_tags = [
            "humbug",
            f"source:{reporter.name}",
            f"os:{system_info.os}",
            f"arch:{system_info.machine}",
            f"python:{system_info.python_version_major}",
            f"python:{system_info.python_version_major}.{system_info.python_version_minor}",
            f"python:{system_info.python_version}",
            f"session:{reporter.session_id}",
        ]
        tags = [
            *system_tags,
            f"pants_version:{telemetry_data.get('pants_version')}",
            *(f"goal:{goal}" for goal in telemetry_data.get("goals", [])),
        ]

        report = Report(
            title=f"pants run {reporter.session_id}",
            tags=tags,
            content=json.dumps(telemetry_data, sort_keys=True),
        )
        reporter.publish(report)
|
||
|
||
class AnonymousTelemetryCallbackFactoryRequest:
    """A unique request type that is installed to trigger construction of the WorkunitsCallback.

    It carries no data. `rules()` registers it as a member of the
    `WorkunitsCallbackFactoryRequest` union, and the `construct_callback` rule accepts it.
    """
|
||
|
||
@rule
def construct_callback(
    _: AnonymousTelemetryCallbackFactoryRequest, anonymous_telemetry: AnonymousTelemetry
) -> WorkunitsCallbackFactory:
    """Return a factory that builds an `AnonymousTelemetryCallback` for the given subsystem.

    The request argument is unused; only its type matters for selecting this rule.
    """
    return WorkunitsCallbackFactory(lambda: AnonymousTelemetryCallback(anonymous_telemetry))
|
||
|
||
def rules():
    """Return this module's rules plus the union registration for the callback request type."""
    return [
        UnionRule(WorkunitsCallbackFactoryRequest, AnonymousTelemetryCallbackFactoryRequest),
        *collect_rules(),
    ]
60 changes: 60 additions & 0 deletions
60
src/python/pants/goal/anonymous_telemetry_integration_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md). | ||
# Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
from __future__ import annotations | ||
|
||
from pants.testutil.pants_integration_test import run_pants | ||
|
||
# Substrings of the messages the telemetry callback logs; the tests look for them in stderr.
_no_explicit_setting_msg = "An explicit setting will get rid of this message"
_no_repo_id_msg = 'set `repo_id = "<uuid>"` in the [anonymous-telemetry] section of pants.toml'
_bad_repo_id_msg = "The repo_id must be between 30 and 60 characters long"
|
||
|
||
def test_warn_if_no_explicit_setting() -> None:
    """With no telemetry config at all, only the "explicit setting" warning is emitted."""
    result = run_pants(["roots"], config={}, use_pantsd=False)
    result.assert_success()
    assert _no_explicit_setting_msg in result.stderr
    assert _no_repo_id_msg not in result.stderr
    assert _bad_repo_id_msg not in result.stderr
|
||
|
||
def test_warn_if_repo_id_unset() -> None:
    """Enabling telemetry without a repo_id emits only the "set repo_id" message."""
    result = run_pants(
        ["roots"], config={"anonymous-telemetry": {"enabled": True}}, use_pantsd=False
    )
    result.assert_success()
    assert _no_explicit_setting_msg not in result.stderr
    assert _no_repo_id_msg in result.stderr
    assert _bad_repo_id_msg not in result.stderr
|
||
|
||
def test_warn_if_repo_id_invalid() -> None:
    """Enabling telemetry with a too-short repo_id emits only the "bad repo_id" message."""
    result = run_pants(
        ["roots"],
        config={"anonymous-telemetry": {"enabled": True, "repo_id": "tooshort"}},
        use_pantsd=False,
    )
    result.assert_success()
    assert _no_explicit_setting_msg not in result.stderr
    assert _no_repo_id_msg not in result.stderr
    assert _bad_repo_id_msg in result.stderr
|
||
|
||
def test_no_warn_if_explicitly_on() -> None:
    """Enabling telemetry with a well-formed repo_id emits none of the messages."""
    result = run_pants(
        ["roots"],
        config={"anonymous-telemetry": {"enabled": True, "repo_id": 36 * "a"}},
        use_pantsd=False,
    )
    result.assert_success()
    assert _no_explicit_setting_msg not in result.stderr
    assert _no_repo_id_msg not in result.stderr
    assert _bad_repo_id_msg not in result.stderr
|
||
|
||
def test_no_warn_if_explicitly_off() -> None:
    """Explicitly disabling telemetry emits none of the messages."""
    # NOTE(review): unlike the sibling tests, this one does not pass `use_pantsd=False` --
    # confirm that is intentional and that stderr still reflects the callback's logging.
    result = run_pants(["roots"], config={"anonymous-telemetry": {"enabled": False}})
    result.assert_success()
    assert _no_explicit_setting_msg not in result.stderr
    assert _no_repo_id_msg not in result.stderr
    assert _bad_repo_id_msg not in result.stderr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md). | ||
# Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
import pytest | ||
|
||
from pants.goal.anonymous_telemetry import AnonymousTelemetryCallback | ||
|
||
|
||
@pytest.mark.parametrize(
    "repo_id",
    [
        # Length boundaries (30 and 60) and a length just inside the lower bound.
        "a" * 30,
        "2" * 31,
        "C" * 60,
        # UUID-shaped IDs using dashes and underscores.
        "c1db8737-06b4-4aa8-b18f-8cde023eb524",
        "D2E39BA4_BA82_4A85_99DC_9E99E4528D3F",
    ],
)
def test_valid_repo_ids(repo_id: str) -> None:
    """Well-formed repo IDs are accepted by `validate_repo_id`."""
    assert AnonymousTelemetryCallback.validate_repo_id(repo_id)
|
||
|
||
@pytest.mark.parametrize(
    "repo_id",
    [
        # Too short (including empty) and too long.
        "",
        "x",
        "a" * 29,
        "2" * 61,
        # Disallowed characters: a punctuation prefix and a non-ASCII letter.
        "@c1db8737-06b4-4aa8-b18f-8cde023eb524",
        "D2E39BA4-BA82-4A85-99DC-9Eá9E4528D3F",
    ],
)
def test_invalid_repo_ids(repo_id: str) -> None:
    """Malformed repo IDs are rejected by `validate_repo_id`."""
    assert not AnonymousTelemetryCallback.validate_repo_id(repo_id)
Oops, something went wrong.