Skip to content

Commit

Permalink
Integrate metrics (ray-project#4246)
Browse files Browse the repository at this point in the history
  • Loading branch information
jovany-wang authored and pcmoritz committed Apr 3, 2019
1 parent 8e19d37 commit 7d776f3
Show file tree
Hide file tree
Showing 14 changed files with 560 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,6 @@ venv
# Vim
.*.swp
*.swp

# tools
tools/prometheus*
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ install:
- ./ci/suppress_output ./ci/travis/install-cython-examples.sh

- ./ci/suppress_output bash src/ray/test/run_gcs_tests.sh
# stats test.
- ./ci/suppress_output bazel build //:stats_test -c opt
- ./bazel-bin/stats_test

# Raylet tests.
- ./ci/suppress_output bash src/ray/test/run_object_manager_tests.sh
- ./ci/suppress_output bazel test --build_tests_only --test_lang_filters=cc //:all
Expand Down
53 changes: 53 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,40 @@ cc_binary(
],
)

cc_library(
name = "stats_lib",
srcs = glob(
[
"src/ray/stats/*.cc",
],
exclude = [
"src/ray/stats/*_test.cc",
],
),
hdrs = glob(
[
"src/ray/stats/*.h",
],
),
copts = COPTS,
includes = [
"src",
],
linkopts = ["-pthread"],
deps = [
":ray_util",
"@com_github_jupp0r_prometheus_cpp//pull",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest",
"@io_opencensus_cpp//opencensus/exporters/stats/prometheus:prometheus_exporter",
"@io_opencensus_cpp//opencensus/exporters/stats/stdout:stdout_exporter",
"@io_opencensus_cpp//opencensus/stats",
"@io_opencensus_cpp//opencensus/tags",
],
)

cc_library(
name = "raylet_lib",
srcs = glob(
Expand All @@ -48,15 +82,24 @@ cc_library(
"src/ray/raylet/*.h",
]),
copts = COPTS,
linkopts = ["-pthread"],
deps = [
":gcs",
":gcs_fbs",
":node_manager_fbs",
":object_manager",
":ray_common",
":ray_util",
":stats_lib",
"@boost//:asio",
"@com_github_jupp0r_prometheus_cpp//pull",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest",
"@io_opencensus_cpp//opencensus/exporters/stats/prometheus:prometheus_exporter",
"@io_opencensus_cpp//opencensus/stats",
"@io_opencensus_cpp//opencensus/tags",
"@plasma//:plasma_client",
],
)
Expand Down Expand Up @@ -146,6 +189,15 @@ cc_test(
],
)

cc_test(
name = "stats_test",
srcs = ["src/ray/stats/stats_test.cc"],
deps = [
":stats_lib",
"@com_google_googletest//:gtest_main",
],
)

cc_library(
name = "object_manager",
srcs = glob([
Expand Down Expand Up @@ -299,6 +351,7 @@ cc_library(
":node_manager_fbs",
":ray_common",
":ray_util",
":stats_lib",
"@boost//:asio",
],
)
Expand Down
30 changes: 30 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ git_repository(
remote = "https://github.com/ruifangChen/checkstyle_java",
)

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

git_repository(
name = "com_github_nelhage_rules_boost",
commit = "6d6fd834281cb8f8e758dd9ad76df86304bf1869",
Expand Down Expand Up @@ -63,3 +65,31 @@ new_git_repository(
load("@//bazel:python_configure.bzl", "python_configure")

python_configure(name = "local_config_python")

http_archive(
name = "io_opencensus_cpp",
strip_prefix = "opencensus-cpp-0.3.0",
urls = ["https://github.com/census-instrumentation/opencensus-cpp/archive/v0.3.0.zip"],
)

# OpenCensus depends on Abseil so we have to explicitly pull it in.
# This is how diamond dependencies are prevented.
git_repository(
name = "com_google_absl",
commit = "88a152ae747c3c42dc9167d46c590929b048d436",
remote = "https://github.com/abseil/abseil-cpp.git",
)

# OpenCensus depends on jupp0r/prometheus-cpp
http_archive(
name = "com_github_jupp0r_prometheus_cpp",
strip_prefix = "prometheus-cpp-master",

# TODO(qwang): We should use the repository of `jupp0r` here when this PR
# `https://github.com/jupp0r/prometheus-cpp/pull/225` getting merged.
urls = ["https://github.com/jovany-wang/prometheus-cpp/archive/master.zip"],
)

load("@com_github_jupp0r_prometheus_cpp//:repositories.bzl", "prometheus_cpp_repositories")

prometheus_cpp_repositories()
14 changes: 13 additions & 1 deletion src/ray/raylet/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "ray/ray_config.h"
#include "ray/raylet/raylet.h"
#include "ray/stats/stats.h"
#include "ray/status.h"

#ifndef RAYLET_TEST
Expand All @@ -20,7 +21,7 @@ int main(int argc, char *argv[]) {
ray::RayLogLevel::INFO,
/*log_dir=*/"");
ray::RayLog::InstallFailureSignalHandler();
RAY_CHECK(argc >= 14 && argc <= 16);
RAY_CHECK(argc >= 14 && argc <= 18);

const std::string raylet_socket_name = std::string(argv[1]);
const std::string store_socket_name = std::string(argv[2]);
Expand All @@ -37,6 +38,17 @@ int main(int argc, char *argv[]) {
const std::string java_worker_command = std::string(argv[13]);
const std::string redis_password = (argc >= 15 ? std::string(argv[14]) : "");
const std::string temp_dir = (argc >= 16 ? std::string(argv[15]) : "/tmp/ray");
const std::string disable_stats_str(argc >= 17 ? std::string(argv[16]) : "false");
const bool disable_stats = ("true" == disable_stats_str);
const std::string stat_address =
(argc >= 18 ? std::string(argv[17]) : "127.0.0.1:8888");

// Initialize stats.
const ray::stats::TagsType global_tags = {
{ray::stats::JobNameKey, "raylet"},
{ray::stats::VersionKey, "0.7.0"},
{ray::stats::NodeAddressKey, node_ip_address}};
ray::stats::Init(stat_address, global_tags, disable_stats);

// Configuration for the node manager.
ray::raylet::NodeManagerConfig node_manager_config;
Expand Down
2 changes: 2 additions & 0 deletions src/ray/raylet/node_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "ray/common/common_protocol.h"
#include "ray/id.h"
#include "ray/raylet/format/node_manager_generated.h"
#include "ray/stats/stats.h"

namespace {

Expand Down Expand Up @@ -1318,6 +1319,7 @@ void NodeManager::TreatTaskAsFailedIfLost(const Task &task) {

void NodeManager::SubmitTask(const Task &task, const Lineage &uncommitted_lineage,
bool forwarded) {
stats::TaskCountReceived().Record(1);
const TaskSpecification &spec = task.GetTaskSpecification();
const TaskID &task_id = spec.TaskId();
RAY_LOG(DEBUG) << "Submitting task: task_id=" << task_id
Expand Down
9 changes: 9 additions & 0 deletions src/ray/raylet/worker_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <algorithm>
#include <thread>

#include "ray/ray_config.h"
#include "ray/stats/stats.h"
#include "ray/status.h"
#include "ray/util/logging.h"

Expand Down Expand Up @@ -130,6 +132,8 @@ void WorkerPool::StartWorkerProcess(const Language &language) {
RAY_LOG(DEBUG) << "Started worker process with pid " << pid;
state.starting_worker_processes.emplace(
std::make_pair(pid, num_workers_per_process_));
stats::CurrentWorker().Record(pid, {{stats::LanguageKey, EnumNameLanguage(language)},
{stats::WorkerPidKey, std::to_string(pid)}});
return;
}
}
Expand Down Expand Up @@ -232,6 +236,11 @@ std::shared_ptr<Worker> WorkerPool::PopWorker(const TaskSpecification &task_spec
bool WorkerPool::DisconnectWorker(const std::shared_ptr<Worker> &worker) {
auto &state = GetStateForLanguage(worker->GetLanguage());
RAY_CHECK(RemoveWorker(state.registered_workers, worker));

stats::CurrentWorker().Record(
0, {{stats::LanguageKey, EnumNameLanguage(worker->GetLanguage())},
{stats::WorkerPidKey, std::to_string(worker->Pid())}});

return RemoveWorker(state.idle, worker);
}

Expand Down
105 changes: 105 additions & 0 deletions src/ray/stats/metric.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#include "ray/stats/metric.h"

namespace ray {

namespace stats {

static void RegisterAsView(opencensus::stats::ViewDescriptor view_descriptor,
const std::vector<opencensus::tags::TagKey> &keys) {
// Register global keys.
for (const auto &tag : ray::stats::StatsConfig::instance().GetGlobalTags()) {
view_descriptor = view_descriptor.add_column(tag.first);
}

// Register custom keys.
for (const auto &key : keys) {
view_descriptor = view_descriptor.add_column(key);
}

opencensus::stats::View view(view_descriptor);
view_descriptor.RegisterForExport();
}

StatsConfig &StatsConfig::instance() {
static StatsConfig instance;
return instance;
}

void StatsConfig::SetGlobalTags(const TagsType &global_tags) {
global_tags_ = global_tags;
}

const TagsType &StatsConfig::GetGlobalTags() const { return global_tags_; }

void StatsConfig::SetIsDisableStats(bool disable_stats) {
is_stats_disabled_ = disable_stats;
}

bool StatsConfig::IsStatsDisabled() const { return is_stats_disabled_; }

void Metric::Record(double value, const TagsType &tags) {
if (StatsConfig::instance().IsStatsDisabled()) {
return;
}

if (measure_ == nullptr) {
measure_.reset(new opencensus::stats::Measure<double>(
opencensus::stats::Measure<double>::Register(name_, description_, unit_)));
RegisterView();
}

// Do record.
TagsType combined_tags(tags);
combined_tags.insert(std::end(combined_tags),
std::begin(StatsConfig::instance().GetGlobalTags()),
std::end(StatsConfig::instance().GetGlobalTags()));
opencensus::stats::Record({{*measure_, value}}, combined_tags);
}

void Gauge::RegisterView() {
opencensus::stats::ViewDescriptor view_descriptor =
opencensus::stats::ViewDescriptor()
.set_name(name_)
.set_description(description_)
.set_measure(name_)
.set_aggregation(opencensus::stats::Aggregation::LastValue());
RegisterAsView(view_descriptor, tag_keys_);
}

void Histogram::RegisterView() {
opencensus::stats::ViewDescriptor view_descriptor =
opencensus::stats::ViewDescriptor()
.set_name(name_)
.set_description(description_)
.set_measure(name_)
.set_aggregation(opencensus::stats::Aggregation::Distribution(
opencensus::stats::BucketBoundaries::Explicit(boundaries_)));

RegisterAsView(view_descriptor, tag_keys_);
}

void Count::RegisterView() {
opencensus::stats::ViewDescriptor view_descriptor =
opencensus::stats::ViewDescriptor()
.set_name(name_)
.set_description(description_)
.set_measure(name_)
.set_aggregation(opencensus::stats::Aggregation::Count());

RegisterAsView(view_descriptor, tag_keys_);
}

void Sum::RegisterView() {
opencensus::stats::ViewDescriptor view_descriptor =
opencensus::stats::ViewDescriptor()
.set_name(name_)
.set_description(description_)
.set_measure(name_)
.set_aggregation(opencensus::stats::Aggregation::Count());

RegisterAsView(view_descriptor, tag_keys_);
}

} // namespace stats

} // namespace ray
Loading

0 comments on commit 7d776f3

Please sign in to comment.