Local scheduler sends a null heartbeat to global scheduler (ray-project#962)

* Local scheduler sends a null heartbeat to global scheduler to notify death

* Add whitespace.

* Speed up component failures test

* Free local scheduler state upon plasma manager disconnection
stephanie-wang authored and robertnishihara committed Sep 12, 2017
1 parent dd4e99b commit 74ac806
Showing 11 changed files with 109 additions and 26 deletions.
3 changes: 3 additions & 0 deletions src/common/format/common.fbs
@@ -129,6 +129,9 @@ table LocalSchedulerInfoMessage {
// The resource vector of resources currently available to this local
// scheduler.
dynamic_resources: [double];
// Whether the local scheduler is dead. If true, then all other fields
// besides `db_client_id` will not be set.
is_dead: bool;
}

root_type LocalSchedulerInfoMessage;
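
For illustration, a minimal round-trip sketch of the new field (not part of the commit): build a heartbeat with only is_dead set and read it back the way a subscriber would. The include path of the header generated from common.fbs is an assumption.

#include <flatbuffers/flatbuffers.h>
#include "format/common_generated.h" /* Assumed name of the generated header. */

/* Returns true if the buffer is a null heartbeat. Per the schema comment,
 * when is_dead is true every field other than db_client_id is unset and
 * must be ignored by the reader. */
bool is_null_heartbeat(const uint8_t *buf) {
  auto message = flatbuffers::GetRoot<LocalSchedulerInfoMessage>(buf);
  return message->is_dead();
}

int main() {
  flatbuffers::FlatBufferBuilder fbb;
  LocalSchedulerInfoMessageBuilder builder(fbb);
  builder.add_is_dead(true); /* db_client_id omitted only to keep the sketch short. */
  fbb.Finish(builder.Finish());
  return is_null_heartbeat(fbb.GetBufferPointer()) ? 0 : 1;
}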
4 changes: 4 additions & 0 deletions src/common/state/local_scheduler_table.cc
@@ -27,3 +27,7 @@ void local_scheduler_table_send_info(DBHandle *db_handle,
init_table_callback(db_handle, NIL_ID, __func__, data, retry, NULL,
redis_local_scheduler_table_send_info, NULL);
}

void local_scheduler_table_disconnect(DBHandle *db_handle) {
redis_local_scheduler_table_disconnect(db_handle);
}
16 changes: 15 additions & 1 deletion src/common/state/local_scheduler_table.h
@@ -21,6 +21,9 @@ typedef struct {
/** The resource vector of resources currently available to this local
* scheduler. */
double dynamic_resources[ResourceIndex_MAX];
/** Whether the local scheduler is dead. If true, then all other fields
* should be ignored. */
bool is_dead;
} LocalSchedulerInfo;

/*
@@ -58,13 +61,14 @@ typedef struct {
} LocalSchedulerTableSubscribeData;

/**
* Send a heartbeat to all subscriers to the local scheduler table. This
* Send a heartbeat to all subscribers to the local scheduler table. This
* heartbeat contains some information about the load on the local scheduler.
*
* @param db_handle Database handle.
* @param info Information about the local scheduler, including the load on the
* local scheduler.
* @param retry Information about retrying the request to the database.
* @return Void.
*/
void local_scheduler_table_send_info(DBHandle *db_handle,
LocalSchedulerInfo *info,
@@ -77,4 +81,14 @@ typedef struct {
LocalSchedulerInfo info;
} LocalSchedulerTableSendInfoData;

/**
* Send a null heartbeat to all subscribers to the local scheduler table to
* notify them that we are about to exit. This operation is performed
* synchronously.
*
* @param db_handle Database handle.
* @return Void.
*/
void local_scheduler_table_disconnect(DBHandle *db_handle);

#endif /* LOCAL_SCHEDULER_TABLE_H */
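
A hedged usage sketch of the new function (include paths are assumptions; the actual call site is the local_scheduler.cc change later in this commit): publish the null heartbeat synchronously, then free the database handle.

#include "state/db.h"                    /* Assumed: declares DBHandle and DBHandle_free. */
#include "state/local_scheduler_table.h" /* Declares local_scheduler_table_disconnect. */

void clean_exit(DBHandle *db) {
  if (db != NULL) {
    /* Synchronous null heartbeat (is_dead = true), so subscribers do not
     * have to wait for the heartbeat timeout. */
    local_scheduler_table_disconnect(db);
    DBHandle_free(db);
  }
}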
42 changes: 33 additions & 9 deletions src/common/state/redis.cc
@@ -1289,14 +1289,22 @@ void redis_local_scheduler_table_subscribe_callback(redisAsyncContext *c,
DBClientID client_id = from_flatbuf(message->db_client_id());
/* Extract the fields of the local scheduler info struct. */
LocalSchedulerInfo info;
info.total_num_workers = message->total_num_workers();
info.task_queue_length = message->task_queue_length();
info.available_workers = message->available_workers();
for (int i = 0; i < ResourceIndex_MAX; ++i) {
info.static_resources[i] = message->static_resources()->Get(i);
}
for (int i = 0; i < ResourceIndex_MAX; ++i) {
info.dynamic_resources[i] = message->dynamic_resources()->Get(i);
memset(&info, 0, sizeof(info));
if (message->is_dead()) {
/* If the local scheduler is dead, then ignore all other fields in the
* message. */
info.is_dead = true;
} else {
/* If the local scheduler is alive, collect load information. */
info.total_num_workers = message->total_num_workers();
info.task_queue_length = message->task_queue_length();
info.available_workers = message->available_workers();
for (int i = 0; i < ResourceIndex_MAX; ++i) {
info.static_resources[i] = message->static_resources()->Get(i);
}
for (int i = 0; i < ResourceIndex_MAX; ++i) {
info.dynamic_resources[i] = message->dynamic_resources()->Get(i);
}
}

/* Call the subscribe callback. */
@@ -1355,7 +1363,7 @@ void redis_local_scheduler_table_send_info(TableCallbackData *callback_data) {
fbb, to_flatbuf(fbb, db->client), info.total_num_workers,
info.task_queue_length, info.available_workers,
fbb.CreateVector(info.static_resources, ResourceIndex_MAX),
fbb.CreateVector(info.dynamic_resources, ResourceIndex_MAX));
fbb.CreateVector(info.dynamic_resources, ResourceIndex_MAX), false);
fbb.Finish(message);

int status = redisAsyncCommand(
@@ -1368,6 +1376,22 @@ void redis_local_scheduler_table_send_info(TableCallbackData *callback_data) {
}
}

void redis_local_scheduler_table_disconnect(DBHandle *db) {
flatbuffers::FlatBufferBuilder fbb;
LocalSchedulerInfoMessageBuilder builder(fbb);
builder.add_db_client_id(to_flatbuf(fbb, db->client));
builder.add_is_dead(true);
auto message = builder.Finish();
fbb.Finish(message);
redisReply *reply = (redisReply *) redisCommand(
db->sync_context, "PUBLISH local_schedulers %b", fbb.GetBufferPointer(),
fbb.GetSize());
CHECK(reply->type != REDIS_REPLY_ERROR);
CHECK(reply->type == REDIS_REPLY_INTEGER);
LOG_DEBUG("%" PRId64 " subscribers received this publish.\n", reply->integer);
freeReplyObject(reply);
}

void redis_driver_table_subscribe_callback(redisAsyncContext *c,
void *r,
void *privdata) {
9 changes: 9 additions & 0 deletions src/common/state/redis.h
@@ -291,6 +291,15 @@ void redis_local_scheduler_table_subscribe(TableCallbackData *callback_data);
*/
void redis_local_scheduler_table_send_info(TableCallbackData *callback_data);

/**
* Synchronously publish a null update to the local scheduler table signifying
* that we are about to exit.
*
* @param db The database handle of the dying local scheduler.
* @return Void.
*/
void redis_local_scheduler_table_disconnect(DBHandle *db);

/**
* Subscribe to updates from the driver table.
*
18 changes: 13 additions & 5 deletions src/global_scheduler/global_scheduler.cc
@@ -333,14 +333,22 @@ void local_scheduler_table_handler(DBClientID client_id,
LOG_DEBUG(
"total workers = %d, task queue length = %d, available workers = %d",
info.total_num_workers, info.task_queue_length, info.available_workers);

/* Update the local scheduler info struct. */
auto it = state->local_schedulers.find(client_id);
if (it != state->local_schedulers.end()) {
/* Reset the number of tasks sent since the last heartbeat. */
LocalScheduler &local_scheduler = it->second;
local_scheduler.num_heartbeats_missed = 0;
local_scheduler.num_recent_tasks_sent = 0;
local_scheduler.info = info;
if (info.is_dead) {
/* The local scheduler is exiting. Increase the number of heartbeats
* missed to the timeout threshold. This will trigger removal of the
* local scheduler the next time the timeout handler fires. */
it->second.num_heartbeats_missed = NUM_HEARTBEATS_TIMEOUT;
} else {
/* Reset the number of tasks sent since the last heartbeat. */
LocalScheduler &local_scheduler = it->second;
local_scheduler.num_heartbeats_missed = 0;
local_scheduler.num_recent_tasks_sent = 0;
local_scheduler.info = info;
}
} else {
LOG_WARN("client_id didn't match any cached local scheduler entries");
}
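
The bookkeeping above can be summarized with a small, self-contained sketch (assumed names, not the actual Ray code): a heartbeat with is_dead == true fast-forwards the missed-heartbeat counter to the timeout threshold so the next timer tick evicts the entry, while a normal heartbeat resets the counters.

#include <string>
#include <unordered_map>

constexpr int kNumHeartbeatsTimeout = 100; /* Stands in for NUM_HEARTBEATS_TIMEOUT. */

struct Entry {
  int num_heartbeats_missed = 0;
  int num_recent_tasks_sent = 0;
};

void handle_heartbeat(std::unordered_map<std::string, Entry> &table,
                      const std::string &client_id,
                      bool is_dead) {
  auto it = table.find(client_id);
  if (it == table.end()) {
    return; /* Unknown client; the real handler logs a warning. */
  }
  if (is_dead) {
    /* Dying scheduler: trigger removal on the next timeout handler run. */
    it->second.num_heartbeats_missed = kNumHeartbeatsTimeout;
  } else {
    it->second.num_heartbeats_missed = 0;
    it->second.num_recent_tasks_sent = 0;
  }
}

void timeout_tick(std::unordered_map<std::string, Entry> &table) {
  for (auto it = table.begin(); it != table.end();) {
    if (++it->second.num_heartbeats_missed >= kNumHeartbeatsTimeout) {
      it = table.erase(it); /* Removal happens one tick after the null heartbeat. */
    } else {
      ++it;
    }
  }
}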
7 changes: 3 additions & 4 deletions src/local_scheduler/local_scheduler.cc
@@ -175,10 +175,9 @@ void LocalSchedulerState_free(LocalSchedulerState *state) {
* responsible for deleting our entry from the db_client table, so do not
* delete it here. */
if (state->db != NULL) {
/* TODO(swang): Add a null heartbeat that tells the global scheduler that
* we are dead. This avoids having to wait for the timeout before marking
* us as dead in the db_client table, in cases where we can do a clean
* exit. */
/* Send a null heartbeat that tells the global scheduler that we are dead
* to avoid waiting for the heartbeat timeout. */
local_scheduler_table_disconnect(state->db);
DBHandle_free(state->db);
}

2 changes: 0 additions & 2 deletions src/local_scheduler/local_scheduler.h
@@ -185,8 +185,6 @@ LocalSchedulerState *LocalSchedulerState_init(
const char *worker_path,
int num_workers);

void LocalSchedulerState_free(LocalSchedulerState *state);

SchedulingAlgorithmState *get_algorithm_state(LocalSchedulerState *state);

void process_message(event_loop *loop,
20 changes: 17 additions & 3 deletions src/local_scheduler/local_scheduler_algorithm.cc
@@ -480,7 +480,14 @@ void fetch_missing_dependency(LocalSchedulerState *state,
/* We weren't actively fetching this object. Try the fetch once
* immediately. */
if (state->plasma_conn->get_manager_fd() != -1) {
ARROW_CHECK_OK(state->plasma_conn->Fetch(1, &obj_id));
auto arrow_status = state->plasma_conn->Fetch(1, &obj_id);
if (!arrow_status.ok()) {
LocalSchedulerState_free(state);
LOG_FATAL(
"Lost connection to the plasma manager, local scheduler is "
"exiting. Error: %s",
arrow_status.ToString().c_str());
}
}
/* Create an entry and add it to the list of active fetch requests to
* ensure that the fetch actually happens. The entry will be moved to the
@@ -578,9 +585,16 @@ int fetch_object_timeout_handler(event_loop *loop, timer_id id, void *context) {
for (int64_t j = 0; j < num_object_ids; j += fetch_request_size) {
int num_objects_in_request =
std::min(num_object_ids, j + fetch_request_size) - j;
ARROW_CHECK_OK(state->plasma_conn->Fetch(
auto arrow_status = state->plasma_conn->Fetch(
num_objects_in_request,
reinterpret_cast<plasma::ObjectID *>(&object_ids[j])));
reinterpret_cast<plasma::ObjectID *>(&object_ids[j]));
if (!arrow_status.ok()) {
LocalSchedulerState_free(state);
LOG_FATAL(
"Lost connection to the plasma manager, local scheduler is exiting. "
"Error: %s",
arrow_status.ToString().c_str());
}
}

/* Print a warning if this method took too long. */
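
The error-handling pattern introduced in both hunks, sketched in isolation (a sketch assuming Arrow is available; LOG_FATAL and the state teardown are represented by a plain abort here): check the arrow::Status returned by Fetch, clean up, then exit with a fatal message instead of crashing inside ARROW_CHECK_OK.

#include <arrow/status.h>
#include <cstdio>
#include <cstdlib>

/* Abort with a message if a plasma manager call failed. In the real code,
 * LocalSchedulerState_free(state) runs first so workers are cleaned up and
 * the null heartbeat is published before the process exits. */
void check_plasma_status(const arrow::Status &status) {
  if (!status.ok()) {
    std::fprintf(stderr,
                 "Lost connection to the plasma manager, local scheduler is "
                 "exiting. Error: %s\n",
                 status.ToString().c_str());
    std::abort();
  }
}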
9 changes: 9 additions & 0 deletions src/local_scheduler/local_scheduler_shared.h
@@ -121,4 +121,13 @@ struct LocalSchedulerClient {
LocalSchedulerState *local_scheduler_state;
};

/**
* Free the local scheduler state. This disconnects all clients and notifies
* the global scheduler of the local scheduler's exit.
*
* @param state The state to free.
 * @return Void.
*/
void LocalSchedulerState_free(LocalSchedulerState *state);

#endif /* LOCAL_SCHEDULER_SHARED_H */
5 changes: 3 additions & 2 deletions test/component_failures_test.py
@@ -151,11 +151,12 @@ def f(x, j):
components = ray.services.all_processes[component_type]
for process in components[1:]:
process.terminate()
time.sleep(0.1)
time.sleep(1)

for process in components[1:]:
process.kill()
process.wait()
self.assertNotEqual(process.poll(), None)
time.sleep(1)

# Make sure that we can still get the objects after the executing tasks
# died.