Skip to content

Commit

Permalink
Do not send keepalives to status workers.
Browse files Browse the repository at this point in the history
  • Loading branch information
btovar committed Apr 11, 2016
1 parent 23aaea8 commit ae7323b
Showing 1 changed file with 17 additions and 3 deletions.
20 changes: 17 additions & 3 deletions work_queue/src/work_queue.c
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,8 @@ static void add_worker(struct work_queue *q)
w->finished_tasks = 0;
w->start_time = timestamp_get();

w->last_update_msg_time = w->start_time;

struct work_queue_resources *r = work_queue_resources_create();

r->cores.smallest = r->cores.largest = r->cores.total = -1;//default_resource_value;
Expand Down Expand Up @@ -3279,12 +3281,24 @@ static void ask_for_workers_updates(struct work_queue *q) {
hash_table_firstkey(q->worker_table);
while(hash_table_nextkey(q->worker_table, &key, (void **) &w)) {
if(q->keepalive_interval > 0) {

/* do no send message to work_queue_status. */
if(!strcmp(w->hostname, "QUEUE_STATUS")) {
continue;
}

/* we have not received workqueue message from worker yet, so we
* simply check agains its start_time. */
if(!strcmp(w->hostname, "unknown")){
last_recv_elapsed_time = (int64_t)(current_time - w->start_time)/1000000;
} else {
last_recv_elapsed_time = (int64_t)(current_time - w->last_update_msg_time)/1000000;
if ((int)((current_time - w->start_time)/1000000) >= q->keepalive_timeout) {
debug(D_WQ, "Removing worker %s (%s): hasn't sent its initialization in more than %d s", w->hostname, w->addrport, q->keepalive_timeout);
handle_worker_failure(q, w);
}
continue;
}

last_recv_elapsed_time = (int64_t)(current_time - w->last_update_msg_time)/1000000;

// send new keepalive check only (1) if we received a response since last keepalive check AND
// (2) we are past keepalive interval
if(w->last_msg_recv_time >= w->last_update_msg_time) {
Expand Down

0 comments on commit ae7323b

Please sign in to comment.