From 90d1147b8be188cb53de3d5faea2818310509cf9 Mon Sep 17 00:00:00 2001 From: Julien Gilli Date: Fri, 15 Aug 2014 17:20:32 -0700 Subject: [PATCH] cluster: centralize removal from workers list. Currently, cluster workers can be removed from the workers list in three different places: - In the exit event handler for the worker process. - In the disconnect event handler of the worker process. - In the disconnect event handler of the cluster master. However, handles for a given worker are cleaned up only in one of these places: in the cluster master's disconnect event handler. Because these events happen asynchronously, it is possible that the workers list is empty before we even clean up one handle. This makes the assert that makes sure that no handle is left when the workers list is empty fail. This commit removes the worker from the cluster.workers list only when the worker is dead _and_ disconnected, at which point we're sure that its associated handles are cleaned up. Fixes #8191 and #8192. Reviewed-By: Fedor Indutny --- doc/api/cluster.markdown | 17 ++++- lib/cluster.js | 70 ++++++++++++++----- src/node.js | 2 +- .../simple/test-cluster-worker-isconnected.js | 37 ++++++++++ test/simple/test-cluster-worker-isdead.js | 27 +++++++ 5 files changed, 134 insertions(+), 19 deletions(-) create mode 100644 test/simple/test-cluster-worker-isconnected.js create mode 100644 test/simple/test-cluster-worker-isdead.js diff --git a/doc/api/cluster.markdown b/doc/api/cluster.markdown index 2d78654cc7a3d4..8228e34c1ee149 100644 --- a/doc/api/cluster.markdown +++ b/doc/api/cluster.markdown @@ -344,8 +344,10 @@ A hash that stores the active worker objects, keyed by `id` field. Makes it easy to loop through all the workers. It is only available in the master process. -A worker is removed from cluster.workers just before the `'disconnect'` or -`'exit'` event is emitted. +A worker is removed from cluster.workers after the worker has disconnected _and_ +exited. The order between these two events cannot be determined in advance. +However, it is guaranteed that the removal from the cluster.workers list happens +before last `'disconnect'` or `'exit'` event is emitted. // Go through all workers function eachWorker(callback) { @@ -511,6 +513,17 @@ the `disconnect` event has not been emitted after some time. }); } +### worker.isDead() + +This function returns `true` if the worker's process has terminated (either +because of exiting or being signaled). Otherwise, it returns `false`. + +### worker.isConnected() + +This function returns `true` if the worker is connected to its master via its IPC +channel, `false` otherwise. A worker is connected to its master after it's been +created. It is disconnected after the `disconnect` event is emitted. + ### Event: 'message' * `message` {Object} diff --git a/lib/cluster.js b/lib/cluster.js index 3e853bcdbc62c1..0914546b9eeb85 100644 --- a/lib/cluster.js +++ b/lib/cluster.js @@ -64,6 +64,14 @@ Worker.prototype.send = function() { this.process.send.apply(this.process, arguments); }; +Worker.prototype.isDead = function isDead() { + return this.process.exitCode != null || this.process.signalCode != null; +}; + +Worker.prototype.isConnected = function isConnected() { + return this.process.connected; +}; + // Master/worker specific methods are defined in the *Init() functions. function SharedHandle(key, address, port, addressType, backlog, fd) { @@ -310,20 +318,62 @@ function masterInit() { id: id, process: workerProcess }); + + function removeWorker(worker) { + assert(worker); + + delete cluster.workers[worker.id]; + + if (Object.keys(cluster.workers).length === 0) { + assert(Object.keys(handles).length === 0, 'Resource leak detected.'); + intercom.emit('disconnect'); + } + } + + function removeHandlesForWorker(worker) { + assert(worker); + + for (var key in handles) { + var handle = handles[key]; + if (handle.remove(worker)) delete handles[key]; + } + } + worker.process.once('exit', function(exitCode, signalCode) { + /* + * Remove the worker from the workers list only + * if it has disconnected, otherwise we might + * still want to access it. + */ + if (!worker.isConnected()) removeWorker(worker); + worker.suicide = !!worker.suicide; worker.state = 'dead'; worker.emit('exit', exitCode, signalCode); cluster.emit('exit', worker, exitCode, signalCode); - delete cluster.workers[worker.id]; }); + worker.process.once('disconnect', function() { + /* + * Now is a good time to remove the handles + * associated with this worker because it is + * not connected to the master anymore. + */ + removeHandlesForWorker(worker); + + /* + * Remove the worker from the workers list only + * if its process has exited. Otherwise, we might + * still want to access it. + */ + if (worker.isDead()) removeWorker(worker); + worker.suicide = !!worker.suicide; worker.state = 'disconnected'; worker.emit('disconnect'); cluster.emit('disconnect', worker); - delete cluster.workers[worker.id]; }); + worker.process.on('internalMessage', internal(worker, onmessage)); process.nextTick(function() { cluster.emit('fork', worker); @@ -345,18 +395,6 @@ function masterInit() { if (cb) intercom.once('disconnect', cb); }; - cluster.on('disconnect', function(worker) { - delete cluster.workers[worker.id]; - for (var key in handles) { - var handle = handles[key]; - if (handle.remove(worker)) delete handles[key]; - } - if (Object.keys(cluster.workers).length === 0) { - assert(Object.keys(handles).length === 0, 'Resource leak detected.'); - intercom.emit('disconnect'); - } - }); - Worker.prototype.disconnect = function() { this.suicide = true; send(this, { act: 'disconnect' }); @@ -365,7 +403,7 @@ function masterInit() { Worker.prototype.destroy = function(signo) { signo = signo || 'SIGTERM'; var proc = this.process; - if (proc.connected) { + if (this.isConnected()) { this.once('disconnect', proc.kill.bind(proc, signo)); this.disconnect(); return; @@ -595,7 +633,7 @@ function workerInit() { Worker.prototype.destroy = function() { this.suicide = true; - if (!process.connected) process.exit(0); + if (!this.isConnected()) process.exit(0); var exit = process.exit.bind(null, 0); send({ act: 'suicide' }, exit); process.once('disconnect', exit); diff --git a/src/node.js b/src/node.js index 4126bf1f475717..65092249d55504 100644 --- a/src/node.js +++ b/src/node.js @@ -587,7 +587,7 @@ }; startup.processKillAndExit = function() { - process.exitCode = 0; + process.exit = function(code) { if (code || code === 0) process.exitCode = code; diff --git a/test/simple/test-cluster-worker-isconnected.js b/test/simple/test-cluster-worker-isconnected.js new file mode 100644 index 00000000000000..ed839d49630491 --- /dev/null +++ b/test/simple/test-cluster-worker-isconnected.js @@ -0,0 +1,37 @@ +var cluster = require('cluster'); +var assert = require('assert'); +var util = require('util'); + +if (cluster.isMaster) { + var worker = cluster.fork(); + + assert.ok(worker.isConnected(), + "isConnected() should return true as soon as the worker has " + + "been created."); + + worker.on('disconnect', function() { + assert.ok(!worker.isConnected(), + "After a disconnect event has been emitted, " + + "isConncted should return false"); + }); + + worker.on('message', function(msg) { + if (msg === 'readyToDisconnect') { + worker.disconnect(); + } + }) + +} else { + assert.ok(cluster.worker.isConnected(), + "isConnected() should return true from within a worker at all " + + "times."); + + cluster.worker.process.on('disconnect', function() { + assert.ok(!cluster.worker.isConnected(), + "isConnected() should return false from within a worker " + + "after its underlying process has been disconnected from " + + "the master"); + }) + + process.send('readyToDisconnect'); +} diff --git a/test/simple/test-cluster-worker-isdead.js b/test/simple/test-cluster-worker-isdead.js new file mode 100644 index 00000000000000..11766c597f8297 --- /dev/null +++ b/test/simple/test-cluster-worker-isdead.js @@ -0,0 +1,27 @@ +var cluster = require('cluster'); +var assert = require('assert'); +var net = require('net'); + +if (cluster.isMaster) { + var worker = cluster.fork(); + assert.ok(!worker.isDead(), + "isDead() should return false right after the worker has been " + + "created."); + + worker.on('exit', function() { + assert.ok(!worker.isConnected(), + "After an event has been emitted, " + + "isDead should return true"); + }) + + worker.on('message', function(msg) { + if (msg === 'readyToDie') { + worker.kill(); + } + }); + +} else if (cluster.isWorker) { + assert.ok(!cluster.worker.isDead(), + "isDead() should return false when called from within a worker"); + process.send('readyToDie'); +}