Skip to content

Commit e5644e2

Browse files
SERVER-16257 - replSetInitiate races with initial sync, resulting in no primary
1 parent 534263f commit e5644e2

10 files changed

+52
-33
lines changed

src/mongo/db/repl/oplog.cpp

+1-15
Original file line number | Diff line number | Diff line change
@@ -145,17 +145,6 @@ namespace repl {
145145
return std::pair<OpTime,long long>(ts, hashNew);
146146
}
147147

148-
static void _logOpUninitialized(OperationContext* txn,
149-
const char *opstr,
150-
const char *ns,
151-
const char *logNS,
152-
const BSONObj& obj,
153-
BSONObj *o2,
154-
bool *bb,
155-
bool fromMigrate ) {
156-
uassert(13288, "replSet error write op to db before replSet initialized", str::startsWith(ns, "local.") || *opstr == 'n');
157-
}
158-
159148
/** write an op to the oplog that is already built.
160149
todo : make _logOpRS() call this so we don't repeat ourselves?
161150
*/
@@ -404,10 +393,7 @@ namespace repl {
404393
const BSONObj& obj,
405394
BSONObj *o2,
406395
bool *bb,
407-
bool fromMigrate ) = _logOpUninitialized;
408-
void newReplUp() {
409-
_logOp = _logOpRS;
410-
}
396+
bool fromMigrate ) = _logOpRS;
411397

412398
void oldRepl() { _logOp = _logOpOld; }
413399

src/mongo/db/repl/oplog.h

+1-3
Original file line number | Diff line number | Diff line change
@@ -39,10 +39,8 @@ namespace mongo {
3939

4040
namespace repl {
4141

42-
// These functions redefine the function for logOp(),
43-
// for either master/slave or replica sets.
42+
// Switches the function used by logOp() to the master/slave implementation.
4443
void oldRepl(); // master-slave
45-
void newReplUp();// replica set after startup
4644

4745
// Create a new capped collection for the oplog if it doesn't yet exist.
4846
// This will be either local.oplog.rs (replica sets) or local.oplog.$main (master/slave)

src/mongo/db/repl/repl_coordinator_external_state.h

+8-1
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,9 @@ namespace repl {
5959
virtual ~ReplicationCoordinatorExternalState();
6060

6161
/**
62-
* Starts the background sync, producer, and sync source feedback threads, and sets up logOp
62+
* Starts the background sync, producer, and sync source feedback threads
63+
*
64+
* NOTE: Only starts threads if they are not already started.
6365
*/
6466
virtual void startThreads() = 0;
6567

@@ -74,6 +76,11 @@ namespace repl {
7476
*/
7577
virtual void shutdown() = 0;
7678

79+
/**
80+
* Creates the oplog and writes the first entry.
81+
*/
82+
virtual void initiateOplog(OperationContext* txn) = 0;
83+
7784
/**
7885
* Simple wrapper around SyncSourceFeedback::forwardSlaveHandshake. Signals to the
7986
* SyncSourceFeedback thread that it needs to wake up and send a replication handshake

src/mongo/db/repl/repl_coordinator_external_state_impl.cpp

+23-8
Original file line number | Diff line number | Diff line change
@@ -72,30 +72,45 @@ namespace {
7272
} // namespace
7373

7474
ReplicationCoordinatorExternalStateImpl::ReplicationCoordinatorExternalStateImpl() :
75-
_nextThreadId(0) {}
75+
_startedThreads(false)
76+
, _nextThreadId(0) {}
7677
ReplicationCoordinatorExternalStateImpl::~ReplicationCoordinatorExternalStateImpl() {}
7778

7879
void ReplicationCoordinatorExternalStateImpl::startThreads() {
80+
boost::lock_guard<boost::mutex> lk(_threadMutex);
81+
if (_startedThreads) {
82+
return;
83+
}
84+
log() << "Starting replication applier threads";
7985
_applierThread.reset(new boost::thread(runSyncThread));
8086
BackgroundSync* bgsync = BackgroundSync::get();
8187
_producerThread.reset(new boost::thread(stdx::bind(&BackgroundSync::producerThread,
8288
bgsync)));
8389
_syncSourceFeedbackThread.reset(new boost::thread(stdx::bind(&SyncSourceFeedback::run,
8490
&_syncSourceFeedback)));
85-
newReplUp();
91+
_startedThreads = true;
8692
}
8793

8894
void ReplicationCoordinatorExternalStateImpl::startMasterSlave(OperationContext* txn) {
8995
repl::startMasterSlave(txn);
9096
}
9197

9298
void ReplicationCoordinatorExternalStateImpl::shutdown() {
93-
_syncSourceFeedback.shutdown();
94-
_syncSourceFeedbackThread->join();
95-
_applierThread->join();
96-
BackgroundSync* bgsync = BackgroundSync::get();
97-
bgsync->shutdown();
98-
_producerThread->join();
99+
boost::lock_guard<boost::mutex> lk(_threadMutex);
100+
if (_startedThreads) {
101+
log() << "Stopping replication applier threads";
102+
_syncSourceFeedback.shutdown();
103+
_syncSourceFeedbackThread->join();
104+
_applierThread->join();
105+
BackgroundSync* bgsync = BackgroundSync::get();
106+
bgsync->shutdown();
107+
_producerThread->join();
108+
}
109+
}
110+
111+
void ReplicationCoordinatorExternalStateImpl::initiateOplog(OperationContext* txn) {
112+
createOplog(txn);
113+
logOpInitiate(txn, BSON("msg" << "initiating set"));
99114
}
100115

101116
void ReplicationCoordinatorExternalStateImpl::forwardSlaveHandshake() {

src/mongo/db/repl/repl_coordinator_external_state_impl.h

+6
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ namespace repl {
4848
virtual void startThreads();
4949
virtual void startMasterSlave(OperationContext* txn);
5050
virtual void shutdown();
51+
virtual void initiateOplog(OperationContext* txn);
5152
virtual void forwardSlaveHandshake();
5253
virtual void forwardSlaveProgress();
5354
virtual OID ensureMe(OperationContext* txn);
@@ -65,6 +66,11 @@ namespace repl {
6566
std::string getNextOpContextThreadName();
6667

6768
private:
69+
// Guards starting threads and setting _startedThreads
70+
boost::mutex _threadMutex;
71+
72+
// True when the threads have been started
73+
bool _startedThreads;
6874

6975
// The SyncSourceFeedback class is responsible for sending replSetUpdatePosition commands
7076
// for forwarding replication progress information upstream when there is chained

src/mongo/db/repl/repl_coordinator_external_state_mock.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@ namespace repl {
5555

5656
void ReplicationCoordinatorExternalStateMock::startThreads() {}
5757
void ReplicationCoordinatorExternalStateMock::startMasterSlave(OperationContext*) {}
58+
void ReplicationCoordinatorExternalStateMock::initiateOplog(OperationContext* txn) {}
5859
void ReplicationCoordinatorExternalStateMock::shutdown() {}
5960
void ReplicationCoordinatorExternalStateMock::forwardSlaveHandshake() {}
6061
void ReplicationCoordinatorExternalStateMock::forwardSlaveProgress() {}

src/mongo/db/repl/repl_coordinator_external_state_mock.h

+1
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ namespace repl {
5151
virtual void startThreads();
5252
virtual void startMasterSlave(OperationContext*);
5353
virtual void shutdown();
54+
virtual void initiateOplog(OperationContext* txn);
5455
virtual void forwardSlaveHandshake();
5556
virtual void forwardSlaveProgress();
5657
virtual OID ensureMe(OperationContext*);

src/mongo/db/repl/repl_coordinator_impl.cpp

+7-2
Original file line number | Diff line number | Diff line change
@@ -268,6 +268,7 @@ namespace {
268268
lk.unlock();
269269
}
270270
_performPostMemberStateUpdateAction(action);
271+
_externalState->startThreads();
271272
}
272273

273274
void ReplicationCoordinatorImpl::startReplication(OperationContext* txn) {
@@ -297,8 +298,6 @@ namespace {
297298
_topCoordDriverThread.reset(new boost::thread(stdx::bind(&ReplicationExecutor::run,
298299
&_replExecutor)));
299300

300-
_externalState->startThreads();
301-
302301
bool doneLoadingConfig = _startLoadLocalConfig(txn);
303302
if (doneLoadingConfig) {
304303
// If we're not done loading the config, then the config state will be set by
@@ -1875,6 +1874,12 @@ namespace {
18751874
configStateGuard.Dismiss();
18761875
fassert(18654, cbh.getStatus());
18771876
_replExecutor.wait(cbh.getValue());
1877+
1878+
if (status.isOK()) {
1879+
// Create the oplog with the first entry, and start repl threads.
1880+
_externalState->initiateOplog(txn);
1881+
_externalState->startThreads();
1882+
}
18781883
return status;
18791884
}
18801885

src/mongo/db/repl/repl_coordinator_impl_heartbeat.cpp

+4
Original file line number | Diff line number | Diff line change
@@ -424,6 +424,10 @@ namespace {
424424
}
425425
return;
426426
}
427+
428+
lk.unlock();
429+
430+
_externalState->startThreads();
427431
}
428432

429433
const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(

src/mongo/db/repl/replset_commands.cpp

-4
Original file line number | Diff line number | Diff line change
@@ -279,10 +279,6 @@ namespace {
279279
Status status = getGlobalReplicationCoordinator()->processReplSetInitiate(txn,
280280
configObj,
281281
&result);
282-
if (status.isOK()) {
283-
createOplog(txn);
284-
logOpInitiate(txn, BSON("msg" << "initiating set"));
285-
}
286282
return appendCommandStatus(result, status);
287283
}
288284
} cmdReplSetInitiate;

0 commit comments

Comments
 (0)