Skip to content

Commit e7da324

Browse files
drew-beckmenMongoDB Bot
authored and
MongoDB Bot
committed
SERVER-91920: Report admission and execution control queue stats in logs for slow transactions (#28867)
GitOrigin-RevId: 369a61cc993df6f77285b9b675ddc1f80de39154
1 parent d6aa1fd commit e7da324

16 files changed

+442
-63
lines changed

jstests/core/administrative/getlog2.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ assert(contains(query.log, function(v) {
6262
print(v);
6363
return stringContains(v, " find ") && stringContains(v, "filter:") &&
6464
stringContains(v, "keysExamined:") && stringContains(v, "docsExamined:") &&
65-
v.indexOf("SENTINEL") != -1;
65+
stringContains(v, "queues") && v.indexOf("SENTINEL") != -1;
6666
}));
6767

6868
// 2. Run a slow update

jstests/noPassthrough/architecture/ticket_queue_slow_log.js

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/**
22
* Test that verifies that time waiting for a ticket is logged as part of slow query logging.
3-
* Induces queueing for ticket by exhausting read tickets.
3+
* Induces queueing for ticket by exhausting read tickets. Also verifies that ingress
4+
* admission queueing stats are included in slow query logging.
45
*/
56

67
import {ReplSetTest} from "jstests/libs/replsettest.js";
@@ -83,8 +84,10 @@ for (let i = 0; i < queuedReaders.length; i++) {
8384
queuedReaders[i]();
8485
}
8586

86-
const predicate = new RegExp(
87-
`Slow query.*"${coll}.*"queues".*"execution":{"admissions":\\d+,"totalTimeQueuedMicros":\\d+}`);
87+
const predicate =
88+
new RegExp(`Slow query.*"${coll}.*"queues".*` +
89+
`"(execution|ingress)":{"admissions":\\d+(?:,"totalTimeQueuedMicros":\\d+)?}` +
90+
`.*"(execution|ingress)":{"admissions":\\d+(?:,"totalTimeQueuedMicros":\\d+)?}`);
8891
assert(checkLog.checkContainsOnce(primary, predicate),
8992
"Could not find log containing " + predicate);
9093

src/mongo/db/BUILD.bazel

+1
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,7 @@ mongo_cc_library(
932932
"//src/mongo/util:fail_point", # TODO(SERVER-93876): Remove.
933933
"//src/mongo/util:progress_meter",
934934
"//src/mongo/util/concurrency:spin_lock", # TODO(SERVER-93876): Remove.
935+
"//src/mongo/util/concurrency:ticketholder_queue_stats",
935936
] + select({
936937
"//bazel/config:use_tracing_profiler_enabled": [
937938
"//src/mongo/util/tracing_profiler", # TODO(SERVER-93876): Remove.

src/mongo/db/admission/BUILD.bazel

-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ mongo_cc_library(
2626
deps = [
2727
":ingress_admission_context",
2828
"//src/mongo:base", # TODO(SERVER-93876): Remove.
29-
"//src/mongo/db:commands", # TODO(SERVER-93876): Remove.
3029
"//src/mongo/db:server_base", # TODO(SERVER-93876): Remove.
3130
"//src/mongo/db:server_feature_flags", # TODO(SERVER-93876): Remove.
3231
"//src/mongo/db:service_context", # TODO(SERVER-93876): Remove.

src/mongo/db/curop.cpp

+46-57
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
#include "mongo/transport/service_executor.h"
8181
#include "mongo/util/assert_util.h"
8282
#include "mongo/util/clock_source.h"
83+
#include "mongo/util/concurrency/ticketholder_queue_stats.h"
8384
#include "mongo/util/database_name_util.h"
8485
#include "mongo/util/decorable.h"
8586
#include "mongo/util/duration.h"
@@ -97,7 +98,6 @@
9798
namespace mongo {
9899

99100
namespace {
100-
StringMap<std::function<AdmissionContext*(OperationContext*)>> gQueueMetricsRegistry;
101101

102102
auto& oplogGetMoreStats = *MetricBuilder<TimerStats>("repl.network.oplogGetMoresProcessed");
103103

@@ -115,15 +115,6 @@ BSONObj serializeDollarDbInOpDescription(boost::optional<TenantId> tenantId,
115115
.firstElement());
116116
return newCmdObj;
117117
}
118-
119-
MONGO_INITIALIZER(InitGlobalQueueLookupTable)(InitializerContext*) {
120-
gQueueMetricsRegistry["ingress"] = [](OperationContext* opCtx) {
121-
return &IngressAdmissionContext::get(opCtx);
122-
};
123-
gQueueMetricsRegistry["execution"] = [](OperationContext* opCtx) {
124-
return &ExecutionAdmissionContext::get(opCtx);
125-
};
126-
}
127118
} // namespace
128119

129120
/**
@@ -827,6 +818,47 @@ void appendObjectTruncatingAsNecessary(StringData fieldName,
827818
buildTruncatedObject(obj, *maxSize, builder);
828819
truncatedBuilder.doneFast();
829820
}
821+
822+
/**
823+
* Populates the BSONObjBuilder with the queueing statistics of the current operation. Calculates
824+
* overall queue stats and records the current queue if the operation is presently queued.
825+
*/
826+
void populateCurrentOpQueueStats(OperationContext* opCtx,
827+
TickSource* tickSource,
828+
BSONObjBuilder* currOpStats) {
829+
boost::optional<std::tuple<TicketHolderQueueStats::QueueType, Microseconds>> currentQueue;
830+
BSONObjBuilder queuesBuilder(currOpStats->subobjStart("queues"));
831+
832+
for (auto&& [queueType, lookup] : TicketHolderQueueStats::getQueueMetricsRegistry()) {
833+
AdmissionContext* admCtx = lookup(opCtx);
834+
Microseconds totalTimeQueuedMicros = admCtx->totalTimeQueuedMicros();
835+
836+
if (auto startQueueingTime = admCtx->startQueueingTime()) {
837+
Microseconds currentQueueTimeQueuedMicros = tickSource->ticksTo<Microseconds>(
838+
opCtx->getServiceContext()->getTickSource()->getTicks() - *startQueueingTime);
839+
totalTimeQueuedMicros += currentQueueTimeQueuedMicros;
840+
currentQueue = std::make_tuple(queueType, currentQueueTimeQueuedMicros);
841+
}
842+
BSONObjBuilder queueMetricsBuilder(
843+
queuesBuilder.subobjStart(TicketHolderQueueStats::queueTypeToString(queueType)));
844+
queueMetricsBuilder.append("admissions", admCtx->getAdmissions());
845+
queueMetricsBuilder.append("totalTimeQueuedMicros",
846+
durationCount<Microseconds>(totalTimeQueuedMicros));
847+
queueMetricsBuilder.append("isHoldingTicket", admCtx->isHoldingTicket());
848+
queueMetricsBuilder.done();
849+
}
850+
queuesBuilder.done();
851+
if (currentQueue) {
852+
BSONObjBuilder currentQueueBuilder(currOpStats->subobjStart("currentQueue"));
853+
currentQueueBuilder.append(
854+
"name", TicketHolderQueueStats::queueTypeToString(std::get<0>(*currentQueue)));
855+
currentQueueBuilder.append("timeQueuedMicros",
856+
durationCount<Microseconds>(std::get<1>(*currentQueue)));
857+
currentQueueBuilder.done();
858+
} else {
859+
currOpStats->appendNull("currentQueue");
860+
}
861+
};
830862
} // namespace
831863

832864
BSONObj CurOp::truncateAndSerializeGenericCursor(GenericCursor cursor,
@@ -972,38 +1004,7 @@ void CurOp::reportState(BSONObjBuilder* builder,
9721004
builder->append("waitForWriteConcernDurationMillis",
9731005
durationCount<Milliseconds>(elapsedTimeTotal));
9741006
}
975-
976-
boost::optional<std::tuple<std::string, Microseconds>> currentQueue;
977-
BSONObjBuilder queuesBuilder(builder->subobjStart("queues"));
978-
for (auto&& [queueName, lookup] : gQueueMetricsRegistry) {
979-
AdmissionContext* admCtx = lookup(opCtx);
980-
Microseconds totalTimeQueuedMicros = admCtx->totalTimeQueuedMicros();
981-
982-
if (auto startQueueingTime = admCtx->startQueueingTime()) {
983-
Microseconds currentQueueTimeQueuedMicros = computeElapsedTimeTotal(
984-
*startQueueingTime, opCtx->getServiceContext()->getTickSource()->getTicks());
985-
totalTimeQueuedMicros += currentQueueTimeQueuedMicros;
986-
currentQueue = std::make_tuple(queueName, currentQueueTimeQueuedMicros);
987-
}
988-
989-
BSONObjBuilder queueMetricsBuilder(queuesBuilder.subobjStart(queueName));
990-
queueMetricsBuilder.append("admissions", admCtx->getAdmissions());
991-
queueMetricsBuilder.append("totalTimeQueuedMicros",
992-
durationCount<Microseconds>(totalTimeQueuedMicros));
993-
queueMetricsBuilder.append("isHoldingTicket", admCtx->isHoldingTicket());
994-
queueMetricsBuilder.done();
995-
}
996-
queuesBuilder.done();
997-
998-
if (currentQueue) {
999-
BSONObjBuilder currentQueueBuilder(builder->subobjStart("currentQueue"));
1000-
currentQueueBuilder.append("name", std::get<0>(*currentQueue));
1001-
currentQueueBuilder.append("timeQueuedMicros",
1002-
durationCount<Microseconds>(std::get<1>(*currentQueue)));
1003-
currentQueueBuilder.done();
1004-
} else {
1005-
builder->appendNull("currentQueue");
1006-
}
1007+
populateCurrentOpQueueStats(opCtx, _tickSource, builder);
10071008
}
10081009

10091010
CurOp::AdditiveResourceStats CurOp::getAdditiveResourceStats(
@@ -1329,21 +1330,9 @@ void OpDebug::report(OperationContext* opCtx,
13291330
pAttrs->add("remoteOpWaitMillis", durationCount<Milliseconds>(*remoteOpWaitTime));
13301331
}
13311332

1332-
BSONObjBuilder queuesBuilder;
1333-
for (auto&& [queueName, lookup] : gQueueMetricsRegistry) {
1334-
AdmissionContext* admCtx = lookup(opCtx);
1335-
BSONObjBuilder bb;
1336-
if (auto admissions = admCtx->getAdmissions(); admissions > 0) {
1337-
bb.append("admissions", admissions);
1338-
}
1339-
if (auto queued = durationCount<Microseconds>(admCtx->totalTimeQueuedMicros());
1340-
queued > 0) {
1341-
bb.append("totalTimeQueuedMicros", queued);
1342-
}
1343-
queuesBuilder.append(queueName, bb.obj());
1344-
}
1345-
1346-
pAttrs->add("queues", queuesBuilder.obj());
1333+
// Extract admisson and execution control queueing stats from AdmissionContext stored on opCtx
1334+
TicketHolderQueueStats queueingStats(opCtx);
1335+
pAttrs->add("queues", queueingStats.toBson());
13471336

13481337
// workingMillis should always be present for any operation
13491338
pAttrs->add("workingMillis", workingTimeMillis.count());

src/mongo/db/curop_stats_test.cpp

+57-1
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,14 @@
2727
* it in the license file.
2828
*/
2929

30+
#include "mongo/db/admission/execution_admission_context.h"
31+
#include "mongo/db/admission/ingress_admission_context.h"
3032
#include "mongo/db/curop.h"
3133
#include "mongo/db/prepare_conflict_tracker.h"
3234
#include "mongo/db/service_context_test_fixture.h"
3335
#include "mongo/db/transaction_resources.h"
3436
#include "mongo/unittest/assert.h"
37+
#include "mongo/unittest/bson_test_util.h"
3538
#include "mongo/unittest/framework.h"
3639
#include "mongo/util/tick_source_mock.h"
3740
namespace mongo {
@@ -80,7 +83,7 @@ int64_t addWaitForLock(OperationContext* opCtx,
8083
return stats.get(resId, MODE_S).combinedWaitTimeMicros;
8184
}
8285

83-
void addTicketQueueTime(ExecutionAdmissionContext* admCtx,
86+
void addTicketQueueTime(AdmissionContext* admCtx,
8487
TickSourceMock<Microseconds>* tickSource,
8588
Milliseconds& executionTime,
8689
Milliseconds waitForTickets) {
@@ -438,5 +441,58 @@ TEST_F(CurOpStatsTest, SubOperationStats) {
438441
executionTime2 - waitForTickets2 - waitForFlowControlTicket2 - waitForLocks2);
439442
}
440443

444+
TEST_F(CurOpStatsTest, CheckAdmissionQueueStats) {
445+
auto opCtx = makeOperationContext();
446+
auto curop = CurOp::get(*opCtx);
447+
448+
Milliseconds waitForIngressAdmission = Milliseconds(2);
449+
auto* ingressAdmCtx = &IngressAdmissionContext::get(opCtx.get());
450+
ingressAdmCtx->setAdmission_forTest(5);
451+
452+
Milliseconds waitForExecutionTicket = Milliseconds(5);
453+
auto* executionAdmCtx = &ExecutionAdmissionContext::get(opCtx.get());
454+
executionAdmCtx->setAdmission_forTest(7);
455+
456+
// initialize timer to non-zero value
457+
advanceTime(Milliseconds{100});
458+
curop->setTickSource_forTest(tickSource());
459+
460+
Milliseconds executionTime = Milliseconds(512);
461+
advanceTime(executionTime);
462+
curop->done();
463+
464+
// Add queueing time for ingress admission control
465+
addTicketQueueTime(ingressAdmCtx, tickSource(), executionTime, waitForIngressAdmission);
466+
467+
BSONObjBuilder builder;
468+
SerializationContext sc = SerializationContext::stateCommandReply();
469+
sc.setPrefixState(false);
470+
{
471+
// Simulate operation currently waiting in execution control queue
472+
WaitingForAdmissionGuard waitForAdmission(executionAdmCtx, tickSource());
473+
tickSource()->advance(waitForExecutionTicket);
474+
executionTime += waitForExecutionTicket;
475+
476+
stdx::lock_guard<Client> lk(*opCtx->getClient());
477+
curop->reportState(&builder, sc);
478+
}
479+
auto bsonObj = builder.done();
480+
BSONObj currentQueue = bsonObj.getObjectField("currentQueue");
481+
BSONObj queueStats = bsonObj.getObjectField("queues");
482+
483+
auto expectedCurrentQueue = BSON("name"
484+
<< "execution"
485+
<< "timeQueuedMicros" << 5000);
486+
auto expectedQueueStats =
487+
BSON("execution" << BSON("admissions" << 7 << "totalTimeQueuedMicros" << 5000
488+
<< "isHoldingTicket" << false)
489+
<< "ingress"
490+
<< BSON("admissions" << 5 << "totalTimeQueuedMicros" << 2000
491+
<< "isHoldingTicket" << false));
492+
493+
ASSERT_BSONOBJ_EQ(currentQueue, expectedCurrentQueue);
494+
ASSERT_BSONOBJ_EQ_UNORDERED(queueStats, expectedQueueStats);
495+
}
496+
441497
} // namespace
442498
} // namespace mongo

src/mongo/db/stats/BUILD.bazel

+1
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ mongo_cc_library(
133133
deps = [
134134
"//src/mongo/db:server_base",
135135
"//src/mongo/db/repl:read_concern_args",
136+
"//src/mongo/util/concurrency:ticketholder_queue_stats",
136137
],
137138
)
138139

src/mongo/db/stats/single_transaction_stats.h

+21
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@
4141
#include "mongo/bson/timestamp.h"
4242
#include "mongo/db/client.h"
4343
#include "mongo/db/curop.h"
44+
#include "mongo/db/operation_context.h"
4445
#include "mongo/db/repl/read_concern_args.h"
4546
#include "mongo/db/session/logical_session_id.h"
4647
#include "mongo/rpc/metadata/client_metadata.h"
48+
#include "mongo/util/concurrency/ticketholder_queue_stats.h"
4749
#include "mongo/util/duration.h"
4850
#include "mongo/util/net/hostandport.h"
4951
#include "mongo/util/tick_source.h"
@@ -210,6 +212,21 @@ class SingleTransactionStats {
210212
_lastClientInfo.update(client);
211213
}
212214

215+
216+
/**
217+
* Returns the TicketHolderQueueStats object stored in this SingleTransactionStats.
218+
*/
219+
const TicketHolderQueueStats& getQueueStats() const {
220+
return _queueStats;
221+
}
222+
/**
223+
* Updates the TicketHolderQueueStats to include the latest operation's queueing stats.
224+
*/
225+
void updateQueueStats(OperationContext* opCtx) {
226+
TicketHolderQueueStats newOpQueueStat(opCtx);
227+
_queueStats.add(newOpQueueStat);
228+
}
229+
213230
/**
214231
* Set the autoCommit field. If this field is unset, this is not a transaction but a
215232
* retryable write and other values will not be meaningful.
@@ -286,6 +303,10 @@ class SingleTransactionStats {
286303
// not be set in a transaction that is in state kPrepared if an exception is thrown after the
287304
// transaction transitions to the prepared state but before setPreparedStartTime is called.
288305
boost::optional<TickSource::Tick> _preparedStartTime{boost::none};
306+
307+
// Tracks and aggregates statistics for admission and execution control queueing across all
308+
// operations within a transaction.
309+
TicketHolderQueueStats _queueStats;
289310
};
290311

291312
} // namespace mongo

src/mongo/db/transaction/transaction_metrics_observer.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,9 @@ void TransactionMetricsObserver::onTransactionOperation(OperationContext* opCtx,
212212
// TransactionMetricsObserver with this Client's information. This is the last client that ran a
213213
// transaction operation on the txnParticipant.
214214
_singleTransactionStats.updateLastClientInfo(opCtx->getClient());
215+
216+
// Update TicketHolderQueueStats with the latest operation's queueing data.
217+
_singleTransactionStats.updateQueueStats(opCtx);
215218
}
216219

217220
void TransactionMetricsObserver::_onAbort(OperationContext* opCtx,

src/mongo/db/transaction/transaction_participant.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -2883,6 +2883,8 @@ std::string TransactionParticipant::Participant::_transactionInfoForLog(
28832883
s << " prepareOpTime:" << o().prepareOpTime.toString();
28842884
}
28852885

2886+
s << " queues:" << singleTransactionStats.getQueueStats().toBson().toString();
2887+
28862888
// Total duration of the transaction.
28872889
s << ", "
28882890
<< duration_cast<Milliseconds>(singleTransactionStats.getDuration(tickSource, curTick));
@@ -2955,6 +2957,8 @@ void TransactionParticipant::Participant::_transactionInfoForLog(
29552957
pAttrs->add("prepareOpTime", o().prepareOpTime);
29562958
}
29572959

2960+
pAttrs->add("queues", singleTransactionStats.getQueueStats().toBson());
2961+
29582962
// Total duration of the transaction.
29592963
pAttrs->add(
29602964
"duration",
@@ -3031,6 +3035,8 @@ BSONObj TransactionParticipant::Participant::_transactionInfoBSONForLog(
30313035
attrs.append("prepareOpTime", o().prepareOpTime.toBSON());
30323036
}
30333037

3038+
attrs.append("queues", singleTransactionStats.getQueueStats().toBson());
3039+
30343040
// Total duration of the transaction.
30353041
attrs.append(
30363042
"durationMillis",

0 commit comments

Comments
 (0)