From fb571509a78f697a7a0f901c3448736cb1c2a30f Mon Sep 17 00:00:00 2001 From: Manuel Ung Date: Thu, 13 Feb 2020 18:43:16 -0800 Subject: [PATCH] WriteUnPrepared: Enable WAL during crash recovery (#6418) Summary: Unfortunately, it seems like mysqld reuses xids across machine restarts. When that happens, we could have something like the following happening: ``` BEGIN_PREPARE(unprepared) Put(a) END_PREPARE(xid = 1) -- crash and recover with Put(a) rolled back as it was not prepared BEGIN_PREPARE(prepared) Put(b) END_PREPARE(xid = 1) COMMIT(xid = 1) -- crash and recover with both a, b ``` To solve this, we will have to log the rollback batch into the WAL during recovery. WritePrepared already logs the rollback batch into the WAL, if a rollback happens after prepare, so there is no problem there. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6418 Differential Revision: D19896151 Pulled By: lth fbshipit-source-id: 2ff65ddc5fe75efd57736fed4b7cd7a109d26609 --- .../transactions/write_unprepared_txn_db.cc | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/utilities/transactions/write_unprepared_txn_db.cc b/utilities/transactions/write_unprepared_txn_db.cc index a2290b8d4e2..298b66aba93 100644 --- a/utilities/transactions/write_unprepared_txn_db.cc +++ b/utilities/transactions/write_unprepared_txn_db.cc @@ -21,10 +21,23 @@ Status WriteUnpreparedTxnDB::RollbackRecoveredTransaction( assert(rtxn->unprepared_); auto cf_map_shared_ptr = WritePreparedTxnDB::GetCFHandleMap(); auto cf_comp_map_shared_ptr = WritePreparedTxnDB::GetCFComparatorMap(); + // In theory we could write with disableWAL = true during recovery, and + // assume that if we crash again during recovery, we can just replay from + // the very beginning. Unfortunately, the XIDs from the application may not + // necessarily be unique across restarts, potentially leading to situations + // like this: + // + // BEGIN_PREPARE(unprepared) Put(a) END_PREPARE(xid = 1) + // -- crash and recover with Put(a) rolled back as it was not prepared + // BEGIN_PREPARE(prepared) Put(b) END_PREPARE(xid = 1) + // COMMIT(xid = 1) + // -- crash and recover with both a, b + // + // We could just write the rollback marker, but then we would have to extend + // MemTableInserter during recovery to actually do writes into the DB + // instead of just dropping the in-memory write batch. + // WriteOptions w_options; - // If we crash during recovery, we can just recalculate and rewrite the - // rollback batch. - w_options.disableWAL = true; class InvalidSnapshotReadCallback : public ReadCallback { public: