Skip to content

Commit 34656c8

Browse files
moustafamaher authored and MongoDB Bot committed
SERVER-99185 Handle transactionally replicated vectored inserts when restoring config.transactions during rollback (#31750)
GitOrigin-RevId: aee7790
1 parent 8a153d3 commit 34656c8

9 files changed

+312
-70
lines changed

etc/backports_required_for_multiversion_tests.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -374,6 +374,8 @@ last-continuous:
374374
ticket: SERVER-89883
375375
- test_file: jstests/sharding/metadata_refresh_on_shard_removal.js
376376
ticket: SERVER-99277
377+
- test_file: jstests/replsets/rollback_with_coalesced_txn_table_updates_during_oplog_application_inserts.js
378+
ticket: SERVER-99185
377379
suites: null
378380
last-lts:
379381
all:
@@ -795,4 +797,6 @@ last-lts:
795797
ticket: SERVER-89883
796798
- test_file: jstests/sharding/metadata_refresh_on_shard_removal.js
797799
ticket: SERVER-99277
800+
- test_file: jstests/replsets/rollback_with_coalesced_txn_table_updates_during_oplog_application_inserts.js
801+
ticket: SERVER-99185
798802
suites: null

jstests/replsets/rollback_with_coalesced_txn_table_updates_during_oplog_application.js renamed to jstests/replsets/libs/rollback_with_coalesced_txn_table_updates_during_oplog_application_helper.js

Lines changed: 46 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -5,17 +5,19 @@
55
* application.
66
* We also test that if a node crashes after oplog truncation during rollback, the update made to
77
* the 'config.transactions' table is persisted on startup.
8-
*
9-
* @tags: [requires_persistence]
108
*/
119

1210
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
1311
import {ReplSetTest} from "jstests/libs/replsettest.js";
1412

15-
const oplogApplierBatchSize = 100;
16-
17-
function runTest(crashAfterRollbackTruncation) {
13+
let runTest = function(crashAfterRollbackTruncation,
14+
initFunc,
15+
stopReplProducerOnDocumentFunc,
16+
opsFunc,
17+
stmtMajorityCommittedFunc,
18+
validateFunc) {
1819
jsTestLog(`Running test with crashAfterRollbackTruncation = ${crashAfterRollbackTruncation}`);
20+
const oplogApplierBatchSize = 100;
1921
const rst = new ReplSetTest({
2022
nodes: {
2123
n0: {},
@@ -41,8 +43,9 @@ function runTest(crashAfterRollbackTruncation) {
4143
const lsid = ({id: UUID()});
4244
const primary = rst.getPrimary();
4345
const ns = "test.retryable_write_partial_rollback";
44-
assert.commandWorked(
45-
primary.getCollection(ns).insert({_id: 0, counter: 0}, {writeConcern: {w: 5}}));
46+
const counterTotal = oplogApplierBatchSize;
47+
initFunc(primary, ns, counterTotal);
48+
4649
// SERVER-65971: Do a write with `lsid` to add an entry to config.transactions. This write will
4750
// persist after rollback and be updated when the rollback code corrects for omitted writes to
4851
// the document.
@@ -63,25 +66,25 @@ function runTest(crashAfterRollbackTruncation) {
6366
const stopReplProducerFailpoints = [secondary1, secondary2, secondary3, secondary4].map(
6467
conn => configureFailPoint(conn, 'stopReplProducer'));
6568

69+
// Using an odd number ensures that when the 'ReplicateVectoredInsertsTransactionally' feature
70+
// flag is enabled, counterMajorityCommitted + 1 will be in a different oplog entry. This is
71+
// because we're using an internal batching size of 2, resulting in pairs like [0, 1], [2, 3],
72+
// etc.
73+
const counterMajorityCommitted = counterTotal - 5;
74+
6675
// While replication is still entirely disabled, additionally disable replication partway into
6776
// the retryable write on all but the first secondary. The idea is that while secondary1 will
6877
// apply all of the oplog entries in a single batch, the other secondaries will only apply up to
6978
// counterMajorityCommitted oplog entries.
70-
const counterTotal = oplogApplierBatchSize;
71-
const counterMajorityCommitted = counterTotal - 2;
7279
const stopReplProducerOnDocumentFailpoints = [secondary2, secondary3, secondary4].map(
7380
conn => configureFailPoint(conn,
7481
'stopReplProducerOnDocument',
75-
{document: {"diff.u.counter": counterMajorityCommitted + 1}}));
76-
77-
assert.commandWorked(primary.getCollection(ns).runCommand("update", {
78-
updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
79-
lsid,
80-
txnNumber: NumberLong(2),
81-
}));
82+
stopReplProducerOnDocumentFunc(counterMajorityCommitted)));
8283

83-
const stmtMajorityCommitted = primary.getCollection("local.oplog.rs")
84-
.findOne({ns, "o.diff.u.counter": counterMajorityCommitted});
84+
opsFunc(primary, ns, counterTotal, lsid);
85+
const stmtMajorityCommitted =
86+
primary.getCollection("local.oplog.rs")
87+
.findOne(stmtMajorityCommittedFunc(primary, ns, counterMajorityCommitted));
8588
assert.neq(null, stmtMajorityCommitted);
8689

8790
for (const fp of stopReplProducerFailpoints) {
@@ -170,25 +173,30 @@ function runTest(crashAfterRollbackTruncation) {
170173
assert.commandWorked(secondary1.adminCommand({replSetFreeze: 0}));
171174
rst.stepUp(secondary1);
172175

173-
const docBeforeRetry = secondary1.getCollection(ns).findOne({_id: 0});
174-
assert.eq(docBeforeRetry, {_id: 0, counter: counterMajorityCommitted});
175-
176-
assert.commandWorked(secondary1.getCollection(ns).runCommand("update", {
177-
updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
178-
lsid,
179-
txnNumber: NumberLong(2),
180-
writeConcern: {w: 5},
181-
}));
182-
183-
const docAfterRetry = secondary1.getCollection(ns).findOne({_id: 0});
184-
assert.eq(docAfterRetry, {_id: 0, counter: counterTotal});
176+
validateFunc(secondary1, ns, counterMajorityCommitted, counterTotal, lsid);
185177

186178
rst.stopSet();
187-
}
188-
189-
// Test the general scenario where we perform the appropriate update to the 'config.transactions'
190-
// table during rollback.
191-
runTest(false);
192-
// Extends the test to crash the secondary in the middle of rollback right after oplog truncation.
193-
// We assert that the update made to the 'config.transactions' table persisted on startup.
194-
runTest(true);
179+
};
180+
181+
export var runTests = function(
182+
initFunc, stopReplProducerOnDocumentFunc, opsFunc, stmtMajorityCommittedFunc, validateFunc) {
183+
// Test the general scenario where we perform the appropriate update to the
184+
// 'config.transactions'
185+
// table during rollback.
186+
runTest(false,
187+
initFunc,
188+
stopReplProducerOnDocumentFunc,
189+
opsFunc,
190+
stmtMajorityCommittedFunc,
191+
validateFunc);
192+
193+
// Extends the test to crash the secondary in the middle of rollback right after oplog
194+
// truncation. We assert that the update made to the 'config.transactions' table persisted on
195+
// startup.
196+
runTest(true,
197+
initFunc,
198+
stopReplProducerOnDocumentFunc,
199+
opsFunc,
200+
stmtMajorityCommittedFunc,
201+
validateFunc);
202+
};
Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,76 @@
1+
/**
2+
* Tests that the rollback procedure will update the 'config.transactions' table to be consistent
3+
* with the node data at the 'stableTimestamp', specifically in the case where multiple delete ops
4+
* to the 'config.transactions' table were coalesced into a single operation during secondary oplog
5+
* application.
6+
*
7+
* @tags: [requires_persistence]
8+
*/
9+
10+
import {
11+
runTests
12+
} from
13+
"jstests/replsets/libs/rollback_with_coalesced_txn_table_updates_during_oplog_application_helper.js";
14+
15+
const initFunc = (primary, ns, counterTotal) => {
16+
assert.commandWorked(primary.getCollection(ns).runCommand("insert", {
17+
documents: Array.from({length: counterTotal}, (_, i) => ({_id: i})),
18+
writeConcern: {w: 5}
19+
}));
20+
};
21+
22+
const stopReplProducerOnDocumentFunc = (counterMajorityCommitted) => {
23+
return {document: {"_id": counterMajorityCommitted + 1}};
24+
};
25+
26+
const opsFunc = (primary, ns, counterTotal, lsid) => {
27+
assert.commandWorked(primary.getCollection(ns).runCommand("delete", {
28+
deletes: Array.from({length: counterTotal}, (_, i) => ({q: {_id: i}, limit: 1})),
29+
lsid,
30+
txnNumber: NumberLong(2),
31+
}));
32+
};
33+
34+
const stmtMajorityCommittedFunc = (primary, ns, counterMajorityCommitted) => {
35+
return {ns, "o._id": counterMajorityCommitted, "op": "d"};
36+
};
37+
38+
const validateFunc = (secondary1, ns, counterMajorityCommitted, counterTotal, lsid) => {
39+
// Insert docs in the range [0, counterMajorityCommitted], which should have been deleted.
40+
assert.commandWorked(secondary1.getCollection(ns).runCommand("insert", {
41+
documents: Array.from({length: counterMajorityCommitted + 1}, (_, i) => ({_id: i})),
42+
writeConcern: {w: 5}
43+
}));
44+
45+
// Docs in the range [counterMajorityCommitted + 1, counterTotal - 1] should exist because the
46+
// delete statements were rolled back.
47+
for (var i = counterMajorityCommitted + 1; i < counterTotal; i++) {
48+
const docBeforeRetry = secondary1.getCollection(ns).findOne({_id: i});
49+
assert.eq(docBeforeRetry, {_id: i});
50+
}
51+
52+
// Retry the operation which should only delete the range
53+
// [counterMajorityCommitted + 1, counterTotal - 1].
54+
assert.commandWorked(secondary1.getCollection(ns).runCommand("delete", {
55+
deletes: Array.from({length: counterTotal}, (_, i) => ({q: {_id: i}, limit: 1})),
56+
lsid,
57+
txnNumber: NumberLong(2),
58+
writeConcern: {w: 5},
59+
}));
60+
61+
// We should still find the documents in the range [0, counterMajorityCommitted].
62+
for (var i = 0; i <= counterMajorityCommitted; i++) {
63+
const docAfterRetry = secondary1.getCollection(ns).findOne({_id: i});
64+
assert.eq(docAfterRetry, {_id: i});
65+
}
66+
67+
// Docs in the range [counterMajorityCommitted + 1, counterTotal - 1] should be deleted by the
68+
// retry.
69+
for (var i = counterMajorityCommitted + 1; i < counterTotal; i++) {
70+
const docAfterRetry = secondary1.getCollection(ns).findOne({_id: i});
71+
assert.eq(docAfterRetry, null);
72+
}
73+
};
74+
75+
runTests(
76+
initFunc, stopReplProducerOnDocumentFunc, opsFunc, stmtMajorityCommittedFunc, validateFunc);
Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
/**
2+
* Tests that the rollback procedure will update the 'config.transactions' table to be consistent
3+
* with the node data at the 'stableTimestamp', specifically in the case where multiple insert ops
4+
* to the 'config.transactions' table were coalesced into a single operation during secondary oplog
5+
* application.
6+
*
7+
* @tags: [requires_persistence]
8+
*/
9+
10+
import {FeatureFlagUtil} from "jstests/libs/feature_flag_util.js";
11+
import {
12+
runTests
13+
} from
14+
"jstests/replsets/libs/rollback_with_coalesced_txn_table_updates_during_oplog_application_helper.js";
15+
16+
const initFunc = (primary, ns, counterTotal) => {
17+
if (FeatureFlagUtil.isPresentAndEnabled(primary, "ReplicateVectoredInsertsTransactionally")) {
18+
// Set the batch size to 2 so we're testing batching but don't have to insert a huge number
19+
// of documents
20+
assert.commandWorked(
21+
primary.adminCommand({setParameter: 1, internalInsertMaxBatchSize: 2}));
22+
}
23+
};
24+
25+
const stopReplProducerOnDocumentFunc = (counterMajorityCommitted) => {
26+
return {document: {"_id": counterMajorityCommitted + 1}};
27+
};
28+
29+
const opsFunc = (primary, ns, counterTotal, lsid) => {
30+
assert.commandWorked(primary.getCollection(ns).runCommand("insert", {
31+
documents: Array.from({length: counterTotal}, (_, i) => ({_id: i})),
32+
lsid,
33+
txnNumber: NumberLong(2),
34+
}));
35+
};
36+
37+
const stmtMajorityCommittedFunc = (primary, ns, counterMajorityCommitted) => {
38+
if (FeatureFlagUtil.isPresentAndEnabled(primary, "ReplicateVectoredInsertsTransactionally")) {
39+
return {"o.applyOps.ns": ns, "o.applyOps.o._id": counterMajorityCommitted};
40+
} else {
41+
return {ns: ns, "o._id": counterMajorityCommitted};
42+
}
43+
};
44+
45+
const validateFunc = (secondary1, ns, counterMajorityCommitted, counterTotal, lsid) => {
46+
// Make sure we don't re-execute operations that have already been inserted by making sure we
47+
// don't get a 'DuplicateKeyError'.
48+
assert.commandWorked(secondary1.getCollection(ns).runCommand("insert", {
49+
documents: Array.from({length: counterTotal}, (_, i) => ({_id: i})),
50+
lsid,
51+
txnNumber: NumberLong(2),
52+
writeConcern: {w: 5},
53+
}));
54+
};
55+
56+
runTests(
57+
initFunc, stopReplProducerOnDocumentFunc, opsFunc, stmtMajorityCommittedFunc, validateFunc);
Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,54 @@
1+
/**
2+
* Tests that the rollback procedure will update the 'config.transactions' table to be consistent
3+
* with the node data at the 'stableTimestamp', specifically in the case where multiple update ops
4+
* to the 'config.transactions' table were coalesced into a single operation during secondary oplog
5+
* application.
6+
*
7+
* @tags: [requires_persistence]
8+
*/
9+
10+
import {
11+
runTests
12+
} from
13+
"jstests/replsets/libs/rollback_with_coalesced_txn_table_updates_during_oplog_application_helper.js";
14+
15+
const initFunc = (primary, ns, counterTotal) => {
16+
assert.commandWorked(
17+
primary.getCollection(ns).insert({_id: 0, counter: 0}, {writeConcern: {w: 5}}));
18+
};
19+
20+
const stopReplProducerOnDocumentFunc = (counterMajorityCommitted) => {
21+
return {document: {"diff.u.counter": counterMajorityCommitted + 1}};
22+
};
23+
24+
const opsFunc = (primary, ns, counterTotal, lsid) => {
25+
assert.commandWorked(primary.getCollection(ns).runCommand("update", {
26+
updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
27+
lsid,
28+
txnNumber: NumberLong(2),
29+
}));
30+
};
31+
32+
const stmtMajorityCommittedFunc = (primary, ns, counterMajorityCommitted) => {
33+
return {ns, "o.diff.u.counter": counterMajorityCommitted};
34+
};
35+
36+
const validateFunc = (secondary1, ns, counterMajorityCommitted, counterTotal, lsid) => {
37+
const docBeforeRetry = secondary1.getCollection(ns).findOne({_id: 0});
38+
assert.eq(docBeforeRetry, {_id: 0, counter: counterMajorityCommitted});
39+
40+
assert.commandWorked(secondary1.getCollection(ns).runCommand("update", {
41+
updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
42+
lsid,
43+
txnNumber: NumberLong(2),
44+
writeConcern: {w: 5},
45+
}));
46+
47+
// Make sure we don't re-execute operations that have already been updated by making sure that
48+
// counter equals exactly the counterTotal after the retry.
49+
const docAfterRetry = secondary1.getCollection(ns).findOne({_id: 0});
50+
assert.eq(docAfterRetry, {_id: 0, counter: counterTotal});
51+
};
52+
53+
runTests(
54+
initFunc, stopReplProducerOnDocumentFunc, opsFunc, stmtMajorityCommittedFunc, validateFunc);

jstests/replsets/rollback_with_coalesced_txn_table_updates_from_vectored_inserts.js

Lines changed: 11 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,9 @@
33
* with the node data at the 'stableTimestamp', specifically in the case where multiple derived ops
44
* to the 'config.transactions' table were coalesced into a single operation when performing
55
* vectored inserts on the primary.
6-
*
6+
* Note that when the 'ReplicateVectoredInsertsTransactionally' feature flag is enabled, updates to
7+
* the 'config.transactions' table are not coalesced on the primary. However, we are still testing
8+
* this as a sanity check in case the behavior changes in the future.
79
* @tags: [requires_persistence]
810
*/
911

@@ -42,9 +44,14 @@ const [secondary1, secondary2] = rst.getSecondaries();
4244
// while the primary will apply all of the writes in a single storage transaction, the secondaries
4345
// will only apply up to insertBatchMajorityCommitted oplog entries.
4446

45-
let insertBatchTotal;
46-
let insertBatchMajorityCommitted;
47-
let stopReplProducerOnDocumentFailpoints;
47+
let insertBatchTotal = 40;
48+
// Using an odd number ensures that when the 'ReplicateVectoredInsertsTransactionally' feature flag
49+
// is enabled, insertBatchMajorityCommitted + 1 will be in a different oplog entry. This is because
50+
// we're using an internal batching size of 2, resulting in pairs like [0, 1], [2, 3], etc.
51+
let insertBatchMajorityCommitted = insertBatchTotal - 5;
52+
let stopReplProducerOnDocumentFailpoints = [secondary1, secondary2].map(
53+
conn => configureFailPoint(
54+
conn, 'stopReplProducerOnDocument', {document: {"_id": insertBatchMajorityCommitted + 1}}));
4855
let oplogFilterForMajority;
4956

5057
// When ReplicateVectoredInsertsTransactionally is enabled, inserts are batched into applyOps
@@ -53,25 +60,11 @@ if (FeatureFlagUtil.isPresentAndEnabled(primary, "ReplicateVectoredInsertsTransa
5360
// Set the batch size to 2 so we're testing batching but don't have to insert a huge number
5461
// of documents
5562
assert.commandWorked(primary.adminCommand({setParameter: 1, internalInsertMaxBatchSize: 2}));
56-
// Using an odd number tests that the short batch (which should be an 'i', not an applyOps)
57-
// works.
58-
insertBatchTotal = 41;
59-
insertBatchMajorityCommitted = insertBatchTotal - 3;
60-
stopReplProducerOnDocumentFailpoints = [secondary1, secondary2].map(
61-
conn => configureFailPoint(conn, 'stopReplProducerOnDocument', {
62-
document: {"o.applyOps.o._id": insertBatchMajorityCommitted + 1}
63-
}));
6463
oplogFilterForMajority = {
6564
"o.applyOps.ns": ns,
6665
"o.applyOps.o._id": insertBatchMajorityCommitted
6766
};
6867
} else {
69-
insertBatchTotal = 20;
70-
insertBatchMajorityCommitted = insertBatchTotal - 2;
71-
stopReplProducerOnDocumentFailpoints = [secondary1, secondary2].map(
72-
conn => configureFailPoint(conn,
73-
'stopReplProducerOnDocument',
74-
{document: {"_id": insertBatchMajorityCommitted + 1}}));
7568
oplogFilterForMajority = {ns: ns, "o._id": insertBatchMajorityCommitted};
7669
}
7770

0 commit comments

Comments
 (0)