From 0e48475d234e3b950dd9b6d07e82c8a9dc8ab658 Mon Sep 17 00:00:00 2001 From: Serge Petrenko <sergepetrenko@tarantool.org> Date: Wed, 15 Jun 2022 16:46:24 +0300 Subject: [PATCH] txn_limbo: fence upon receiving raft term greater than queue term Receiving a raft term greater than the current queue term means that someone has either already written PROMOTE (in case elections are disabled), or is going to write PROMOTE once he wins the elections (in case they are enabled). In both cases the queue owner in an old term should freeze the limbo until queue term catches up with raft term. Unfreezing happens automatically once synchro queue term catches up. Part-of #5295 NO_DOC=covered by next commit --- changelogs/unreleased/gh-5295-split-brain-detection.md | 4 ++++ src/box/raft.c | 7 +++++++ src/box/txn_limbo.c | 9 +++++---- .../gh_6842_qsync_applier_order_test.lua | 8 ++++---- 4 files changed, 20 insertions(+), 8 deletions(-) create mode 100644 changelogs/unreleased/gh-5295-split-brain-detection.md diff --git a/changelogs/unreleased/gh-5295-split-brain-detection.md b/changelogs/unreleased/gh-5295-split-brain-detection.md new file mode 100644 index 0000000000..b3d5f9c3a5 --- /dev/null +++ b/changelogs/unreleased/gh-5295-split-brain-detection.md @@ -0,0 +1,4 @@ +## bugfix/replication + +* Fixed a possible split-brain when old synchro queue owner might finalize the + transactions in presence of a new synchro queue owner (gh-5295). diff --git a/src/box/raft.c b/src/box/raft.c index 25da3f5df9..e784a58668 100644 --- a/src/box/raft.c +++ b/src/box/raft.c @@ -201,6 +201,13 @@ box_raft_on_update_f(struct trigger *trigger, void *event) */ box_update_ro_summary(); box_broadcast_election(); + /* + * Once the node becomes read-only due to new term, it should stop + * finalizing existing synchronous transactions so that it doesn't + * trigger split-brain with a new leader which will soon emerge. + */ + if (raft->volatile_term > txn_limbo.promote_greatest_term) + txn_limbo_fence(&txn_limbo); if (raft->state != RAFT_STATE_LEADER) return 0; /* diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index 791c68f566..d85e33bb2c 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -34,6 +34,7 @@ #include "iproto_constants.h" #include "journal.h" #include "box.h" +#include "raft.h" struct txn_limbo txn_limbo; @@ -64,9 +65,8 @@ txn_limbo_is_frozen(const struct txn_limbo *limbo) bool txn_limbo_is_ro(struct txn_limbo *limbo) { - return txn_limbo_is_frozen(limbo) || - (limbo->owner_id != REPLICA_ID_NIL && - limbo->owner_id != instance_id); + return limbo->owner_id != REPLICA_ID_NIL && + (limbo->owner_id != instance_id || txn_limbo_is_frozen(limbo)); } struct txn_limbo_entry * @@ -854,7 +854,8 @@ txn_limbo_req_commit(struct txn_limbo *limbo, const struct synchro_request *req) vclock_follow(&limbo->promote_term_map, origin, term); if (term > limbo->promote_greatest_term) { limbo->promote_greatest_term = term; - if (iproto_type_is_promote_request(req->type)) + if (iproto_type_is_promote_request(req->type) && + term >= box_raft()->volatile_term) txn_limbo_unfence(&txn_limbo); } } else if (iproto_type_is_promote_request(req->type) && diff --git a/test/replication-luatest/gh_6842_qsync_applier_order_test.lua b/test/replication-luatest/gh_6842_qsync_applier_order_test.lua index de9214c087..5c327337e1 100644 --- a/test/replication-luatest/gh_6842_qsync_applier_order_test.lua +++ b/test/replication-luatest/gh_6842_qsync_applier_order_test.lua @@ -138,8 +138,8 @@ end -- -- Server 1 was a synchro queue owner. Then it receives a foreign PROMOTE which -- goes to WAL but is not applied yet. Server 1 during that tries to make a --- synchro transaction. It is expected to be aborted at commit attempt, because --- the queue is already in the process of ownership transition. +-- synchro transaction. It is expected to be aborted, because server gets fenced +-- seeing a new term where it isn't the limbo owner. -- g.test_local_txn_during_remote_promote = function(g) -- Server 1 takes the synchro queue. @@ -186,8 +186,8 @@ g.test_local_txn_during_remote_promote = function(g) luatest.assert(not ok1 and not ok2 and err1 and err2, 'both transactions failed') luatest.assert_equals(err1.code, err2.code, 'same error') - luatest.assert_equals(err1.code, box.error.SYNC_ROLLBACK, - 'error is synchro rollback') + luatest.assert_equals(err1.code, box.error.READONLY, + 'error is read-only') -- Server 1 correctly processed the remote PROMOTE. wait_synchro_owner(g.server1, g.server2:instance_id()) -- GitLab