From b5811f1560548338c061faed810bf426115fe241 Mon Sep 17 00:00:00 2001 From: Serge Petrenko <sergepetrenko@tarantool.org> Date: Tue, 28 Jun 2022 14:53:42 +0300 Subject: [PATCH] replication: relax split-brain checks after DEMOTE Our txn_limbo_is_replica_outdated check works correctly only when there is a stream of PROMOTE requests. Only the author of the latest PROMOTE is writable and may issue transactions. No matter synchronous or asynchronous. So txn_limbo_is_replica_outdated assumes that everyone but the node with the greatest PROMOTE/DEMOTE term is outdated. This isn't true for DEMOTE requests. There is only one server which issues the DEMOTE request, but once it's written, it's fine to accept asynchronous transactions from everyone. Now the check is too strict. Every time there is an asynchronous transaction from someone, who isn't the author of the latest PROMOTE or DEMOTE, replication is broken with ER_SPLIT_BRAIN. Let's relax it: when limbo owner is 0, it's fine to accept asynchronous transactions from everyone, no matter the term of their latest PROMOTE and DEMOTE. This means that now after a DEMOTE we will miss one case of true split-brain: when old leader continues writing data in an obsolete term, and the new leader first issues PROMOTE and then DEMOTE. This is a tradeoff for making async master-master work after DEMOTE. The completely correct fix would be to write the term the transaction was written in with each transaction and replace txn_limbo_is_replica_outdated with txn_limbo_is_request_outdated, so that we decide whether to filter the request or not judging by the term it was applied in, not by the term we have seen in some past PROMOTE from the node. This fix seems too costly though, given that we only miss one case of split-brain at the moment when the user enables master-master replication (by writing a DEMOTE). And in master-master there is no such thing as a split-brain. 
Follow-up #5295 Closes #7286 NO_DOC=internal change --- .../gh-7286-false-positive-split-brain.md | 3 ++ src/box/applier.cc | 9 ++-- ...h_7286_split_brain_false_positive_test.lua | 47 +++++++++++++++++++ 3 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 changelogs/unreleased/gh-7286-false-positive-split-brain.md create mode 100644 test/replication-luatest/gh_7286_split_brain_false_positive_test.lua diff --git a/changelogs/unreleased/gh-7286-false-positive-split-brain.md b/changelogs/unreleased/gh-7286-false-positive-split-brain.md new file mode 100644 index 0000000000..54f2049fb3 --- /dev/null +++ b/changelogs/unreleased/gh-7286-false-positive-split-brain.md @@ -0,0 +1,3 @@ +## bugfix/replication + +* Fixed a false positive split-brain error after `box.ctl.demote()` (gh-7286). diff --git a/src/box/applier.cc b/src/box/applier.cc index 3d2bae2465..c842e44a84 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -1205,14 +1205,15 @@ applier_synchro_filter_tx(struct stailq *rows) * This means the only filtered out transactions are synchronous ones or * the ones depending on them. * - * Any asynchronous transaction from an obsolete term is a marker of - * split-brain by itself: consider it a synchronous transaction, which - * is committed with quorum 1. + * Any asynchronous transaction from an obsolete term when limbo is + * claimed by someone is a marker of split-brain by itself: consider it + * a synchronous transaction, which is committed with quorum 1. 
*/ struct xrow_header *last_row = &stailq_last_entry(rows, struct applier_tx_row, next)->row; if (!last_row->wait_sync) { - if (iproto_type_is_dml(last_row->type)) { + if (iproto_type_is_dml(last_row->type) && + txn_limbo.owner_id != REPLICA_ID_NIL) { tnt_raise(ClientError, ER_SPLIT_BRAIN, "got an async transaction from an old term"); } diff --git a/test/replication-luatest/gh_7286_split_brain_false_positive_test.lua b/test/replication-luatest/gh_7286_split_brain_false_positive_test.lua new file mode 100644 index 0000000000..5a8b96aa6f --- /dev/null +++ b/test/replication-luatest/gh_7286_split_brain_false_positive_test.lua @@ -0,0 +1,47 @@ +local t = require('luatest') +local cluster = require('test.luatest_helpers.cluster') +local server = require('test.luatest_helpers.server') + +local g = t.group('gh-7286') + +-- gh-7286: false positive ER_SPLIT_BRAIN error after box.ctl.demote() with +-- disabled elections. +g.before_all(function(cg) + cg.cluster = cluster:new{} + + cg.box_cfg = { + -- Just in case it's turned on by default sometime. + election_mode = 'off', + replication_timeout = 0.1, + replication = { + server.build_instance_uri('node1'), + server.build_instance_uri('node2'), + }, + } + cg.node1 = cg.cluster:build_and_add_server{ + alias = 'node1', + box_cfg = cg.box_cfg, + } + cg.node2 = cg.cluster:build_and_add_server{ + alias = 'node2', + box_cfg = cg.box_cfg, + } + cg.cluster:start() +end) + +g.after_all(function(cg) + cg.cluster:drop() +end) + +g.test_false_positive_split_brain = function(cg) + cg.node1:exec(function() + box.ctl.promote() + box.ctl.demote() + end) + cg.node2:wait_vclock_of(cg.node1) + cg.node2:exec(function() + box.space._schema:replace{'smth'} + end) + cg.node1:wait_vclock_of(cg.node2) + cg.node1:assert_follows_upstream(cg.node2:instance_id()) +end -- GitLab