diff --git a/changelogs/unreleased/gh-5295-split-brain-detection.md b/changelogs/unreleased/gh-5295-split-brain-detection.md
index b3d5f9c3a51fef072ec4b64bca67653163261317..04bc9d9ef03c70cb97104682ed00edf6e55dc82a 100644
--- a/changelogs/unreleased/gh-5295-split-brain-detection.md
+++ b/changelogs/unreleased/gh-5295-split-brain-detection.md
@@ -2,3 +2,8 @@
 
 * Fixed a possible split-brain when old synchro queue owner might finalize the
   transactions in presence of a new synchro queue owner (gh-5295).
+
+* Fixed servers not noticing possible split-brain situations, for example when
+  multiple leaders were working independently due to a manually lowered quorum.
+  Once a node discovers that it has received some foreign data, it immediately
+  stops replication from that node with an ER_SPLIT_BRAIN error (gh-5295).
diff --git a/src/box/applier.cc b/src/box/applier.cc
index 0d68eca511c4e998f3f2ae2fe5e22944a0583dae..861bac2aace445dc41c52a973968d6fb5b820ca1 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -1200,23 +1200,27 @@ applier_synchro_filter_tx(struct stailq *rows)
 	if (!txn_limbo_is_replica_outdated(&txn_limbo, row->replica_id))
 		return;
 
-	if (stailq_last_entry(rows, struct applier_tx_row, next)->row.wait_sync)
-		goto nopify;
-
 	/*
-	 * Not waiting for sync and not a synchro request - this make it already
-	 * NOP or an asynchronous transaction not depending on any synchronous
-	 * ones - let it go as is.
-	 */
-	if (!iproto_type_is_synchro_request(row->type))
-		return;
-	/*
-	 * Do not NOPify promotion, otherwise won't even know who is the limbo
-	 * owner now.
+	 * We do not nopify promotion/demotion and confirm/rollback.
+	 * Such synchronous requests should be filtered by txn_limbo to detect
+	 * possible split-brain situations.
+	 *
+	 * This means the only filtered out transactions are synchronous ones or
+	 * the ones depending on them.
+	 *
+	 * Any asynchronous transaction from an obsolete term is a marker of
+	 * split-brain by itself: consider it a synchronous transaction, which
+	 * is committed with quorum 1.
 	 */
-	if (iproto_type_is_promote_request(row->type))
+	struct xrow_header *last_row =
+		&stailq_last_entry(rows, struct applier_tx_row, next)->row;
+	if (!last_row->wait_sync) {
+		if (iproto_type_is_dml(last_row->type)) {
+			tnt_raise(ClientError, ER_SPLIT_BRAIN,
+				  "got an async transaction from an old term");
+		}
 		return;
-nopify:;
+	}
 	struct applier_tx_row *item;
 	stailq_foreach_entry(item, rows, next) {
 		row = &item->row;
diff --git a/src/box/box.cc b/src/box/box.cc
index b381629dfd592feb191b863e0c0f6befe218d7ae..bfef966fd9934bc97082138c8954d7eda57c97e0 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -3996,6 +3996,13 @@ box_cfg_xc(void)
 	if (box_set_election_mode() != 0)
 		diag_raise();
 
+	/*
+	 * Enable split-brain detection once the node is fully recovered or
+	 * bootstrapped. No split-brain can happen during bootstrap or local
+	 * recovery.
+	 */
+	txn_limbo_filter_enable(&txn_limbo);
+
 	title("running");
 	say_info("ready to accept requests");
 
diff --git a/src/box/errcode.h b/src/box/errcode.h
index eba998f7ed1bac4f821458f88ec6d43595cc199f..733d3f1370c0bdba2f9f91f3542b517ab84cd6e3 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -296,6 +296,7 @@ struct errcode_record {
	/*241 */_(ER_WRONG_SPACE_UPGRADE_OPTIONS, "Wrong space upgrade options: %s") \
	/*242 */_(ER_NO_ELECTION_QUORUM,	"Not enough peers connected to start elections: %d out of minimal required %d")\
	/*243 */_(ER_SSL,			"%s") \
+	/*244 */_(ER_SPLIT_BRAIN,		"Split-Brain discovered: %s") \
 
 /*
  * !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index c506e8ea80a24b9d575c351b0b6080512ffc931d..a1f53af07c733f06471882d483c56f560f96036d 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -35,6 +35,7 @@
 #include "journal.h"
 #include "box.h"
 #include "raft.h"
+#include "tt_static.h"
 
 struct txn_limbo txn_limbo;
 
@@ -55,6 +56,7 @@ txn_limbo_create(struct txn_limbo *limbo)
 	limbo->svp_confirmed_lsn = -1;
 	limbo->frozen_reasons = 0;
 	limbo->is_frozen_until_promotion = true;
+	limbo->do_validate = false;
 }
 
 static inline bool
@@ -783,37 +785,308 @@ txn_limbo_wait_empty(struct txn_limbo *limbo, double timeout)
 	return 0;
 }
 
+static int
+txn_write_cb(struct trigger *trigger, void *event)
+{
+	(void)event;
+	struct fiber *fiber = (struct fiber *)trigger->data;
+	fiber_wakeup(fiber);
+	return 0;
+}
+
+/**
+ * Wait until all the limbo entries receive an lsn.
+ */
+static int
+txn_limbo_wait_persisted(struct txn_limbo *limbo)
+{
+	if (txn_limbo_is_empty(limbo))
+		return 0;
+	struct txn_limbo_entry *e = txn_limbo_last_entry(limbo);
+	while (e != NULL && e->lsn <= 0) {
+		struct trigger on_wal_write;
+		trigger_create(&on_wal_write, txn_write_cb, fiber(), NULL);
+		txn_on_wal_write(e->txn, &on_wal_write);
+		fiber_yield();
+		trigger_clear(&on_wal_write);
+		if (fiber_is_cancelled()) {
+			diag_set(FiberIsCancelled);
+			return -1;
+		}
+		e = txn_limbo_last_entry(limbo);
+	}
+	return 0;
+}
+
+/**
+ * Fill the reject reason with request data.
+ * The function is not reentrant, use with care.
+ */
+static const char *
+reject_str(const struct synchro_request *req)
+{
+	const char *type_name = iproto_type_name(req->type);
+
+	return tt_sprintf("RAFT: rejecting %s (%d) request from origin_id %u "
+			  "replica_id %u term %llu",
+			  type_name ? type_name : "UNKNOWN", req->type,
+			  req->origin_id, req->replica_id,
+			  (long long)req->term);
+}
+
+/**
+ * Common filter for any incoming packet.
+ */
+static int
+txn_limbo_filter_generic(struct txn_limbo *limbo,
+			 const struct synchro_request *req)
+{
+	assert(latch_is_locked(&limbo->promote_latch));
+
+	if (!limbo->do_validate)
+		return 0;
+
+	/*
+	 * Zero replica_id is allowed for PROMOTE packets only.
+	 */
+	if (req->replica_id == REPLICA_ID_NIL) {
+		if (req->type != IPROTO_RAFT_PROMOTE) {
+			say_error("%s. Zero replica_id detected",
+				  reject_str(req));
+			diag_set(ClientError, ER_UNSUPPORTED, "Replication",
+				 "synchronous requests with zero replica_id");
+			return -1;
+		}
+	}
+	if (req->replica_id != limbo->owner_id) {
+		/*
+		 * Incoming packets should respect the limbo owner;
+		 * if it doesn't match, the sender has missed limbo
+		 * owner migrations and is out of date.
+		 */
+		say_error("%s. Limbo owner mismatch, owner_id %u",
+			  reject_str(req), limbo->owner_id);
+		diag_set(ClientError, ER_SPLIT_BRAIN,
+			 "got a request from a foreign synchro queue owner");
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * A common filter for all synchro requests, checking that the request operates
+ * over a valid lsn range.
+ */
+static int
+txn_limbo_filter_queue_boundaries(struct txn_limbo *limbo,
+				  const struct synchro_request *req)
+{
+	int64_t lsn = req->lsn;
+	/*
+	 * Easy case - processed LSN matches the new one which comes inside
+	 * request, everything is consistent. This is allowed only for
+	 * PROMOTE/DEMOTE.
+	 */
+	if (limbo->confirmed_lsn == lsn) {
+		if (iproto_type_is_promote_request(req->type)) {
+			return 0;
+		} else {
+			say_error("%s. Duplicate request with confirmed lsn "
+				  "%lld = request lsn %lld", reject_str(req),
+				  (long long)limbo->confirmed_lsn,
+				  (long long)lsn);
+			diag_set(ClientError, ER_UNSUPPORTED, "Replication",
+				 "Duplicate CONFIRM/ROLLBACK request");
+			return -1;
+		}
+	}
+
+	/*
+	 * Explicit split brain situation. Request comes in with an old LSN
+	 * which we've already processed.
+	 */
+	if (limbo->confirmed_lsn > lsn) {
+		say_error("%s. confirmed lsn %lld > request lsn %lld",
+			  reject_str(req), (long long)limbo->confirmed_lsn,
+			  (long long)lsn);
+		diag_set(ClientError, ER_SPLIT_BRAIN,
+			 "got a request with lsn from an already "
+			 "processed range");
+		return -1;
+	}
+
+	/*
+	 * The last case requires a few subcases.
+	 */
+	assert(limbo->confirmed_lsn < lsn);
+
+	if (txn_limbo_is_empty(limbo)) {
+		/*
+		 * Transactions are rolled back already,
+		 * since the limbo is empty.
+		 */
+		say_error("%s. confirmed lsn %lld < request lsn %lld "
+			  "and empty limbo", reject_str(req),
+			  (long long)limbo->confirmed_lsn,
+			  (long long)lsn);
+		diag_set(ClientError, ER_SPLIT_BRAIN,
+			 "got a request mentioning future lsn");
+		return -1;
+	} else {
+		/*
+		 * Some entries are present in the limbo, so we need to make
+		 * sure that the request lsn lies inside the limbo [first; last]
+		 * range, i.e. the request has some queued data to process.
+		 * Otherwise the request comes from a split-brained node.
+		 */
+		int64_t first_lsn = txn_limbo_first_entry(limbo)->lsn;
+		int64_t last_lsn = txn_limbo_last_synchro_entry(limbo)->lsn;
+
+		if (lsn < first_lsn || last_lsn < lsn) {
+			say_error("%s. request lsn %lld out of range "
+				  "[%lld; %lld]", reject_str(req),
+				  (long long)lsn,
+				  (long long)first_lsn,
+				  (long long)last_lsn);
+			diag_set(ClientError, ER_SPLIT_BRAIN,
+				 "got a request lsn out of queue range");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Filter CONFIRM and ROLLBACK packets.
+ */
+static int
+txn_limbo_filter_confirm_rollback(struct txn_limbo *limbo,
+				  const struct synchro_request *req)
+{
+	assert(latch_is_locked(&limbo->promote_latch));
+	assert(limbo->do_validate);
+	assert(req->type == IPROTO_RAFT_CONFIRM ||
+	       req->type == IPROTO_RAFT_ROLLBACK);
+	/*
+	 * Zero LSN is allowed for PROMOTE and DEMOTE requests only.
+	 */
+	if (req->lsn == 0) {
+		say_error("%s. Zero lsn detected", reject_str(req));
+		diag_set(ClientError, ER_UNSUPPORTED, "Replication",
+			 "zero LSN for CONFIRM/ROLLBACK");
+		return -1;
+	}
+
+	return txn_limbo_filter_queue_boundaries(limbo, req);
+}
+
+/** A filter for PROMOTE and DEMOTE packets. */
+static int
+txn_limbo_filter_promote_demote(struct txn_limbo *limbo,
+				const struct synchro_request *req)
+{
+	assert(latch_is_locked(&limbo->promote_latch));
+	assert(limbo->do_validate);
+	assert(iproto_type_is_promote_request(req->type));
+	/*
+	 * PROMOTE and DEMOTE packets must not have zero
+	 * term supplied, otherwise it is a broken packet.
+	 */
+	if (req->term == 0) {
+		say_error("%s. Zero term detected", reject_str(req));
+		diag_set(ClientError, ER_UNSUPPORTED,
+			 "Replication", "PROMOTE/DEMOTE with a zero term");
+		return -1;
+	}
+
+	/*
+	 * If the term was already seen, the request comes from
+	 * a node which didn't notice the new elections and thus
+	 * has been living in its own subdomain, so its data is
+	 * no longer consistent.
+	 */
+	if (limbo->promote_greatest_term >= req->term) {
+		say_error("%s. Max term seen is %llu", reject_str(req),
+			  (long long)limbo->promote_greatest_term);
+		diag_set(ClientError, ER_SPLIT_BRAIN,
+			 "got a PROMOTE/DEMOTE with an obsolete term");
+		return -1;
+	}
+
+	return txn_limbo_filter_queue_boundaries(limbo, req);
+}
+
+/** A fine-grained filter checking specific request type constraints. */
+static int
+txn_limbo_filter_request(struct txn_limbo *limbo,
+			 const struct synchro_request *req)
+{
+	if (!limbo->do_validate)
+		return 0;
+	/*
+	 * Wait until all the entries receive an lsn. The lsn will be
+	 * used to determine whether the filtered request is safe to apply.
+	 */
+	if (txn_limbo_wait_persisted(limbo) < 0)
+		return -1;
+	switch (req->type) {
+	case IPROTO_RAFT_CONFIRM:
+	case IPROTO_RAFT_ROLLBACK:
+		return txn_limbo_filter_confirm_rollback(limbo, req);
+	case IPROTO_RAFT_PROMOTE:
+	case IPROTO_RAFT_DEMOTE:
+		return txn_limbo_filter_promote_demote(limbo, req);
+	default:
+		unreachable();
+	}
+}
+
 int
 txn_limbo_req_prepare(struct txn_limbo *limbo,
 		      const struct synchro_request *req)
 {
 	assert(latch_is_locked(&limbo->promote_latch));
+
+	if (txn_limbo_filter_generic(limbo, req) < 0)
+		return -1;
+
+	/*
+	 * Guard against new transactions appearing during WAL write. It is
+	 * necessary because otherwise when PROMOTE/DEMOTE would be done and it
+	 * would see a txn without LSN in the limbo, it couldn't tell whether
+	 * the transaction should be confirmed or rolled back. It could be
+	 * delivered to the PROMOTE/DEMOTE initiator even before it reaches the
+	 * local TX thread, or it might not be.
+	 *
+	 * CONFIRM and ROLLBACK need this guard only during the filter stage,
+	 * because the filter needs to see all the transactions' LSNs to work
+	 * correctly.
+	 */
+	assert(!limbo->is_in_rollback);
+	limbo->is_in_rollback = true;
+	if (txn_limbo_filter_request(limbo, req) < 0) {
+		limbo->is_in_rollback = false;
+		return -1;
+	}
+	/* Prepare for request execution and fine-grained filtering. */
 	switch (req->type) {
+	case IPROTO_RAFT_CONFIRM:
+	case IPROTO_RAFT_ROLLBACK:
+		limbo->is_in_rollback = false;
+		break;
 	case IPROTO_RAFT_PROMOTE:
 	case IPROTO_RAFT_DEMOTE: {
-		assert(!limbo->is_in_rollback);
 		assert(limbo->svp_confirmed_lsn == -1);
-		/*
-		 * Guard against new transactions appearing during WAL write. It
-		 * is necessary because otherwise when PROMOTE/DEMOTE would be
-		 * done and it would see a txn without LSN in the limbo, it
-		 * couldn't tell whether the transaction should be confirmed or
-		 * rolled back. It could be delivered to the PROMOTE/DEMOTE
-		 * initiator even before than to the local TX thread, or could
-		 * be not.
-		 */
 		limbo->svp_confirmed_lsn = limbo->confirmed_lsn;
 		limbo->confirmed_lsn = req->lsn;
-		limbo->is_in_rollback = true;
 		break;
 	}
 	/*
 	 * XXX: ideally all requests should go through req_* methods. To unify
 	 * their work from applier and locally.
 	 */
-	default: {
-		break;
-	}
 	}
 	return 0;
 }
@@ -883,41 +1156,9 @@ txn_limbo_req_commit(struct txn_limbo *limbo, const struct synchro_request *req
 				txn_limbo_unfreeze_on_first_promote(&txn_limbo);
 			}
 		}
-	} else if (iproto_type_is_promote_request(req->type) &&
-		   limbo->promote_greatest_term > 1) {
-		/* PROMOTE for outdated term. Ignore. */
-		say_info("RAFT: ignoring %s request from instance "
-			 "id %u for term %llu. Greatest term seen "
-			 "before (%llu) is bigger.",
-			 iproto_type_name(req->type), origin, (long long)term,
-			 (long long)limbo->promote_greatest_term);
-		return;
 	}
 
 	int64_t lsn = req->lsn;
-	if (req->replica_id == REPLICA_ID_NIL) {
-		/*
-		 * The limbo was empty on the instance issuing the request.
-		 * This means this instance must empty its limbo as well.
-		 */
-		assert(lsn == 0 && iproto_type_is_promote_request(req->type));
-	} else if (req->replica_id != limbo->owner_id) {
-		/*
-		 * Ignore CONFIRM/ROLLBACK messages for a foreign master.
-		 * These are most likely outdated messages for already confirmed
-		 * data from an old leader, who has just started and written
-		 * confirm right on synchronous transaction recovery.
-		 */
-		if (!iproto_type_is_promote_request(req->type))
-			return;
-		/*
-		 * Promote has a bigger term, and tries to steal the limbo. It
-		 * means it probably was elected with a quorum, and it makes no
-		 * sense to wait here for confirmations. The other nodes already
-		 * elected a new leader. Rollback all the local txns.
-		 */
-		lsn = 0;
-	}
 	switch (req->type) {
 	case IPROTO_RAFT_CONFIRM:
 		txn_limbo_read_confirm(limbo, lsn);
@@ -997,6 +1238,14 @@ txn_limbo_unfence(struct txn_limbo *limbo)
 	box_update_ro_summary();
 }
 
+void
+txn_limbo_filter_enable(struct txn_limbo *limbo)
+{
+	latch_lock(&limbo->promote_latch);
+	limbo->do_validate = true;
+	latch_unlock(&limbo->promote_latch);
+}
+
 void
 txn_limbo_init(void)
 {
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index 430379bf09dc23e93d6d9cb5ec0b8e90716032e1..cdd7a14dd76e9884ad470e108cf7373b5c0df6f9 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -215,6 +215,15 @@ struct txn_limbo {
 		bool is_frozen_until_promotion : 1;
 		};
 	};
+	/**
+	 * Whether this instance validates incoming synchro requests. When the
+	 * setting is on, the instance only allows CONFIRM/ROLLBACK from the
+	 * limbo owner and tracks PROMOTE/DEMOTE term and owner_id consistency.
+	 * The filtering is turned off during bootstrap, because it makes no
+	 * sense when applying a full copy of a remote instance's data. There
+	 * can't be any inconsistencies.
+	 */
+	bool do_validate;
 };
 
 /**
@@ -362,8 +371,8 @@ txn_limbo_rollback(struct txn_limbo *limbo)
 }
 
 /**
- * Prepare a limbo request for WAL write and commit. Similar to txn_stmt
- * prepare.
+ * Prepare a limbo request for WAL write and commit, and check that the
+ * request is valid. Similar to txn_stmt prepare.
  */
 int
 txn_limbo_req_prepare(struct txn_limbo *limbo,
@@ -427,6 +436,10 @@ txn_limbo_write_demote(struct txn_limbo *limbo, int64_t lsn, uint64_t term);
 void
 txn_limbo_on_parameters_change(struct txn_limbo *limbo);
 
+/** Start filtering incoming synchro requests. */
+void
+txn_limbo_filter_enable(struct txn_limbo *limbo);
+
 /**
  * Freeze limbo. Prevent CONFIRMs and ROLLBACKs until limbo is unfrozen.
  */
diff --git a/test/box/error.result b/test/box/error.result
index 05244744c75d0613f891e70cae075f1d8813e0f8..5128cb01c67019f8d2ca418ed9cd45ab01cfcb4a 100644
--- a/test/box/error.result
+++ b/test/box/error.result
@@ -462,6 +462,7 @@ t;
  | 241: box.error.WRONG_SPACE_UPGRADE_OPTIONS
  | 242: box.error.NO_ELECTION_QUORUM
  | 243: box.error.SSL
+ | 244: box.error.SPLIT_BRAIN
  | ...
test_run:cmd("setopt delimiter ''"); diff --git a/test/replication-luatest/gh_5295_split_brain_test.lua b/test/replication-luatest/gh_5295_split_brain_test.lua new file mode 100644 index 0000000000000000000000000000000000000000..55434648911bff8912d8382bc10de08736204b65 --- /dev/null +++ b/test/replication-luatest/gh_5295_split_brain_test.lua @@ -0,0 +1,240 @@ +local t = require('luatest') +local cluster = require('test.luatest_helpers.cluster') +local server = require('test.luatest_helpers.server') + +local g = t.group('gh-5295') + +-- To distinguish replicas by name +local test_id = 0 + +-- gh-5295: the server should stop replication from an upstream which sends data +-- conflicting in terms of promote / confirmed_lsn. +-- +-- The test requires one instance in replicaset per each split-brain case, plus +-- one "main" instance. The idea of each test case is first to join the instance +-- and then partition it from the main server. Once partitioned, create a +-- split-brain situation between the partitioned node and main. Then check that +-- the partitioned node can't reconnect. + +g.before_all(function(cg) + cg.cluster = cluster:new({}) + + cg.box_cfg = { + replication_timeout = 0.1, + replication_synchro_quorum = 1, + replication_synchro_timeout = 0.01, + election_mode = 'manual', + election_timeout = 0.1, + election_fencing_enabled = false, + log_level = 6, + } + + cg.main = cg.cluster:build_and_add_server{ + alias = 'main', + box_cfg = cg.box_cfg, + } + cg.cluster:start() + + cg.main:exec(function() + box.ctl.promote() + box.ctl.wait_rw() + local s = box.schema.space.create('sync', {is_sync = true}) + s:create_index('pk') + s = box.schema.space.create('async') + s:create_index('pk') + -- Check the test correctness. + assert(box.info.id == 1) + end) +end) + +local function update_replication(...) + return box.cfg{replication = {...}} +end + +g.before_each(function(cg) + -- Check that the servers start synced and with main being leader. + -- It's a prerequisite for each test. + cg.main:exec(function() + require('luatest').assert_equals(box.info.synchro.queue.owner, + box.info.id, 'main node is leader') + end) + + test_id = test_id + 1 + cg.box_cfg.replication = { + server.build_instance_uri('main'), + server.build_instance_uri('split_replica'..test_id), + } + cg.split_replica = cg.cluster:build_and_add_server{ + alias = 'split_replica'..test_id, + box_cfg = cg.box_cfg, + } + cg.split_replica:start() + t.helpers.retrying({}, function() + cg.split_replica:assert_follows_upstream(1) + end) + + cg.main:exec(update_replication, cg.box_cfg.replication) + t.helpers.retrying({}, function() + cg.main:assert_follows_upstream(2) + end) +end) + +-- Drop the partitioned server after each case of split-brain. +g.after_each(function(cg) + cg.split_replica:stop() + cg.split_replica:cleanup() + -- Drop the replica's cluster entry, so that next one receives same id. + cg.main:exec(function() box.space._cluster:delete{2} end) + cg.cluster.servers[2] = nil +end) + +g.after_all(function(cg) + cg.cluster:drop() + cg.cluster.servers = nil +end) + +local function partition_replica(cg) + -- Each partitioning starts on synced servers. 
+    cg.split_replica:wait_vclock_of(cg.main)
+    cg.main:wait_vclock_of(cg.split_replica)
+    cg.split_replica:exec(update_replication, {})
+    cg.main:exec(update_replication, {})
+end
+
+local function reconnect_and_check_split_brain(srv)
+    srv:exec(update_replication, {server.build_instance_uri('main')})
+    t.helpers.retrying({}, srv.exec, srv, function()
+        local upstream = box.info.replication[1].upstream
+        local t = require('luatest')
+        t.assert_equals(upstream.status, 'stopped', 'replication is stopped')
+        t.assert_str_contains(upstream.message, 'Split-Brain discovered: ',
+                              false, 'split-brain is discovered')
+    end)
+end
+
+local function write_promote()
+    local t = require('luatest')
+    t.assert_not_equals(box.info.synchro.queue.owner, box.info.id,
+                        "Promoting a follower")
+    box.ctl.promote()
+    box.ctl.wait_rw()
+    t.helpers.retrying({}, function()
+        t.assert_equals(box.info.synchro.queue.owner, box.info.id,
+                        "Promote succeeded")
+    end)
+end
+
+local function write_demote()
+    local t = require('luatest')
+    t.assert_equals(box.info.synchro.queue.owner, box.info.id,
+                    "Demoting the leader")
+    box.cfg{election_mode = 'off'}
+    box.ctl.demote()
+    box.cfg{election_mode = 'manual'}
+    t.assert_equals(box.info.synchro.queue.owner, 0, "Demote succeeded")
+end
+
+-- Any async transaction performed in an obsolete term means a split-brain.
+g.test_async_old_term = function(cg)
+    partition_replica(cg)
+    cg.split_replica:exec(write_promote)
+    cg.main:exec(function() box.space.async:replace{1} end)
+    reconnect_and_check_split_brain(cg.split_replica)
+end
+
+-- Any unseen sync transaction confirmation from an obsolete term means a
+-- split-brain.
+g.test_confirm_old_term = function(cg)
+    partition_replica(cg)
+    cg.split_replica:exec(write_promote)
+    cg.main:exec(function() box.space.sync:replace{1} end)
+    reconnect_and_check_split_brain(cg.split_replica)
+end
+
+-- Any unseen sync transaction rollback from an obsolete term means a
+-- split-brain.
+g.test_rollback_old_term = function(cg)
+    partition_replica(cg)
+    cg.split_replica:exec(write_promote)
+    cg.main:exec(function()
+        box.cfg{replication_synchro_quorum = 31}
+        pcall(box.space.sync.replace, box.space.sync, {1})
+        box.cfg{replication_synchro_quorum = 1}
+    end)
+    reconnect_and_check_split_brain(cg.split_replica)
+end
+
+-- Conflicting demote for the same term is a split-brain.
+g.test_demote_same_term = function(cg)
+    partition_replica(cg)
+    cg.split_replica:exec(write_promote)
+    cg.main:exec(write_demote)
+    reconnect_and_check_split_brain(cg.split_replica)
+    cg.main:exec(write_promote)
+end
+
+-- Conflicting promote for the same term is a split-brain.
+g.test_promote_same_term = function(cg)
+    cg.main:exec(write_demote)
+    partition_replica(cg)
+    cg.split_replica:exec(write_promote)
+    cg.main:exec(write_promote)
+    reconnect_and_check_split_brain(cg.split_replica)
+end
+
+-- Promote from a bigger term with lsn < confirmed_lsn is a split brain.
+g.test_promote_new_term_small_lsn = function(cg)
+    cg.split_replica:exec(write_promote)
+    partition_replica(cg)
+    cg.split_replica:exec(function() box.space.sync:replace{1} end)
+    cg.main:exec(write_promote)
+    reconnect_and_check_split_brain(cg.split_replica)
+end
+
+local function fill_queue_and_write(server)
+    local wal_write_count = server:exec(function()
+        local fiber = require('fiber')
+        box.cfg{
+            replication_synchro_quorum = 31,
+            replication_synchro_timeout = 1000,
+        }
+        local write_cnt = box.error.injection.get('ERRINJ_WAL_WRITE_COUNT')
+        fiber.new(box.space.sync.replace, box.space.sync, {1})
+        return write_cnt
+    end)
+    t.helpers.retrying({}, server.exec, server, function(cnt)
+        local t = require('luatest')
+        local new_cnt = box.error.injection.get('ERRINJ_WAL_WRITE_COUNT')
+        t.assert(new_cnt > cnt, 'WAL write succeeded')
+    end, {wal_write_count})
+end
+
+local function perform_rollback(server)
+    assert(server:exec(function() return box.info.synchro.queue.len end) > 0)
+    server:exec(function() box.cfg{replication_synchro_timeout = 0.01} end)
+    t.helpers.retrying({delay = 0.1}, server.exec, server, function()
+        require('luatest').assert_equals(box.info.synchro.queue.len, 0,
+                                         'Rollback happened')
+    end)
+end
+
+-- Promote from a bigger term with lsn > confirmed_lsn is a split brain.
+g.test_promote_new_term_big_lsn = function(cg)
+    cg.split_replica:exec(write_promote)
+    fill_queue_and_write(cg.split_replica)
+    partition_replica(cg)
+    perform_rollback(cg.split_replica)
+    cg.main:exec(write_promote)
+    reconnect_and_check_split_brain(cg.split_replica)
+end
+
+-- Promote from a bigger term with conflicting queue contents is a split brain.
+g.test_promote_new_term_conflicting_queue = function(cg)
+    cg.split_replica:exec(write_promote)
+    fill_queue_and_write(cg.split_replica)
+    partition_replica(cg)
+    perform_rollback(cg.split_replica)
+    cg.main:exec(write_promote)
+    fill_queue_and_write(cg.split_replica)
+    reconnect_and_check_split_brain(cg.split_replica)
+end
diff --git a/test/replication-luatest/suite.ini b/test/replication-luatest/suite.ini
index cdeaec7ffd01ffcb3a9c866897bb13f040361cfa..5dcc1479a3ccd4790f99bcfe7c218a3dea1a4b25 100644
--- a/test/replication-luatest/suite.ini
+++ b/test/replication-luatest/suite.ini
@@ -2,4 +2,4 @@
 core = luatest
 description = replication luatests
 is_parallel = True
-release_disabled = gh_6036_qsync_order_test.lua gh_6842_qsync_applier_order_test.lua
+release_disabled = gh_5295_split_brain_test.lua gh_6036_qsync_order_test.lua gh_6842_qsync_applier_order_test.lua
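Not part of the patch: a minimal illustrative sketch, in Lua, of how the new error is expected to surface on a replica. It uses only the box.info.replication upstream fields that the test above already asserts on; the helper name and the upstream id are hypothetical.

-- Hypothetical helper: returns true if replication from the given upstream
-- was stopped because of the new ER_SPLIT_BRAIN error.
local function upstream_is_split_brain(upstream_id)
    local src = box.info.replication[upstream_id]
    local upstream = src and src.upstream
    if upstream == nil then
        return false
    end
    -- The applier stops with ER_SPLIT_BRAIN, so the upstream status becomes
    -- 'stopped' and the message carries the "Split-Brain discovered" prefix.
    return upstream.status == 'stopped' and upstream.message ~= nil and
           upstream.message:find('Split%-Brain discovered') ~= nil
end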