diff --git a/test/replication-luatest/gh_5295_split_brain_test.lua b/test/replication-luatest/gh_5295_split_brain_test.lua
index 9b9cf1c2b19c69e7a220b721661763db639b810d..c34b88f9e45af73a6a9d07b709211d3a1a8b85d3 100644
--- a/test/replication-luatest/gh_5295_split_brain_test.lua
+++ b/test/replication-luatest/gh_5295_split_brain_test.lua
@@ -380,3 +380,99 @@ g_very_old_term.test_confirm = function(cg)
     cg.servers[1]:update_box_cfg({replication = cg.box_cfg.replication})
     cg.cluster:wait_for_fullmesh()
 end
+
+-- Common scenario of the promote split-brain tests. The third server is
+-- isolated from the other two, a leader is promoted in each part, and then
+-- the first server is reconnected to the third one. The first server must
+-- report the upstream from the third server as 'stopped' with the
+-- 'Split-Brain discovered' message.
+--
+-- If with_data is true, a synchronous space is created beforehand, servers 1
+-- and 3 write conflicting tuples while separated, and after the split-brain
+-- is detected each of them must still hold its own tuple.
+local function test_promote_split_brain(cg, with_data)
+    if with_data then
+        cg.servers[1]:exec(function()
+            box.schema.create_space('test', {is_sync = true}):create_index('pk')
+        end)
+        cg.servers[2]:wait_for_vclock_of(cg.servers[1])
+        cg.servers[3]:wait_for_vclock_of(cg.servers[1])
+    end
+    for i = 2, 3 do
+        cg.servers[i]:exec(function()
+            box.cfg{election_mode = 'manual'}
+        end)
+    end
+
+    -- Third server is network partitioned.
+    local new_replication = {
+        cg.servers[1].net_box_uri,
+        cg.servers[2].net_box_uri,
+    }
+
+    cg.servers[3]:exec(update_replication, {})
+    cg.servers[1]:exec(update_replication, new_replication)
+    cg.servers[2]:exec(update_replication, new_replication)
+
+    -- Bump the term on the first server, so that it is greater than the term
+    -- of the third server after its promotion.
+    cg.servers[2]:exec(write_promote)
+    cg.servers[2]:wait_for_election_leader()
+    local term = cg.servers[2]:get_synchro_queue_term()
+    cg.servers[1]:wait_for_synchro_queue_term(term)
+    cg.servers[1]:exec(write_promote)
+    if with_data then
+        cg.servers[1]:exec(function()
+            box.space.test:replace({1, 's1'})
+        end)
+    end
+    cg.servers[3]:exec(function()
+        -- Otherwise it won't be able to start the promotion.
+        box.cfg{replication_synchro_quorum = 1}
+    end)
+    cg.servers[3]:exec(write_promote)
+    t.assert(cg.servers[3]:get_synchro_queue_term() <
+             cg.servers[1]:get_synchro_queue_term())
+    if with_data then
+        cg.servers[3]:exec(function()
+            box.space.test:replace({1, 's3'})
+        end)
+    end
+
+    -- Split-Brain should be noticed, when the first server manages to
+    -- connect to the third one.
+    cg.servers[1]:exec(update_replication, {
+        cg.servers[1].net_box_uri,
+        cg.servers[2].net_box_uri,
+        cg.servers[3].net_box_uri,
+    })
+    cg.servers[1]:exec(function(id)
+        local message = 'Split-Brain discovered'
+        t.helpers.retrying({timeout = 5}, function()
+            local info = box.info.replication[id]
+            t.assert_equals(info.upstream.status, 'stopped')
+            t.assert_str_contains(info.upstream.message, message)
+        end)
+    end, {cg.servers[3]:get_instance_id()})
+    if with_data then
+        cg.servers[1]:exec(function()
+            t.assert_equals(box.space.test:select(1), {{1, 's1'}})
+        end)
+        cg.servers[3]:exec(function()
+            t.assert_equals(box.space.test:select(1), {{1, 's3'}})
+        end)
+    end
+end
+
+g_very_old_term.test_promote_split_brain_without_data = function(cg)
+    -- Test, that we can detect split-brain even if none of the data CONFIRMs
+    -- from the previous terms are received.
+    test_promote_split_brain(cg, false)
+end
+
+g_very_old_term.test_promote_split_brain_with_data = function(cg)
+    -- Test, that replication won't be recovered after split-brain is
+    -- encountered. Data should remain the same, as it was before merging
+    -- separated parts.
+    test_promote_split_brain(cg, true)
+end