From eff7d524b539319fe061f7a2ba2216a882885286 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov <ivadmi5@gmail.com> Date: Tue, 23 Jul 2024 05:45:39 +0300 Subject: [PATCH] test(qsync): fix tests and drop the ones incompatible with new RAFT_PROMOTE NO_DOC=internal NO_TEST=internal NO_CHANGELOG=internal --- src/box/box.cc | 10 ++ src/lib/core/errinj.h | 1 + test/box-luatest/builtin_events_test.lua | 4 +- test/box/errinj.result | 1 + .../gh_5568_read_only_reason_test.lua | 6 +- .../gh_6033_box_promote_demote_test.lua | 50 +----- .../gh_6842_qsync_applier_order_test.lua | 10 +- ...h_7286_split_brain_false_positive_test.lua | 2 +- .../gh_7592_local_write_with_syncro_test.lua | 2 +- .../gh_8996_synchro_filter_enable_test.lua | 78 --------- .../replication-luatest/linearizable_test.lua | 8 +- test/replication/election_basic.result | 2 +- test/replication/election_basic.test.lua | 2 +- .../gh-5140-qsync-casc-rollback.result | 4 +- .../gh-5140-qsync-casc-rollback.test.lua | 4 +- .../gh-5144-qsync-dup-confirm.result | 2 +- .../gh-5144-qsync-dup-confirm.test.lua | 2 +- .../gh-5167-qsync-rollback-snap.result | 2 +- .../gh-5167-qsync-rollback-snap.test.lua | 2 +- .../gh-5195-qsync-replica-write.result | 2 +- .../gh-5195-qsync-replica-write.test.lua | 2 +- .../gh-5213-qsync-applier-order-3.result | 15 +- .../gh-5213-qsync-applier-order-3.test.lua | 10 +- .../gh-5430-qsync-promote-crash.result | 159 ------------------ .../gh-5430-qsync-promote-crash.test.lua | 76 --------- .../gh-6034-qsync-limbo-ownership.result | 6 +- .../gh-6034-qsync-limbo-ownership.test.lua | 6 +- .../gh-6035-election-filter.result | 2 +- .../gh-6035-election-filter.test.lua | 2 +- test/replication/qsync_advanced.result | 6 +- test/replication/qsync_advanced.test.lua | 6 +- test/replication/qsync_basic.result | 4 +- test/replication/qsync_basic.test.lua | 4 +- test/replication/qsync_errinj.result | 2 +- test/replication/qsync_errinj.test.lua | 2 +- test/replication/qsync_snapshots.result | 2 +- test/replication/qsync_snapshots.test.lua | 2 +- 37 files changed, 86 insertions(+), 414 deletions(-) delete mode 100644 test/replication-luatest/gh_8996_synchro_filter_enable_test.lua delete mode 100644 test/replication/gh-5430-qsync-promote-crash.result delete mode 100644 test/replication/gh-5430-qsync-promote-crash.test.lua diff --git a/src/box/box.cc b/src/box/box.cc index b48a21fb94..d4aaffc16d 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -2686,6 +2686,16 @@ box_promote(void) is_in_box_promote = false; if (box_raft_try_promote() != 0) return -1; + /* + * XXX: this restores the previous buggy behavior: when + * the function exits, it forcibly resets is_in_box_promote + * even if the raft machine fiber is stuck on a write, which + * should be protected by this flag. + * This is needed for gh_6033_box_promote_demote_test.lua. + */ + ERROR_INJECT(ERRINJ_LIMBO_WRITE_PROMOTE_FAST_EXIT, { + return 0; + }); txn_limbo_wait_promote_attempts(&txn_limbo, promote_attempts + 1, TIMEOUT_INFINITY); diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index ecc236821d..dcbaef23c0 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -103,6 +103,7 @@ struct errinj { _(ERRINJ_IPROTO_TX_DELAY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_IPROTO_WRITE_ERROR_DELAY, ERRINJ_BOOL, {.bparam = false})\ _(ERRINJ_LIMBO_PROMOTE_ACK_SLEEP, ERRINJ_BOOL, {.bparam = false})\ + _(ERRINJ_LIMBO_WRITE_PROMOTE_FAST_EXIT, ERRINJ_BOOL, {.bparam = false})\ _(ERRINJ_LIMBO_WRITE_PROMOTE_SLEEP, ERRINJ_BOOL, {.bparam = false})\ _(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_MAIN_MAKE_FILE_ON_RETURN, ERRINJ_BOOL, {.bparam = false}) \ diff --git a/test/box-luatest/builtin_events_test.lua b/test/box-luatest/builtin_events_test.lua index 1d9173fb0f..5b104feaf0 100644 --- a/test/box-luatest/builtin_events_test.lua +++ b/test/box-luatest/builtin_events_test.lua @@ -129,8 +129,8 @@ g.test_box_status = function(cg) {is_ro = true, is_ro_cfg = false, status = 'running'}) -- promotion should turn rm - cg.master:exec(function() box.ctl.promote() end) - t.helpers.retrying({}, function() t.assert_equals(result_no, 7) end) + cg.master:exec(function() box.ctl.promote(); box.ctl.wait_rw() end) + t.helpers.retrying({}, function() t.assert_ge(result_no, 7) end) t.assert_equals(result, {is_ro = false, is_ro_cfg = false, status = 'running'}) diff --git a/test/box/errinj.result b/test/box/errinj.result index edd5022067..78ecb10ca7 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -77,6 +77,7 @@ evals - ERRINJ_IPROTO_TX_DELAY: false - ERRINJ_IPROTO_WRITE_ERROR_DELAY: false - ERRINJ_LIMBO_PROMOTE_ACK_SLEEP: false + - ERRINJ_LIMBO_WRITE_PROMOTE_FAST_EXIT: false - ERRINJ_LIMBO_WRITE_PROMOTE_SLEEP: false - ERRINJ_LOG_ROTATE: false - ERRINJ_MAIN_MAKE_FILE_ON_RETURN: false diff --git a/test/replication-luatest/gh_5568_read_only_reason_test.lua b/test/replication-luatest/gh_5568_read_only_reason_test.lua index 0777ab9ae3..6558f5abfd 100644 --- a/test/replication-luatest/gh_5568_read_only_reason_test.lua +++ b/test/replication-luatest/gh_5568_read_only_reason_test.lua @@ -177,6 +177,7 @@ g.test_read_only_reason_election_has_leader = function(g) box.cfg{election_mode = 'voter'} end) g.master:wait_for_election_leader() + g.master:wait_for_synchro_queue_term(g.master:get_election_term()) g.replica:wait_until_election_leader_found() local ok, err = g.replica:exec(function() @@ -220,7 +221,7 @@ g.test_read_only_reason_synchro = function(g) replication_synchro_quorum = 2, replication_synchro_timeout = 1000000, } - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() end) t.helpers.retrying({}, function() @@ -279,6 +280,7 @@ g.test_read_only_reason_election_has_leader_no_uuid = function(g) } end) g.master:wait_for_election_leader() + g.master:wait_for_synchro_queue_term(g.master:get_election_term()) g.replica:wait_until_election_leader_found() local leader_id = g.master:get_instance_id() @@ -324,7 +326,7 @@ g.test_read_only_reason_synchro_no_uuid = function(g) replication_synchro_quorum = 2, replication_synchro_timeout = 1000000, } - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() box.space._cluster:run_triggers(false) box.space._cluster:delete{box.info.id} end) diff --git a/test/replication-luatest/gh_6033_box_promote_demote_test.lua b/test/replication-luatest/gh_6033_box_promote_demote_test.lua index fa1d0455b6..2730e29cfb 100644 --- a/test/replication-luatest/gh_6033_box_promote_demote_test.lua +++ b/test/replication-luatest/gh_6033_box_promote_demote_test.lua @@ -57,6 +57,9 @@ local function fiber_join(server, fiber) end local function wal_delay_start(server, countdown) + server:exec(function() + box.error.injection.set('ERRINJ_LIMBO_WRITE_PROMOTE_FAST_EXIT', true) + end) if countdown == nil then server:exec(function() box.error.injection.set('ERRINJ_WAL_DELAY', true) @@ -434,48 +437,6 @@ g_common.test_limbo_full_interfering_promote = function(g) wait_sync(g.cluster.servers) end --- Demoting should fail if it is interrupted from another server --- while waiting for synchro queue being emptied. -g_common.test_limbo_full_interfering_demote = function(g) - promote(g.server_1) - wait_sync(g.cluster.servers) - - g.server_1:exec(function() - box.schema.create_space('test', {is_sync = true}):create_index('pk') - end) - - box_cfg_update({g.server_1}, { - replication_synchro_quorum = 3, - replication_synchro_timeout = 1000, - }) - - box_cfg_update({g.server_2}, { - replication_synchro_timeout = 0.1, - }) - - g.server_1:exec(function() - local s = box.space.test - require('fiber').create(s.replace, s, {1}) - end) - wait_sync(g.cluster.servers) - - -- Start demoting server_1 and interrupt it from server_2 - local f = demote_start(g.server_1) - local term = g.server_1:get_synchro_queue_term() - g.server_2:exec(function() pcall(box.ctl.promote) end) - g.server_1:wait_for_synchro_queue_term(term + 1) - - local ok, err = fiber_join(g.server_1, f) - luatest.assert(not ok and err.code == box.error.INTERFERING_PROMOTE, - 'Interfering demote fails') - - wait_sync(g.cluster.servers) - promote(g.server_1) - g.server_1:exec(function() box.space.test:drop() end) - demote(g.server_1) - wait_sync(g.cluster.servers) -end - -- Promoting should fail if synchro queue replication timeouts during it g_common.test_fail_limbo_ack_promote = function(g) box_cfg_update({g.server_1}, { @@ -484,12 +445,13 @@ g_common.test_fail_limbo_ack_promote = function(g) }) box_cfg_update({g.server_2}, { - replication_synchro_quorum = 3, + replication_synchro_quorum = 1, replication_synchro_timeout = 1000, }) + promote(g.server_2) + box_cfg_update({g.server_2}, {replication_synchro_quorum = 3}) -- fill synchro queue on server_1 - promote(g.server_2) g.server_2:exec(function() local s = box.schema.create_space('test', {is_sync = true}) s:create_index('pk') diff --git a/test/replication-luatest/gh_6842_qsync_applier_order_test.lua b/test/replication-luatest/gh_6842_qsync_applier_order_test.lua index b3dfa82dab..bd8518d0d4 100644 --- a/test/replication-luatest/gh_6842_qsync_applier_order_test.lua +++ b/test/replication-luatest/gh_6842_qsync_applier_order_test.lua @@ -124,7 +124,7 @@ end g.test_local_txn_during_remote_promote = function(g) -- Server 1 takes the synchro queue. g.server1:exec(function() - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() box.cfg{ -- To hang own transactions in the synchro queue. replication_synchro_quorum = 3, @@ -198,7 +198,7 @@ end g.test_remote_promote_during_local_txn_including_it = function(g) -- Start synchro txns on server 1. local fids = g.server1:exec(function() - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() local s = box.schema.create_space('test', {is_sync = true}) s:create_index('pk') box.cfg{ @@ -226,7 +226,7 @@ g.test_remote_promote_during_local_txn_including_it = function(g) -- the promotion ASAP. replication_synchro_timeout = 0.001, } - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() box.cfg{ replication_synchro_quorum = 2, replication_synchro_timeout = 1000, @@ -287,7 +287,7 @@ end g.test_remote_promote_during_local_txn_not_including_it = function(g) -- Start a synchro txn on server 1. local fids = g.server1:exec(function() - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() local s = box.schema.create_space('test', {is_sync = true}) s:create_index('pk') box.cfg{ @@ -311,7 +311,7 @@ g.test_remote_promote_during_local_txn_not_including_it = function(g) box.cfg{ replication_synchro_quorum = 1, } - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() end) -- Server 1 receives the PROMOTE. diff --git a/test/replication-luatest/gh_7286_split_brain_false_positive_test.lua b/test/replication-luatest/gh_7286_split_brain_false_positive_test.lua index 8e7ef421ac..d75dcff52c 100644 --- a/test/replication-luatest/gh_7286_split_brain_false_positive_test.lua +++ b/test/replication-luatest/gh_7286_split_brain_false_positive_test.lua @@ -35,7 +35,7 @@ end) g.test_false_positive_split_brain = function(cg) cg.node1:exec(function() - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() box.ctl.demote() end) cg.node2:wait_for_vclock_of(cg.node1) diff --git a/test/replication-luatest/gh_7592_local_write_with_syncro_test.lua b/test/replication-luatest/gh_7592_local_write_with_syncro_test.lua index c3ae21e32a..a54dbbe186 100644 --- a/test/replication-luatest/gh_7592_local_write_with_syncro_test.lua +++ b/test/replication-luatest/gh_7592_local_write_with_syncro_test.lua @@ -27,7 +27,7 @@ g.before_each(function(cg) } cg.cluster:start() cg.master:exec(function() - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() box.schema.space.create('sync', {is_sync = true}) box.space.sync:create_index('pk') box.schema.space.create('loc', {is_local = true}) diff --git a/test/replication-luatest/gh_8996_synchro_filter_enable_test.lua b/test/replication-luatest/gh_8996_synchro_filter_enable_test.lua deleted file mode 100644 index dacc0bb2fb..0000000000 --- a/test/replication-luatest/gh_8996_synchro_filter_enable_test.lua +++ /dev/null @@ -1,78 +0,0 @@ -local t = require('luatest') -local replica_set = require('luatest.replica_set') -local server = require('luatest.server') - -local g = t.group('synchro-filter-enable-by-version') - -g.before_each(function(cg) - cg.replica_set = replica_set:new{} - cg.box_cfg = { - replication = { - server.build_listen_uri('server1', cg.replica_set.id), - server.build_listen_uri('server2', cg.replica_set.id), - }, - replication_timeout = 0.1, - } - for i = 1,2 do - cg['server' .. i] = cg.replica_set:build_and_add_server{ - alias = 'server' .. i, - box_cfg = cg.box_cfg, - } - end -end) - -g.after_each(function(cg) - cg.replica_set:drop() -end) - --- Check that split-brain detection does not work with schema version <= --- 2.10.1, and is re-enabled back after a schema upgrade. -g.test_filter_enable_disable = function(cg) - cg.replica_set:start() - cg.server1:exec(function() - box.ctl.wait_rw() - box.schema.downgrade('2.10.1') - t.assert_equals(box.space._schema:get{'version'}, - {'version', 2, 10, 1}) - end) - cg.server2:wait_for_vclock_of(cg.server1) - - cg.server1:update_box_cfg({replication = ""}) - cg.server2:update_box_cfg({replication = ""}) - - cg.server1:exec(function() - box.ctl.promote() - end) - cg.server2:exec(function() - box.ctl.promote() - end) - - cg.server1:update_box_cfg(cg.box_cfg) - cg.server2:update_box_cfg(cg.box_cfg) - cg.server1:wait_for_vclock_of(cg.server2) - cg.server2:wait_for_vclock_of(cg.server1) - cg.server1:assert_follows_upstream(cg.server2:get_instance_id()) - cg.server2:assert_follows_upstream(cg.server1:get_instance_id()) - - cg.server1:update_box_cfg({replication = ""}) - cg.server2:update_box_cfg({replication = ""}) - - for i = 1,2 do - cg['server' .. i]:exec(function() - box.ctl.promote() - box.schema.upgrade() - end) - end - - t.helpers.retrying({}, function() - for i = 1,2 do - cg['server' .. i]:update_box_cfg(cg.box_cfg) - cg['server' .. i]:exec(function(id) - t.assert_equals(box.info.replication[id].upstream.status, - 'stopped') - t.assert_str_contains(box.info.replication[id].upstream.message, - 'Split-Brain discovered') - end, {cg['server' .. 3 - i]:get_instance_id()}) - end - end) -end diff --git a/test/replication-luatest/linearizable_test.lua b/test/replication-luatest/linearizable_test.lua index 1eeadac04d..aaf46c1dd6 100644 --- a/test/replication-luatest/linearizable_test.lua +++ b/test/replication-luatest/linearizable_test.lua @@ -108,7 +108,7 @@ g.before_test('test_no_dirty_reads', function(cg) end) cg.servers[2]:wait_for_vclock_of(cg.servers[1]) cg.servers[2]:exec(function() - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() end) cg.servers[1]:wait_for_vclock_of(cg.servers[2]) cg.servers[3]:wait_for_vclock_of(cg.servers[2]) @@ -189,7 +189,7 @@ end g.test_leader_change = function(cg) cg.servers[1]:exec(function() - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() end) cg.servers[2]:wait_for_vclock_of(cg.servers[1]) cg.servers[3]:wait_for_vclock_of(cg.servers[1]) @@ -201,7 +201,7 @@ g.test_leader_change = function(cg) cg.proxies[i]:pause() end cg.servers[2]:exec(function() - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() box.space.sync:insert{2} end) local fid = cg.servers[1]:exec(function() @@ -250,7 +250,7 @@ g_basic.before_all(function(cg) box.schema.space.create('vinyl', {engine = 'vinyl', is_sync = true}) box.space.vinyl:create_index('pk') -- For the sake of writes to the sync space. - box.ctl.promote() + box.ctl.promote(); box.ctl.wait_rw() end) end) diff --git a/test/replication/election_basic.result b/test/replication/election_basic.result index 1fd69caf1c..3e8a602b7f 100644 --- a/test/replication/election_basic.result +++ b/test/replication/election_basic.result @@ -399,7 +399,7 @@ assert(election_tbl[6].state == 'follower') | - true | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... diff --git a/test/replication/election_basic.test.lua b/test/replication/election_basic.test.lua index 8fb0f6fe5a..7a3eeb1f9e 100644 --- a/test/replication/election_basic.test.lua +++ b/test/replication/election_basic.test.lua @@ -163,7 +163,7 @@ box.cfg{election_mode='manual'} test_run:wait_cond(function() return #election_tbl == 6 end) assert(election_tbl[6].state == 'follower') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() test_run:wait_cond(function() return #election_tbl == 8 end) assert(election_tbl[7].state == 'candidate') diff --git a/test/replication/gh-5140-qsync-casc-rollback.result b/test/replication/gh-5140-qsync-casc-rollback.result index a71d8f9818..7e78d40a32 100644 --- a/test/replication/gh-5140-qsync-casc-rollback.result +++ b/test/replication/gh-5140-qsync-casc-rollback.result @@ -73,7 +73,7 @@ _ = box.schema.space.create('async', {is_sync=false, engine = engine}) _ = _:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... -- Write something to flush the master state to replica. @@ -206,7 +206,7 @@ box.space.sync:select{} | - [4] | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... diff --git a/test/replication/gh-5140-qsync-casc-rollback.test.lua b/test/replication/gh-5140-qsync-casc-rollback.test.lua index 15732e3324..74f4539049 100644 --- a/test/replication/gh-5140-qsync-casc-rollback.test.lua +++ b/test/replication/gh-5140-qsync-casc-rollback.test.lua @@ -48,7 +48,7 @@ _ = box.schema.space.create('sync', {is_sync = true, engine = engine}) _ = _:create_index('pk') _ = box.schema.space.create('async', {is_sync=false, engine = engine}) _ = _:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() -- Write something to flush the master state to replica. box.space.sync:replace{1} @@ -97,7 +97,7 @@ test_run:switch('default') box.space.async:select{} box.space.sync:select{} -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() box.space.sync:drop() box.space.async:drop() diff --git a/test/replication/gh-5144-qsync-dup-confirm.result b/test/replication/gh-5144-qsync-dup-confirm.result index 21571fba87..6496cb1b5a 100644 --- a/test/replication/gh-5144-qsync-dup-confirm.result +++ b/test/replication/gh-5144-qsync-dup-confirm.result @@ -54,7 +54,7 @@ _ = box.schema.space.create('sync', {is_sync = true, engine = engine}) _ = _:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... diff --git a/test/replication/gh-5144-qsync-dup-confirm.test.lua b/test/replication/gh-5144-qsync-dup-confirm.test.lua index b3e5b2ab2f..d3fd5ea031 100644 --- a/test/replication/gh-5144-qsync-dup-confirm.test.lua +++ b/test/replication/gh-5144-qsync-dup-confirm.test.lua @@ -23,7 +23,7 @@ box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000} _ = box.schema.space.create('sync', {is_sync = true, engine = engine}) _ = _:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() -- Remember the current LSN. In the end, when the following synchronous -- transaction is committed, result LSN should be this value +2: for the diff --git a/test/replication/gh-5167-qsync-rollback-snap.result b/test/replication/gh-5167-qsync-rollback-snap.result index 85ef58612e..5246030b5c 100644 --- a/test/replication/gh-5167-qsync-rollback-snap.result +++ b/test/replication/gh-5167-qsync-rollback-snap.result @@ -41,7 +41,7 @@ _ = box.schema.space.create('sync', {is_sync = true, engine = engine}) _ = box.space.sync:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... -- Write something to flush the current master's state to replica. diff --git a/test/replication/gh-5167-qsync-rollback-snap.test.lua b/test/replication/gh-5167-qsync-rollback-snap.test.lua index 5e8950eb91..ecb80c6a78 100644 --- a/test/replication/gh-5167-qsync-rollback-snap.test.lua +++ b/test/replication/gh-5167-qsync-rollback-snap.test.lua @@ -16,7 +16,7 @@ fiber = require('fiber') box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000} _ = box.schema.space.create('sync', {is_sync = true, engine = engine}) _ = box.space.sync:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() -- Write something to flush the current master's state to replica. _ = box.space.sync:insert{1} _ = box.space.sync:delete{1} diff --git a/test/replication/gh-5195-qsync-replica-write.result b/test/replication/gh-5195-qsync-replica-write.result index 26555b731d..6980d70417 100644 --- a/test/replication/gh-5195-qsync-replica-write.result +++ b/test/replication/gh-5195-qsync-replica-write.result @@ -43,7 +43,7 @@ _ = box.schema.space.create('sync', {engine = engine, is_sync = true}) _ = box.space.sync:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... diff --git a/test/replication/gh-5195-qsync-replica-write.test.lua b/test/replication/gh-5195-qsync-replica-write.test.lua index 8753dce6cd..ead7ef508a 100644 --- a/test/replication/gh-5195-qsync-replica-write.test.lua +++ b/test/replication/gh-5195-qsync-replica-write.test.lua @@ -18,7 +18,7 @@ test_run:cmd('start server replica with wait=True, wait_load=True') -- _ = box.schema.space.create('sync', {engine = engine, is_sync = true}) _ = box.space.sync:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() box.cfg{ \ replication_synchro_timeout = 1000, \ diff --git a/test/replication/gh-5213-qsync-applier-order-3.result b/test/replication/gh-5213-qsync-applier-order-3.result index 2fc9e4e7f0..9c72dea8c7 100644 --- a/test/replication/gh-5213-qsync-applier-order-3.result +++ b/test/replication/gh-5213-qsync-applier-order-3.result @@ -45,7 +45,7 @@ s = box.schema.space.create('test', {is_sync = true}) _ = s:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... @@ -174,7 +174,7 @@ test_run:wait_lsn('replica2', 'default') | --- | ... box.cfg{ \ - replication_synchro_quorum = 2, \ + replication_synchro_quorum = 1, \ replication_synchro_timeout = 1000, \ } | --- @@ -182,7 +182,14 @@ box.cfg{ -- Replica2 takes the limbo ownership and sends the transaction to the replica1. -- Along with the CONFIRM from the default node, which is still not applied -- on the replica1. -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() + | --- + | ... +box.info.id == box.info.synchro.queue.owner -- promote should've been applied + | --- + | - true + | ... +box.cfg{replication_synchro_quorum = 2} | --- | ... fiber = require('fiber') @@ -259,7 +266,7 @@ test_run:cmd('delete server replica2') box.cfg{replication_synchro_quorum = 1} | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... s:drop() diff --git a/test/replication/gh-5213-qsync-applier-order-3.test.lua b/test/replication/gh-5213-qsync-applier-order-3.test.lua index 18f3a194dd..996a49e42e 100644 --- a/test/replication/gh-5213-qsync-applier-order-3.test.lua +++ b/test/replication/gh-5213-qsync-applier-order-3.test.lua @@ -30,7 +30,7 @@ box.schema.user.grant('guest', 'super') s = box.schema.space.create('test', {is_sync = true}) _ = s:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() test_run:cmd('create server replica1 with rpl_master=default,\ script="replication/replica1.lua"') @@ -85,13 +85,15 @@ end) test_run:switch('replica2') test_run:wait_lsn('replica2', 'default') box.cfg{ \ - replication_synchro_quorum = 2, \ + replication_synchro_quorum = 1, \ replication_synchro_timeout = 1000, \ } -- Replica2 takes the limbo ownership and sends the transaction to the replica1. -- Along with the CONFIRM from the default node, which is still not applied -- on the replica1. -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() +box.info.id == box.info.synchro.queue.owner -- promote should've been applied +box.cfg{replication_synchro_quorum = 2} fiber = require('fiber') f = fiber.new(function() box.space.test:replace{2} end) @@ -121,7 +123,7 @@ test_run:cmd('stop server replica2') test_run:cmd('delete server replica2') -- Restore leadership to make the default instance writable. box.cfg{replication_synchro_quorum = 1} -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() s:drop() box.schema.user.revoke('guest', 'super') box.cfg{ \ diff --git a/test/replication/gh-5430-qsync-promote-crash.result b/test/replication/gh-5430-qsync-promote-crash.result deleted file mode 100644 index 1204c625a0..0000000000 --- a/test/replication/gh-5430-qsync-promote-crash.result +++ /dev/null @@ -1,159 +0,0 @@ --- test-run result file version 2 --- --- gh-5430: box.ctl.promote() could assert if one of replicas didn't receive --- anything from the old leader. Internally box.ctl.promote() collected a quorum --- using vclock_follow() from all connected relays by the old leader ID and in --- that case one of such replicas led to vclock_follow(0) which is always a --- crash. --- -test_run = require('test_run').new() - | --- - | ... - --- --- Start 2 fullmesh nodes working normally. --- -test_run:cmd('create server master1 with '.. \ - 'script="replication/gh-5430-master1.lua"') - | --- - | - true - | ... -test_run:cmd('start server master1 with wait=False') - | --- - | - true - | ... - -test_run:cmd('create server master2 with '.. \ - 'script="replication/gh-5430-master2.lua"') - | --- - | - true - | ... -test_run:cmd('start server master2 with wait=True') - | --- - | - true - | ... - --- --- One of them won't write to WAL anything from now on. If a new instance is --- added and it will write something, master2 won't apply it. --- -test_run:switch('master2') - | --- - | - true - | ... -box.error.injection.set('ERRINJ_WAL_DELAY', true) - | --- - | - ok - | ... - --- --- Third node is the future 'old leader', by which master2 has --- vclock[master3] == 0. --- -test_run:cmd('create server master3 with '.. \ - 'script="replication/gh-5430-master3.lua"') - | --- - | - true - | ... -test_run:cmd('start server master3 with wait=True') - | --- - | - true - | ... - --- --- Make master1 fetch data from master3 so as it could receive sync data and --- confirm it later. --- -test_run:switch('master1') - | --- - | - true - | ... --- Can't keep master2 in there because it hangs with ER_CFG about a duplicate --- connection. Even reset via replication = {} does not help for 100%. But for --- the test it does not matter. -box.cfg{replication = {test_run:eval('master3', 'return box.cfg.listen')[1]}} - | --- - | ... - --- --- Master3 fills the limbo and dies. --- -test_run:switch('master3') - | --- - | - true - | ... -box.ctl.promote() - | --- - | ... -s = box.schema.create_space('test', {is_sync = true}) - | --- - | ... -_ = s:create_index('pk') - | --- - | ... -_ = require('fiber').create(s.replace, s, {1}) - | --- - | ... -test_run:wait_lsn('master1', 'master3') - | --- - | ... - -test_run:switch('master1') - | --- - | - true - | ... -test_run:cmd('stop server master3') - | --- - | - true - | ... -test_run:cmd('delete server master3') - | --- - | - true - | ... - --- --- Master1 tries to promote self. In the meantime master2 has --- vclock[master3] == 0. It is still blocked in the WAL thread. Master1 should --- be ready to seeing 0 LSN by the old leader's component in some replicas. --- -box.cfg{replication_synchro_timeout = 0.1} - | --- - | ... -assert(box.info.synchro.queue.len > 0) - | --- - | - true - | ... -assert(not pcall(box.ctl.promote)) - | --- - | - true - | ... - -test_run:switch('master2') - | --- - | - true - | ... -box.error.injection.set('ERRINJ_WAL_DELAY', false) - | --- - | - ok - | ... - -test_run:switch('default') - | --- - | - true - | ... -test_run:cmd('stop server master2') - | --- - | - true - | ... -test_run:cmd('delete server master2') - | --- - | - true - | ... -test_run:cmd('stop server master1') - | --- - | - true - | ... -test_run:cmd('delete server master1') - | --- - | - true - | ... diff --git a/test/replication/gh-5430-qsync-promote-crash.test.lua b/test/replication/gh-5430-qsync-promote-crash.test.lua deleted file mode 100644 index 7ef8860e79..0000000000 --- a/test/replication/gh-5430-qsync-promote-crash.test.lua +++ /dev/null @@ -1,76 +0,0 @@ --- --- gh-5430: box.ctl.promote() could assert if one of replicas didn't receive --- anything from the old leader. Internally box.ctl.promote() collected a quorum --- using vclock_follow() from all connected relays by the old leader ID and in --- that case one of such replicas led to vclock_follow(0) which is always a --- crash. --- -test_run = require('test_run').new() - --- --- Start 2 fullmesh nodes working normally. --- -test_run:cmd('create server master1 with '.. \ - 'script="replication/gh-5430-master1.lua"') -test_run:cmd('start server master1 with wait=False') - -test_run:cmd('create server master2 with '.. \ - 'script="replication/gh-5430-master2.lua"') -test_run:cmd('start server master2 with wait=True') - --- --- One of them won't write to WAL anything from now on. If a new instance is --- added and it will write something, master2 won't apply it. --- -test_run:switch('master2') -box.error.injection.set('ERRINJ_WAL_DELAY', true) - --- --- Third node is the future 'old leader', by which master2 has --- vclock[master3] == 0. --- -test_run:cmd('create server master3 with '.. \ - 'script="replication/gh-5430-master3.lua"') -test_run:cmd('start server master3 with wait=True') - --- --- Make master1 fetch data from master3 so as it could receive sync data and --- confirm it later. --- -test_run:switch('master1') --- Can't keep master2 in there because it hangs with ER_CFG about a duplicate --- connection. Even reset via replication = {} does not help for 100%. But for --- the test it does not matter. -box.cfg{replication = {test_run:eval('master3', 'return box.cfg.listen')[1]}} - --- --- Master3 fills the limbo and dies. --- -test_run:switch('master3') -box.ctl.promote() -s = box.schema.create_space('test', {is_sync = true}) -_ = s:create_index('pk') -_ = require('fiber').create(s.replace, s, {1}) -test_run:wait_lsn('master1', 'master3') - -test_run:switch('master1') -test_run:cmd('stop server master3') -test_run:cmd('delete server master3') - --- --- Master1 tries to promote self. In the meantime master2 has --- vclock[master3] == 0. It is still blocked in the WAL thread. Master1 should --- be ready to seeing 0 LSN by the old leader's component in some replicas. --- -box.cfg{replication_synchro_timeout = 0.1} -assert(box.info.synchro.queue.len > 0) -assert(not pcall(box.ctl.promote)) - -test_run:switch('master2') -box.error.injection.set('ERRINJ_WAL_DELAY', false) - -test_run:switch('default') -test_run:cmd('stop server master2') -test_run:cmd('delete server master2') -test_run:cmd('stop server master1') -test_run:cmd('delete server master1') diff --git a/test/replication/gh-6034-qsync-limbo-ownership.result b/test/replication/gh-6034-qsync-limbo-ownership.result index 58acf7db28..4f558fa1ae 100644 --- a/test/replication/gh-6034-qsync-limbo-ownership.result +++ b/test/replication/gh-6034-qsync-limbo-ownership.result @@ -42,7 +42,7 @@ box.space.sync:insert{1} -- error. | - error: The synchronous transaction queue doesn't belong to any instance | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... assert(not box.info.ro) @@ -107,7 +107,7 @@ assert(err.code == box.error.READONLY) | ... -- Promotion on the other node. Default should become ro. -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... assert(not box.info.ro) @@ -150,7 +150,7 @@ assert(err.code == box.error.READONLY) | - true | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... box.ctl.demote() diff --git a/test/replication/gh-6034-qsync-limbo-ownership.test.lua b/test/replication/gh-6034-qsync-limbo-ownership.test.lua index 0f62ba6a40..3b690f8a35 100644 --- a/test/replication/gh-6034-qsync-limbo-ownership.test.lua +++ b/test/replication/gh-6034-qsync-limbo-ownership.test.lua @@ -17,7 +17,7 @@ box.space.async:insert{1} -- success. -- Synchro spaces aren't writeable box.space.sync:insert{1} -- error. -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() assert(not box.info.ro) assert(box.info.synchro.queue.owner == box.info.id) box.space.sync:insert{1} -- success. @@ -39,7 +39,7 @@ assert(not ok) assert(err.code == box.error.READONLY) -- Promotion on the other node. Default should become ro. -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() assert(not box.info.ro) assert(box.info.synchro.queue.owner == box.info.id) box.space.sync:insert{2} -- success. @@ -52,7 +52,7 @@ ok, err = pcall(box.space.sync.insert, box.space.sync, {3}) assert(not ok) assert(err.code == box.error.READONLY) -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() box.ctl.demote() assert(not box.info.ro) box.space.sync:insert{3} -- still fails. diff --git a/test/replication/gh-6035-election-filter.result b/test/replication/gh-6035-election-filter.result index 2fa593267b..6c18acba7b 100644 --- a/test/replication/gh-6035-election-filter.result +++ b/test/replication/gh-6035-election-filter.result @@ -69,7 +69,7 @@ box.cfg({ \ | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... _ = box.schema.space.create("sync", {is_sync = true}) diff --git a/test/replication/gh-6035-election-filter.test.lua b/test/replication/gh-6035-election-filter.test.lua index 716c84bb6e..8d04218a1c 100644 --- a/test/replication/gh-6035-election-filter.test.lua +++ b/test/replication/gh-6035-election-filter.test.lua @@ -38,7 +38,7 @@ box.cfg({ \ election_mode = 'manual', \ }) -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() _ = box.schema.space.create("sync", {is_sync = true}) _ = box.space.sync:create_index("pk") box.space.sync:insert{1} diff --git a/test/replication/qsync_advanced.result b/test/replication/qsync_advanced.result index 067abac0cf..57124441ac 100644 --- a/test/replication/qsync_advanced.result +++ b/test/replication/qsync_advanced.result @@ -79,7 +79,7 @@ _ = box.schema.space.create('sync', {is_sync=true, engine=engine}) _ = box.space.sync:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... -- Testcase body. @@ -493,7 +493,7 @@ box.space.sync:select{} -- 1 box.cfg{read_only=false} -- promote replica to master | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... test_run:switch('default') @@ -539,7 +539,7 @@ test_run:switch('default') box.cfg{read_only=false} | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... test_run:switch('replica') diff --git a/test/replication/qsync_advanced.test.lua b/test/replication/qsync_advanced.test.lua index 179cf3dd0e..5b8370544c 100644 --- a/test/replication/qsync_advanced.test.lua +++ b/test/replication/qsync_advanced.test.lua @@ -35,7 +35,7 @@ test_run:switch('default') box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=1000} _ = box.schema.space.create('sync', {is_sync=true, engine=engine}) _ = box.space.sync:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() -- Testcase body. box.space.sync:insert{1} -- success test_run:switch('replica') @@ -181,7 +181,7 @@ box.space.sync:select{} -- 1 test_run:switch('replica') box.space.sync:select{} -- 1 box.cfg{read_only=false} -- promote replica to master -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() test_run:switch('default') box.cfg{read_only=true} -- demote master to replica test_run:switch('replica') @@ -194,7 +194,7 @@ t -- Revert cluster configuration. test_run:switch('default') box.cfg{read_only=false} -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() test_run:switch('replica') box.cfg{read_only=true} -- Testcase cleanup. diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result index 8eae516cd8..14c15c0164 100644 --- a/test/replication/qsync_basic.result +++ b/test/replication/qsync_basic.result @@ -14,7 +14,7 @@ s1.is_sync pk = s1:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... box.begin() s1:insert({1}) s1:insert({2}) box.commit() @@ -712,7 +712,7 @@ assert(box.info.lsn == old_lsn + 1) | --- | - true | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... diff --git a/test/replication/qsync_basic.test.lua b/test/replication/qsync_basic.test.lua index a1dfc9e9cf..fddc943e50 100644 --- a/test/replication/qsync_basic.test.lua +++ b/test/replication/qsync_basic.test.lua @@ -6,7 +6,7 @@ s1 = box.schema.create_space('test1', {is_sync = true}) s1.is_sync pk = s1:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() box.begin() s1:insert({1}) s1:insert({2}) box.commit() s1:select{} @@ -274,7 +274,7 @@ box.space.sync:before_replace(nil, skip_row) assert(box.space.sync:get{1} == nil) assert(box.space.sync:get{2} == nil) assert(box.info.lsn == old_lsn + 1) -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() box.space.sync:truncate() diff --git a/test/replication/qsync_errinj.result b/test/replication/qsync_errinj.result index cf1e30a907..9bab38cc96 100644 --- a/test/replication/qsync_errinj.result +++ b/test/replication/qsync_errinj.result @@ -35,7 +35,7 @@ _ = box.schema.space.create('sync', {is_sync = true, engine = engine}) _ = box.space.sync:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... diff --git a/test/replication/qsync_errinj.test.lua b/test/replication/qsync_errinj.test.lua index e7c85c58ce..548c78ca4f 100644 --- a/test/replication/qsync_errinj.test.lua +++ b/test/replication/qsync_errinj.test.lua @@ -12,7 +12,7 @@ test_run:cmd('start server replica with wait=True, wait_load=True') _ = box.schema.space.create('sync', {is_sync = true, engine = engine}) _ = box.space.sync:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() -- -- gh-5100: slow ACK sending shouldn't stun replica for the diff --git a/test/replication/qsync_snapshots.result b/test/replication/qsync_snapshots.result index 742a56fce0..ecdb7f9f9a 100644 --- a/test/replication/qsync_snapshots.result +++ b/test/replication/qsync_snapshots.result @@ -57,7 +57,7 @@ _ = box.schema.space.create('sync', {is_sync=true, engine=engine}) _ = box.space.sync:create_index('pk') | --- | ... -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() | --- | ... -- Testcase body. diff --git a/test/replication/qsync_snapshots.test.lua b/test/replication/qsync_snapshots.test.lua index 5208c26d4e..c16f4dc06a 100644 --- a/test/replication/qsync_snapshots.test.lua +++ b/test/replication/qsync_snapshots.test.lua @@ -23,7 +23,7 @@ test_run:switch('default') box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=1000} _ = box.schema.space.create('sync', {is_sync=true, engine=engine}) _ = box.space.sync:create_index('pk') -box.ctl.promote() +box.ctl.promote(); box.ctl.wait_rw() -- Testcase body. box.space.sync:insert{1} box.space.sync:select{} -- 1 -- GitLab