From 742a6e6724690c8a4a11c241d03b6993b0218108 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov <ivadmi5@gmail.com> Date: Wed, 4 Dec 2024 17:51:07 +0300 Subject: [PATCH] Revert "fix(qpromote): drop flaky test" This reverts commit 1357799055e38981f32e6e4e5560887e0d615c05. --- ...mote_several_outstanding_promotes_test.lua | 112 ++++++++++++++++++ test/replication-luatest/suite.ini | 2 +- 2 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 test/replication-luatest/qpromote_several_outstanding_promotes_test.lua diff --git a/test/replication-luatest/qpromote_several_outstanding_promotes_test.lua b/test/replication-luatest/qpromote_several_outstanding_promotes_test.lua new file mode 100644 index 0000000000..67e6f19852 --- /dev/null +++ b/test/replication-luatest/qpromote_several_outstanding_promotes_test.lua @@ -0,0 +1,112 @@ +local t = require('luatest') +local common = require('test.replication-luatest.qpromote_common') + +local g = common.make_test_group({nodes=5, quorum=3}) + +-- The idea here is that in a cluster of 5 nodes we can have 2 nodes +-- being unresponsive and the cluster should still continue. In this case two +-- nodes become unresponsive by emitting promotes that get stuck. +g.test_two_stuck_outstanding_promotes = function(g) + local n1 = g.cluster.servers[1] + local n2 = g.cluster.servers[2] + local n3 = g.cluster.servers[3] + + -- Both n1 and n2 have a hard time pushing out their promotes. 
+ common.spawn_stuck_promote(n1) + common.spawn_stuck_promote(n2) + + common.promote(n3) + + n3:exec(function() + box.space.test:replace({ 1 }) + end) + + common.remove_wal_delay_on_xrow_type(n1) + common.remove_wal_delay_on_xrow_type(n2) + + for _, server in ipairs(g.cluster.servers) do + server:wait_for_vclock_of(n3) + + t.assert_equals(server:exec(function() + return box.space.test:get { 1 } + end), { 1 }) + end + + common.ensure_healthy(g.cluster.servers) + + common.promote(n1) + n1:exec(function() + box.space.test:replace({ 2 }) + end) + + for _, server in ipairs(g.cluster.servers) do + t.assert_equals(server:exec(function() + return box.space.test:get { 2 } + end), { 2 }) + end + + common.ensure_healthy(g.cluster.servers) +end + +-- Variation of the previous test, but here nodes get stuck on confirm request +g.test_two_stuck_outstanding_confirms = function(g) + local n1 = g.cluster.servers[1] + local n2 = g.cluster.servers[2] + local n3 = g.cluster.servers[3] + + -- Both n1 and n2 have a hard time pushing out their confirms. 
+ common.spawn_promote_stuck_on_confirm(n1) + common.spawn_promote_stuck_on_confirm(n2) + + common.promote(n3) + + n3:exec(function() + box.space.test:replace({ 1 }) + end) + + common.remove_wal_delay_on_xrow_type(n1) + common.remove_wal_delay_on_xrow_type(n2) + + for _, server in ipairs(g.cluster.servers) do + server:wait_for_vclock_of(n3) + + t.assert_equals(server:exec(function() + return box.space.test:get { 1 } + end), { 1 }) + end + + common.ensure_healthy(g.cluster.servers) + + common.promote(n1) + n1:exec(function() + box.space.test:replace({ 2 }) + end) + + for _, server in ipairs(g.cluster.servers) do + t.assert_equals(server:exec(function() + return box.space.test:get { 2 } + end), { 2 }) + end + + common.ensure_healthy(g.cluster.servers) +end + +g.test_two_dependent_promotes = function (g) + local n1 = g.cluster.servers[1] + local n2 = g.cluster.servers[2] + + -- emit promote without confirming it + common.spawn_promote_stuck_on_confirm(n1) + + -- wait until everybody has this pending promote + for _, server in ipairs(g.cluster.servers) do + common.wait_for_promote_queue_len(server, 1) + end + + -- n2's promote is logically dependent on n1's promote + common.promote(n2) + + common.remove_wal_delay_on_xrow_type(n1) + + common.ensure_healthy(g.cluster.servers) +end diff --git a/test/replication-luatest/suite.ini b/test/replication-luatest/suite.ini index c50a5070d5..3559ecd6bd 100644 --- a/test/replication-luatest/suite.ini +++ b/test/replication-luatest/suite.ini @@ -2,4 +2,4 @@ core = luatest description = replication luatests is_parallel = True -release_disabled = gh_5295_split_brain_test.lua gh_6036_qsync_order_test.lua gh_6842_qsync_applier_order_test.lua gh_6033_box_promote_demote_test.lua gh_7253_election_long_wal_write_test.lua gh_7086_box_issue_promote_assert_test.lua qpromote_aba_leader_test.lua qpromote_diverging_limbo_owner_test.lua qpromote_transient_wal_error_on_promote_write_test.lua +release_disabled = gh_5295_split_brain_test.lua 
gh_6036_qsync_order_test.lua gh_6842_qsync_applier_order_test.lua gh_6033_box_promote_demote_test.lua gh_7253_election_long_wal_write_test.lua gh_7086_box_issue_promote_assert_test.lua qpromote_aba_leader_test.lua qpromote_diverging_limbo_owner_test.lua qpromote_transient_wal_error_on_promote_write_test.lua qpromote_several_outstanding_promotes_test.lua -- GitLab