From 7d093bb19cbc1409441286bacb35523ec25b1d72 Mon Sep 17 00:00:00 2001
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Date: Mon, 27 Feb 2023 23:08:34 +0100
Subject: [PATCH] test: add a test for join with self delete

Deletion of the own entry from _cluster space is allowed during the
join stage, because the remote master could have already had the
joining instance UUID in _cluster space but then deleted it. Then for
the joining instance it looks like deletion of self from _cluster. But
that is fine - in the end of join the master will register the replica
again. The case is handled, but not covered with a test. The patch adds
one.

NO_DOC=test
NO_CHANGELOG=test
---
 test/replication-luatest/join_test.lua | 66 ++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 test/replication-luatest/join_test.lua

diff --git a/test/replication-luatest/join_test.lua b/test/replication-luatest/join_test.lua
new file mode 100644
index 0000000000..75648efe29
--- /dev/null
+++ b/test/replication-luatest/join_test.lua
@@ -0,0 +1,66 @@
+local t = require('luatest')
+local server = require('luatest.server')
+local uuid = require('uuid')
+
+local g = t.group('join')
+
+-- Start one master for the whole group; the test below joins a replica to it
+-- and builds the replica's box_cfg from a copy of the master's one.
+g.before_all(function(lg)
+    local box_cfg = {replication_timeout = 0.1}
+    local master = server:new({alias = 'master', box_cfg = box_cfg})
+    lg.master = master
+    master:start()
+end)
+
+-- Drop the master once all the tests of the group are done.
+g.after_all(function(lg)
+    lg.master:drop()
+end)
+
+--
+-- New replica (or one being re-bootstrapped) can legally see deletion of self
+-- from _cluster space during join. It is allowed because the master could
+-- delete the replica from _cluster before the replica tried to rejoin or join
+-- first time via a previously existing UUID.
+--
+function g.test_fetch_self_delete_during_final_join(lg)
+    t.tarantool.skip_if_not_debug()
+    -- Seed some data for the join and switch the relay delay injection on.
+    lg.master:exec(function()
+        local space = box.schema.create_space('test')
+        space:create_index('pk')
+        space:replace({1})
+        box.error.injection.set('ERRINJ_RELAY_SEND_DELAY', true)
+    end)
+    local joiner_uuid = uuid.str()
+    local cfg = table.deepcopy(lg.master.box_cfg)
+    cfg.instance_uuid = joiner_uuid
+    cfg.replication = {lg.master.net_box_uri}
+    local replica = server:new({alias = 'replica', box_cfg = cfg})
+    replica:start({wait_until_ready = false})
+    -- Escape '-' of the UUID: grep_log() presumably takes a Lua pattern.
+    local join_msg = ('joining replica %s'):format(joiner_uuid)
+    join_msg = join_msg:gsub('%-', '%%-')
+    t.helpers.retrying({}, function()
+        t.assert(lg.master:grep_log(join_msg))
+    end)
+    lg.master:exec(function(peer_uuid)
+        for _ = 1, 2 do
+            box.space._cluster:replace({2, peer_uuid})
+            box.space._cluster:delete({2})
+        end
+        box.error.injection.set('ERRINJ_RELAY_SEND_DELAY', false)
+    end, {joiner_uuid})
+    replica:wait_until_ready()
+    local replica_id = replica:exec(function()
+        t.assert_equals(box.space.test:get({1}), {1})
+        return box.info.id
+    end)
+    replica:drop()
+    -- Clean the master up: drop the space and the replica's _cluster row.
+    lg.master:exec(function(peer_id)
+        box.space.test:drop()
+        box.space._cluster:delete({peer_id})
+    end, {replica_id})
+end
-- 
GitLab