diff --git a/changelogs/unreleased/gh-10087-update-quorum-in-on_commit-trigger.md b/changelogs/unreleased/gh-10087-update-quorum-in-on_commit-trigger.md new file mode 100644 index 0000000000000000000000000000000000000000..615f97386687945b185c4c7acb4fd98542c19d0b --- /dev/null +++ b/changelogs/unreleased/gh-10087-update-quorum-in-on_commit-trigger.md @@ -0,0 +1,7 @@ +## bugfix/replication + +* Fixed the inability to add a new replica to the replicaset if the user has + manually made space `_cluster` synchronous. Now the synchronous replication + quorum is updated after the `_cluster` change is confirmed by a quorum rather + than immediately after persisting the configuration change in the WAL + (gh-10087). diff --git a/src/box/alter.cc b/src/box/alter.cc index e135ea14767ed6517e6a36b41d6d48d0b9235d3d..36f97e720254e1068c7ae0c8226af3a777d4310c 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -4128,6 +4128,15 @@ on_replace_cluster_clear_id(struct trigger *trigger, void * /* event */) return 0; } +/** Update the synchronous replication quorum. */ +static int +on_replace_cluster_update_quorum(struct trigger * /* trigger */, + void * /* event */) +{ + box_update_replication_synchro_quorum(); + return 0; +} + /** * A trigger invoked on replace in the space _cluster, * which contains cluster configuration. @@ -4203,6 +4212,15 @@ on_replace_dd_cluster(struct trigger *trigger, void *event) diag_set(ClientError, ER_UNSUPPORTED, "Tarantool", msg); return -1; } + /* + * Update the quorum only after commit. Otherwise the replica + * would have to ack its own insertion. + */ + struct trigger *on_commit = txn_alter_trigger_new( + on_replace_cluster_update_quorum, NULL); + if (on_commit == NULL) + return -1; + txn_stmt_on_commit(stmt, on_commit); struct trigger *on_rollback = txn_alter_trigger_new( on_replace_cluster_clear_id, NULL); if (on_rollback == NULL) diff --git a/src/box/replication.cc b/src/box/replication.cc index 5730569a4cfc28255e6241d9e60683848b77ac25..5b7c76afd4f69e86f7281039fc56d29399b7b014 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -360,7 +360,6 @@ replica_set_id(struct replica *replica, uint32_t replica_id) say_info("assigned id %d to replica %s", replica->id, tt_uuid_str(&replica->uuid)); replica->anon = false; - box_update_replication_synchro_quorum(); box_broadcast_ballot(); } diff --git a/test/replication-luatest/gh_10087_update_quorum_in_on_commit_trigger_test.lua b/test/replication-luatest/gh_10087_update_quorum_in_on_commit_trigger_test.lua new file mode 100644 index 0000000000000000000000000000000000000000..7fc1ed70b331239b1e2fa35b36226e69d6130342 --- /dev/null +++ b/test/replication-luatest/gh_10087_update_quorum_in_on_commit_trigger_test.lua @@ -0,0 +1,94 @@ +local t = require('luatest') +local replica_set = require('luatest.replica_set') +local server = require('luatest.server') + +local g_one_member_cluster = t.group('one_member_cluster') +local g_three_member_cluster = t.group('three_member_cluster') + +g_one_member_cluster.before_all(function(cg) + cg.master = server:new{alias = 'master'} + cg.master:start() + -- Make `_cluster` space synchronous. + cg.master:exec(function() + box.ctl.promote() + box.space._cluster:alter{is_sync = true} + end) +end) + +-- Test that synchronous insertion into 1-member cluster works properly. +g_one_member_cluster.test_insertion = function(cg) + cg.replica = server:new{alias = 'replica', box_cfg = { + replication = cg.master.net_box_uri, + }} + cg.replica:start() + cg.master:wait_for_downstream_to(cg.replica) + cg.replica:exec(function() + t.assert_not_equals(box.space._cluster:get{box.info.id}, nil) + end) +end + +g_one_member_cluster.after_all(function(cg) + cg.master:drop() + if cg.replica ~= nil then + cg.replica:drop() + end +end) + +g_three_member_cluster.before_all(function(cg) + cg.replica_set = replica_set:new{} + cg.master = cg.replica_set:build_and_add_server{alias = 'master'} + cg.master:start() + cg.replica_to_be_disabled = + cg.replica_set:build_and_add_server{alias = 'to_be_disabled', + box_cfg = { + replication = { + cg.master.net_box_uri, + server.build_listen_uri('replica', cg.replica_set.id), + }, + }} + cg.replica = cg.replica_set:build_and_add_server{alias = 'replica', + box_cfg = { + replication = { + cg.master.net_box_uri, + server.build_listen_uri('to_be_disabled', cg.replica_set.id), + }, + }} + cg.replica_set:start() + + -- Make `_cluster` space synchronous. + cg.master:exec(function() + box.ctl.promote() + box.space._cluster:alter{is_sync = true} + end) + + cg.master:wait_for_downstream_to(cg.replica_to_be_disabled) + cg.master:wait_for_downstream_to(cg.replica) +end) + +-- Test that synchronous insertion into 3-member cluster with 1 disabled node +-- works properly. +g_three_member_cluster.test_insertion = function(cg) + cg.replica_to_be_disabled:exec(function() + box.cfg{replication = ''} + end) + cg.replica_to_be_added = + cg.replica_set:build_and_add_server{alias = 'to_be_added', + box_cfg = { + replication = { + cg.master.net_box_uri, + server.build_listen_uri('to_be_disabled', cg.replica_set.id), + server.build_listen_uri('replica', cg.replica_set.id), + }, + replication_sync_timeout = 0, + }} + cg.replica_to_be_added:start() + cg.master:wait_for_downstream_to(cg.replica) + cg.master:wait_for_downstream_to(cg.replica_to_be_added) + cg.replica_to_be_added:exec(function() + t.assert_not_equals(box.space._cluster:get{box.info.id}, nil) + end) +end + +g_three_member_cluster.after_all(function(cg) + cg.replica_set:drop() +end)