From 29d1c0fa6c76ff06d748d8916c5ffbbc9e4a55e9 Mon Sep 17 00:00:00 2001 From: Georgiy Lebedev <g.lebedev@tarantool.org> Date: Mon, 3 Jun 2024 12:06:24 +0300 Subject: [PATCH] box: update synchro quorum in on_commit trigger instead of on_replace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we update the synchronous replication quorum from the `on_replace` trigger of the `_cluster` space when registering a new replica. However, during the join process, the replica cannot ack its own insertion into the `_cluster` space. In the scope of #9723, we are going to enable synchronous replication for most of the system spaces, including the `_cluster` space. There are several problems with this: 1. Joining a replica to a 1-member cluster without manual changing of quorum won't work: it is impossible to commit the insertion into the `_cluster` space with only 1 node, since the quorum will equal to 2 right after the insertion. 2. Joining a replica to a 3-member cluster may fail: the quorum will become equal to 3 right after the insertion, the newly joined replica cannot ACK its own insertion into the `_cluster` space — if one out of original 3 nodes fails, then reconfiguration will fail. Generally speaking, it will be impossible to join a new replica to the cluster, if a quorum, which includes the newly added replica (which cannot ACK), cannot be gathered. To solve these problems, let's update the quorum in the `on_commit` trigger. This way we’ll be able to insert a node regardless of the current configuration. This somewhat contradicts with the Raft specification, which requires application of all configuration changes in the `on_replace` trigger (i.e., as soon as they are persisted in the WAL, without quorum confirmation), but still forbids several reconfigurations at the same time. Closes #10087 NO_DOC=<no special documentation page devoted to cluster reconfiguration> --- ...0087-update-quorum-in-on_commit-trigger.md | 5 + src/box/alter.cc | 18 ++++ src/box/replication.cc | 1 - ...pdate_quorum_in_on_commit_trigger_test.lua | 93 +++++++++++++++++++ 4 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 changelogs/unreleased/gh-10087-update-quorum-in-on_commit-trigger.md create mode 100644 test/replication-luatest/gh_10087_update_quorum_in_on_commit_trigger_test.lua diff --git a/changelogs/unreleased/gh-10087-update-quorum-in-on_commit-trigger.md b/changelogs/unreleased/gh-10087-update-quorum-in-on_commit-trigger.md new file mode 100644 index 0000000000..da8673a8b1 --- /dev/null +++ b/changelogs/unreleased/gh-10087-update-quorum-in-on_commit-trigger.md @@ -0,0 +1,5 @@ +## feature/replication + +* Now the synchronous replication quorum is updated after the cluster + reconfiguration change is confirmed by a quorum rather than immediately after + persisting the configuration change in the WAL (gh-10087). diff --git a/src/box/alter.cc b/src/box/alter.cc index 0259139c30..2d1565b26c 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -4424,6 +4424,15 @@ on_replace_cluster_clear_id(struct trigger *trigger, void * /* event */) return 0; } +/** Update the synchronous replication quorum. */ +static int +on_replace_cluster_update_quorum(struct trigger * /* trigger */, + void * /* event */) +{ + box_update_replication_synchro_quorum(); + return 0; +} + /** Replica definition. */ struct replica_def { /** Instance ID. */ @@ -4639,6 +4648,15 @@ on_replace_dd_cluster_insert(const struct replica_def *new_def) tt_uuid_str(&replica->uuid)); return -1; } + /* + * Update the quorum only after commit. Otherwise the replica would have + * to ack its own insertion. + */ + struct trigger *on_commit = txn_alter_trigger_new( + on_replace_cluster_update_quorum, NULL); + if (on_commit == NULL) + return -1; + txn_stmt_on_commit(stmt, on_commit); struct trigger *on_rollback = txn_alter_trigger_new( on_replace_cluster_clear_id, NULL); if (on_rollback == NULL) diff --git a/src/box/replication.cc b/src/box/replication.cc index 50cbe77ef0..2a26a446ee 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -376,7 +376,6 @@ replica_set_id(struct replica *replica, uint32_t replica_id) say_info("assigned id %d to replica %s", replica->id, tt_uuid_str(&replica->uuid)); replica->anon = false; - box_update_replication_synchro_quorum(); box_broadcast_ballot(); } diff --git a/test/replication-luatest/gh_10087_update_quorum_in_on_commit_trigger_test.lua b/test/replication-luatest/gh_10087_update_quorum_in_on_commit_trigger_test.lua new file mode 100644 index 0000000000..9f8ee32cf6 --- /dev/null +++ b/test/replication-luatest/gh_10087_update_quorum_in_on_commit_trigger_test.lua @@ -0,0 +1,93 @@ +local t = require('luatest') +local replica_set = require('luatest.replica_set') +local server = require('luatest.server') + +local g_one_member_cluster = t.group('one_member_cluster') +local g_three_member_cluster = t.group('three_member_cluster') + +g_one_member_cluster.before_all(function(cg) + cg.master = server:new{alias = 'master'} + cg.master:start() + -- Make `_cluster` space synchronous. + cg.master:exec(function() + box.ctl.promote() + box.space._cluster:alter{is_sync = true} + end) +end) + +-- Test that synchronous insertion into 1-member cluster works properly. +g_one_member_cluster.test_insertion = function(cg) + cg.replica = server:new{alias = 'replica', box_cfg = { + replication = cg.master.net_box_uri, + }} + cg.replica:start() + cg.master:wait_for_downstream_to(cg.replica) + cg.replica:exec(function() + t.assert_not_equals(box.space._cluster:get{box.info.id}, nil) + end) +end + +g_one_member_cluster.after_all(function(cg) + cg.master:drop() + if cg.replica ~= nil then + cg.replica:drop() + end +end) + +g_three_member_cluster.before_all(function(cg) + cg.replica_set = replica_set:new{} + cg.master = cg.replica_set:build_and_add_server{alias = 'master'} + cg.master:start() + cg.replica_to_be_disabled = + cg.replica_set:build_and_add_server{alias = 'to_be_disabled', + box_cfg = { + replication = { + cg.master.net_box_uri, + server.build_listen_uri('replica', cg.replica_set.id), + }, + }} + cg.replica = cg.replica_set:build_and_add_server{alias = 'replica', + box_cfg = { + replication = { + cg.master.net_box_uri, + server.build_listen_uri('to_be_disabled', cg.replica_set.id), + }, + }} + cg.replica_set:start() + + -- Make `_cluster` space synchronous. + cg.master:exec(function() + box.ctl.promote() + box.space._cluster:alter{is_sync = true} + end) + + cg.master:wait_for_downstream_to(cg.replica_to_be_disabled) + cg.master:wait_for_downstream_to(cg.replica) +end) + +-- Test that synchronous insertion into 3-member cluster with 1 disabled node +-- works properly. +g_three_member_cluster.test_insertion = function(cg) + cg.replica_to_be_disabled:exec(function() + box.cfg{replication = ''} + end) + cg.replica_to_be_added = + cg.replica_set:build_and_add_server{alias = 'to_be_added', + box_cfg = { + replication = { + cg.master.net_box_uri, + server.build_listen_uri('to_be_disabled', cg.replica_set.id), + server.build_listen_uri('replica', cg.replica_set.id), + }, + }} + cg.replica_to_be_added:start() + cg.master:wait_for_downstream_to(cg.replica) + cg.master:wait_for_downstream_to(cg.replica_to_be_added) + cg.replica_to_be_added:exec(function() + t.assert_not_equals(box.space._cluster:get{box.info.id}, nil) + end) +end + +g_three_member_cluster.after_all(function(cg) + cg.replica_set:drop() +end) -- GitLab