From 1d329f0bbbf98dff878588b2ad754e2dd18404a7 Mon Sep 17 00:00:00 2001 From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Date: Wed, 2 Sep 2020 00:09:12 +0200 Subject: [PATCH] raft: introduce box.cfg.election_* options The new options are: - election_is_enabled - enable/disable leader election (via Raft). When disabled, the node is supposed to work as if Raft did not exist, like before; - election_is_candidate - a flag whether the instance can try to become a leader. Note that it can vote for other nodes regardless of the value of this option; - election_timeout - how long to wait until the election ends, in seconds. The options don't do anything now. They are added separately in order to keep such mundane changes out of the main Raft commit, to simplify its review. Option names don't mention 'Raft' on purpose, because - Not all users know what Raft is, so they may not even know it is related to leader election; - In the future the algorithm may change from Raft to something else, so it is better not to depend on it too much in the public API. 
Part of #1146 --- src/box/box.cc | 92 +++++++++++++++++++++++++++++++++ src/box/box.h | 3 ++ src/box/lua/cfg.cc | 27 ++++++++++ src/box/lua/load_cfg.lua | 15 ++++++ src/box/raft.c | 30 +++++++++++ src/box/raft.h | 35 +++++++++++++ test/app-tap/init_script.result | 3 ++ test/box/admin.result | 6 +++ test/box/cfg.result | 12 +++++ 9 files changed, 223 insertions(+) diff --git a/src/box/box.cc b/src/box/box.cc index 48fed9b2c4..99a15bfd07 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -472,6 +472,40 @@ box_check_uri(const char *source, const char *option_name) } } +static int +box_check_election_is_enabled(void) +{ + int b = cfg_getb("election_is_enabled"); + if (b < 0) { + diag_set(ClientError, ER_CFG, "election_is_enabled", + "the value must be a boolean"); + } + return b; +} + +static int +box_check_election_is_candidate(void) +{ + int b = cfg_getb("election_is_candidate"); + if (b < 0) { + diag_set(ClientError, ER_CFG, "election_is_candidate", + "the value must be a boolean"); + } + return b; +} + +static double +box_check_election_timeout(void) +{ + double d = cfg_getd("election_timeout"); + if (d <= 0) { + diag_set(ClientError, ER_CFG, "election_timeout", + "the value must be a positive number"); + return -1; + } + return d; +} + static void box_check_replication(void) { @@ -729,6 +763,12 @@ box_check_config(void) box_check_uri(cfg_gets("listen"), "listen"); box_check_instance_uuid(&uuid); box_check_replicaset_uuid(&uuid); + if (box_check_election_is_enabled() < 0) + diag_raise(); + if (box_check_election_is_candidate() < 0) + diag_raise(); + if (box_check_election_timeout() < 0) + diag_raise(); box_check_replication(); box_check_replication_timeout(); box_check_replication_connect_timeout(); @@ -751,6 +791,36 @@ box_check_config(void) diag_raise(); } +int +box_set_election_is_enabled(void) +{ + int b = box_check_election_is_enabled(); + if (b < 0) + return -1; + raft_cfg_is_enabled(b); + return 0; +} + +int +box_set_election_is_candidate(void) +{ + int b = 
box_check_election_is_candidate(); + if (b < 0) + return -1; + raft_cfg_is_candidate(b); + return 0; +} + +int +box_set_election_timeout(void) +{ + double d = box_check_election_timeout(); + if (d < 0) + return -1; + raft_cfg_election_timeout(d); + return 0; +} + /* * Parse box.cfg.replication and create appliers. */ @@ -835,6 +905,7 @@ void box_set_replication_timeout(void) { replication_timeout = box_check_replication_timeout(); + raft_cfg_death_timeout(); } void @@ -865,6 +936,7 @@ box_set_replication_synchro_quorum(void) return -1; replication_synchro_quorum = value; txn_limbo_on_parameters_change(&txn_limbo); + raft_cfg_election_quorum(); return 0; } @@ -2686,6 +2758,26 @@ box_cfg_xc(void) fiber_gc(); is_box_configured = true; + /* + * Fill in leader election parameters after bootstrap. Before it is not + * possible - there may be relevant data to recover from WAL and + * snapshot. Also until recovery is done, it is not possible to write + * new records into WAL. It is also totally safe, because relaying is + * not started until the box is configured. So it can't happen, that + * this election-enabled node will try to relay to another + * election-enabled node without election actually enabled leading to + * disconnect. + */ + if (box_set_election_is_candidate() != 0) + diag_raise(); + if (box_set_election_timeout() != 0) + diag_raise(); + /* + * Election is enabled last. So as all the parameters are installed by + * that time. 
+ */ + if (box_set_election_is_enabled() != 0) + diag_raise(); title("running"); say_info("ready to accept requests"); diff --git a/src/box/box.h b/src/box/box.h index 5988264a59..45ff8bbbff 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -245,6 +245,9 @@ void box_set_vinyl_memory(void); void box_set_vinyl_max_tuple_size(void); void box_set_vinyl_cache(void); void box_set_vinyl_timeout(void); +int box_set_election_is_enabled(void); +int box_set_election_is_candidate(void); +int box_set_election_timeout(void); void box_set_replication_timeout(void); void box_set_replication_connect_timeout(void); void box_set_replication_connect_quorum(void); diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc index d481155cda..bbb92f038d 100644 --- a/src/box/lua/cfg.cc +++ b/src/box/lua/cfg.cc @@ -269,6 +269,30 @@ lbox_cfg_set_worker_pool_threads(struct lua_State *L) return 0; } +static int +lbox_cfg_set_election_is_enabled(struct lua_State *L) +{ + if (box_set_election_is_enabled() != 0) + luaT_error(L); + return 0; +} + +static int +lbox_cfg_set_election_is_candidate(struct lua_State *L) +{ + if (box_set_election_is_candidate() != 0) + luaT_error(L); + return 0; +} + +static int +lbox_cfg_set_election_timeout(struct lua_State *L) +{ + if (box_set_election_timeout() != 0) + luaT_error(L); + return 0; +} + static int lbox_cfg_set_replication_timeout(struct lua_State *L) { @@ -382,6 +406,9 @@ box_lua_cfg_init(struct lua_State *L) {"cfg_set_vinyl_max_tuple_size", lbox_cfg_set_vinyl_max_tuple_size}, {"cfg_set_vinyl_cache", lbox_cfg_set_vinyl_cache}, {"cfg_set_vinyl_timeout", lbox_cfg_set_vinyl_timeout}, + {"cfg_set_election_is_enabled", lbox_cfg_set_election_is_enabled}, + {"cfg_set_election_is_candidate", lbox_cfg_set_election_is_candidate}, + {"cfg_set_election_timeout", lbox_cfg_set_election_timeout}, {"cfg_set_replication_timeout", lbox_cfg_set_replication_timeout}, {"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum}, 
{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout}, diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index 92347a9fd9..d558e7ac9c 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -87,6 +87,9 @@ local default_cfg = { checkpoint_wal_threshold = 1e18, checkpoint_count = 2, worker_pool_threads = 4, + election_is_enabled = false, + election_is_candidate = true, + election_timeout = 5, replication_timeout = 1, replication_sync_lag = 10, replication_sync_timeout = 300, @@ -165,6 +168,9 @@ local template_cfg = { hot_standby = 'boolean', memtx_use_mvcc_engine = 'boolean', worker_pool_threads = 'number', + election_is_enabled = 'boolean', + election_is_candidate = 'boolean', + election_timeout = 'number', replication_timeout = 'number', replication_sync_lag = 'number', replication_sync_timeout = 'number', @@ -281,6 +287,9 @@ local dynamic_cfg = { require('title').update(box.cfg.custom_proc_title) end, force_recovery = function() end, + election_is_enabled = private.cfg_set_election_is_enabled, + election_is_candidate = private.cfg_set_election_is_candidate, + election_timeout = private.cfg_set_election_timeout, replication_timeout = private.cfg_set_replication_timeout, replication_connect_timeout = private.cfg_set_replication_connect_timeout, replication_connect_quorum = private.cfg_set_replication_connect_quorum, @@ -335,6 +344,9 @@ local dynamic_cfg_order = { -- the new one. This should be fixed when box.cfg is able to -- apply some parameters together and atomically. 
replication_anon = 250, + election_is_enabled = 300, + election_is_candidate = 310, + election_timeout = 320, } local function sort_cfg_cb(l, r) @@ -352,6 +364,9 @@ local dynamic_cfg_skip_at_load = { vinyl_cache = true, vinyl_timeout = true, too_long_threshold = true, + election_is_enabled = true, + election_is_candidate = true, + election_timeout = true, replication = true, replication_timeout = true, replication_connect_timeout = true, diff --git a/src/box/raft.c b/src/box/raft.c index 511fe42f58..ee54d02b77 100644 --- a/src/box/raft.c +++ b/src/box/raft.c @@ -37,6 +37,8 @@ /** Raft state of this instance. */ struct raft raft = { + .is_enabled = false, + .is_candidate = false, .term = 1, .vote = 0, }; @@ -63,3 +65,31 @@ raft_serialize_for_disk(struct raft_request *req) req->term = raft.term; req->vote = raft.vote; } + +void +raft_cfg_is_enabled(bool is_enabled) +{ + raft.is_enabled = is_enabled; +} + +void +raft_cfg_is_candidate(bool is_candidate) +{ + raft.is_candidate = is_candidate; +} + +void +raft_cfg_election_timeout(double timeout) +{ + raft.election_timeout = timeout; +} + +void +raft_cfg_election_quorum(void) +{ +} + +void +raft_cfg_death_timeout(void) +{ +} diff --git a/src/box/raft.h b/src/box/raft.h index 31f7becdb6..f272227528 100644 --- a/src/box/raft.h +++ b/src/box/raft.h @@ -30,6 +30,7 @@ * SUCH DAMAGE. */ #include <stdint.h> +#include <stdbool.h> #if defined(__cplusplus) extern "C" { @@ -38,8 +39,11 @@ extern "C" { struct raft_request; struct raft { + bool is_enabled; + bool is_candidate; uint64_t term; uint32_t vote; + double election_timeout; }; extern struct raft raft; @@ -48,6 +52,37 @@ extern struct raft raft; void raft_process_recovery(const struct raft_request *req); +/** Configure whether Raft is enabled. */ +void +raft_cfg_is_enabled(bool is_enabled); + +/** + * Configure whether the instance can be elected as Raft leader. Even if false, + * the node still can vote, when Raft is enabled. 
+ */ +void +raft_cfg_is_candidate(bool is_candidate); + +/** Configure Raft leader election timeout. */ +void +raft_cfg_election_timeout(double timeout); + +/** + * Configure Raft leader election quorum. There is no a separate option. + * Instead, synchronous replication quorum is used. Since Raft is tightly bound + * with synchronous replication. + */ +void +raft_cfg_election_quorum(void); + +/** + * Configure Raft leader death timeout. I.e. number of seconds without + * heartbeats from the leader to consider it dead. There is no a separate + * option. Raft uses replication timeout for that. + */ +void +raft_cfg_death_timeout(void); + /** * Save complete Raft state into a request to be sent to other instances of the * cluster. It is allowed to save anything here, not only persistent state. diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result index c8974d708d..d8969278bc 100644 --- a/test/app-tap/init_script.result +++ b/test/app-tap/init_script.result @@ -8,6 +8,9 @@ checkpoint_count:2 checkpoint_interval:3600 checkpoint_wal_threshold:1e+18 coredump:false +election_is_candidate:true +election_is_enabled:false +election_timeout:5 feedback_enabled:true feedback_host:https://feedback.tarantool.io feedback_interval:3600 diff --git a/test/box/admin.result b/test/box/admin.result index d1540a71e3..52b62356f0 100644 --- a/test/box/admin.result +++ b/test/box/admin.result @@ -37,6 +37,12 @@ cfg_filter(box.cfg) - 1000000000000000000 - - coredump - false + - - election_is_candidate + - true + - - election_is_enabled + - false + - - election_timeout + - 5 - - feedback_enabled - true - - feedback_host diff --git a/test/box/cfg.result b/test/box/cfg.result index fcfc64b228..f19f4bff77 100644 --- a/test/box/cfg.result +++ b/test/box/cfg.result @@ -25,6 +25,12 @@ cfg_filter(box.cfg) | - 1000000000000000000 | - - coredump | - false + | - - election_is_candidate + | - true + | - - election_is_enabled + | - false + | - - election_timeout + | - 5 | - - 
feedback_enabled | - true | - - feedback_host @@ -134,6 +140,12 @@ cfg_filter(box.cfg) | - 1000000000000000000 | - - coredump | - false + | - - election_is_candidate + | - true + | - - election_is_enabled + | - false + | - - election_timeout + | - 5 | - - feedback_enabled | - true | - - feedback_host -- GitLab