From 32ae02545ab3dd741cc2a7c475eeb787e4882d30 Mon Sep 17 00:00:00 2001 From: Serge Petrenko <sergepetrenko@tarantool.org> Date: Wed, 18 Jan 2023 11:36:53 +0300 Subject: [PATCH] raft: fix 'manual' nodes bumping the term excessively A node configured in 'manual' mode and promoted by `box.ctl.promote()` stays in is_candidate state for the whole term, even though it is not is_cfg_candidate. If such a node is the first one to notice leader death or to hit the election timeout, it bumps the term excessively, then immediately becomes a mere follower, because its is_candidate is reset with is_cfg_candidate. This extra term bump (one term after the node was actually promoted) is unnecessary and might lead to strange errors: tarantool> box.ctl.promote() --- - error: 'The term is outdated: old - 3, new - 4' ... Fix this by checking if a node is configured as a candidate before trying to start new elections. Closes #8168 NO_DOC=bugfix (cherry picked from commit 5765fdc4e4ecf2c6cdd0845a24bea8d39fc9d2bb) --- .../gh-8168-double-term-bump-on-promote.md | 4 ++ src/lib/raft/raft.c | 13 ++++-- .../gh_8168_extra_term_bump_test.lua | 46 +++++++++++++++++++ test/unit/raft.c | 37 ++++++++++++++- test/unit/raft.result | 11 ++++- 5 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 changelogs/unreleased/gh-8168-double-term-bump-on-promote.md create mode 100644 test/replication-luatest/gh_8168_extra_term_bump_test.lua diff --git a/changelogs/unreleased/gh-8168-double-term-bump-on-promote.md b/changelogs/unreleased/gh-8168-double-term-bump-on-promote.md new file mode 100644 index 0000000000..f8370313bc --- /dev/null +++ b/changelogs/unreleased/gh-8168-double-term-bump-on-promote.md @@ -0,0 +1,4 @@ +## bugfix/raft + +* Fixed nodes configured with `election_mode` = "manual" sometimes bumping the + election term excessively after their promotion (gh-8168). diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c index 1e5ff2a9d4..75ae79207f 100644 --- a/src/lib/raft/raft.c +++ b/src/lib/raft/raft.c @@ -333,7 +333,12 @@ raft_sm_schedule_new_election(struct raft *raft); static inline void raft_sm_election_update(struct raft *raft) { - if (!raft->is_candidate) + /* + * The node might be promoted for current term, in which case + * is_candidate would be true. But it's not enough. If is_cfg_candidate + * is false, the node would give up as soon as new term starts. + */ + if (!raft->is_cfg_candidate) return; /* * Pre-vote protection. Every node must agree that the leader is gone. @@ -615,8 +620,10 @@ raft_process_msg(struct raft *raft, const struct raft_msg *req, uint32_t source) /* * No need for pre-vote checks when the leader * deliberately told us it's resigning. + * Note, the only case when automatic elections are + * allowed is when the node is configured candidate. */ - if (raft->is_candidate) + if (raft->is_cfg_candidate) raft_sm_schedule_new_election(raft); } return 0; @@ -951,7 +958,7 @@ static void raft_sm_schedule_new_election(struct raft *raft) { say_info("RAFT: begin new election round"); - assert(raft->is_candidate); + assert(raft->is_cfg_candidate); /* Everyone is a follower until its vote for self is persisted. */ raft_sm_schedule_new_term(raft, raft->volatile_term + 1); raft_sm_schedule_new_vote(raft, raft->self, raft->vclock); diff --git a/test/replication-luatest/gh_8168_extra_term_bump_test.lua b/test/replication-luatest/gh_8168_extra_term_bump_test.lua new file mode 100644 index 0000000000..f607b01508 --- /dev/null +++ b/test/replication-luatest/gh_8168_extra_term_bump_test.lua @@ -0,0 +1,46 @@ +local t = require('luatest') +local replica_set = require('luatest.replica_set') +local server = require('luatest.server') + +local g = t.group('gh-8168') + +g.before_each(function(cg) + cg.replica_set = replica_set:new{} + local box_cfg = { + replication = { + server.build_listen_uri('server1'), + server.build_listen_uri('server2'), + }, + election_mode = 'manual', + replication_timeout = 0.1, + } + cg.server1 = cg.replica_set:build_and_add_server{ + alias = 'server1', + box_cfg = box_cfg, + } + box_cfg.election_timeout = 1e-9 + box_cfg.read_only = true + cg.server2 = cg.replica_set:build_and_add_server{ + alias = 'server2', + box_cfg = box_cfg, + } + cg.replica_set:start() +end) + +g.after_each(function(cg) + cg.replica_set:drop() +end) + +g.test_double_term_bump_on_promote = function(cg) + cg.server1:wait_for_election_leader() + local term = cg.server1:get_election_term() + local ok, _ = cg.server2:exec(function() + return pcall(box.ctl.promote) + end) + t.assert(ok, 'Promote succeeded') + t.assert_equals(cg.server2:get_election_term(), term + 1, + 'Promote bumped term once') + t.assert_equals(cg.server2:exec(function() + return box.info.election.state + end), 'leader', 'Server 2 is the leader') +end diff --git a/test/unit/raft.c b/test/unit/raft.c index 923eac192c..c2934f262a 100644 --- a/test/unit/raft.c +++ b/test/unit/raft.c @@ -1569,7 +1569,7 @@ raft_test_too_long_wal_write(void) static void raft_test_promote_restore(void) { - raft_start_test(12); + raft_start_test(21); struct raft_node node; raft_node_create(&node); @@ -1617,6 +1617,41 @@ raft_test_promote_restore(void) raft_node_restore(&node); ok(!node.raft.is_candidate, "not a candidate"); + /* + * The node doesn't schedule new elections in the next round after + * promotion. + */ + raft_node_cfg_is_candidate(&node, false); + raft_node_cfg_is_enabled(&node, true); + raft_node_cfg_election_quorum(&node, 2); + raft_node_promote(&node); + + is(node.raft.state, RAFT_STATE_CANDIDATE, "became candidate"); + is(node.raft.term, 5, "new term"); + + /* Wait for the election timeout. */ + double ts = raft_time(); + raft_run_next_event(); + ok(raft_time() - ts >= node.raft.election_timeout, + "election timeout passed"); + is(node.raft.state, RAFT_STATE_CANDIDATE, "still a candidate"); + is(node.raft.term, 5, "do not bump term"); + + is(raft_node_send_leader( + &node, + 5 /* Term. */, + 2 /* Source. */ + ), 0, "another leader is accepted"); + + is(raft_node_send_follower( + &node, + 5 /* Term. */, + 2 /* Source. */ + ), 0, "leader resign is accepted"); + + is(node.raft.state, RAFT_STATE_FOLLOWER, "stay follower"); + is(node.raft.term, 5, "do not bump term"); + raft_node_destroy(&node); raft_finish_test(); } diff --git a/test/unit/raft.result b/test/unit/raft.result index d32947a469..2fdca67ef3 100644 --- a/test/unit/raft.result +++ b/test/unit/raft.result @@ -265,7 +265,7 @@ ok 13 - subtests ok 14 - subtests *** raft_test_too_long_wal_write: done *** *** raft_test_promote_restore *** - 1..12 + 1..21 ok 1 - became leader after promotion ok 2 - restore drops a non-candidate leader to a follower ok 3 - became leader after promotion @@ -278,6 +278,15 @@ ok 14 - subtests ok 10 - still old term ok 11 - not a candidate ok 12 - not a candidate + ok 13 - became candidate + ok 14 - new term + ok 15 - election timeout passed + ok 16 - still a candidate + ok 17 - do not bump term + ok 18 - another leader is accepted + ok 19 - leader resign is accepted + ok 20 - stay follower + ok 21 - do not bump term ok 15 - subtests *** raft_test_promote_restore: done *** *** raft_test_bump_term_before_cfg *** -- GitLab