diff --git a/changelogs/unreleased/gh-8168-double-term-bump-on-promote.md b/changelogs/unreleased/gh-8168-double-term-bump-on-promote.md new file mode 100644 index 0000000000000000000000000000000000000000..f8370313bc3c267649b20826fa820630bf47fd13 --- /dev/null +++ b/changelogs/unreleased/gh-8168-double-term-bump-on-promote.md @@ -0,0 +1,4 @@ +## bugfix/raft + +* Fixed nodes configured with `election_mode` = "manual" sometimes bumping the + election term excessively after their promotion (gh-8168). diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c index 1e5ff2a9d47d741920e6c2b37cefc55519aea8a2..75ae79207f4dfab295a2e6e81d9c6297da5a9b7e 100644 --- a/src/lib/raft/raft.c +++ b/src/lib/raft/raft.c @@ -333,7 +333,12 @@ raft_sm_schedule_new_election(struct raft *raft); static inline void raft_sm_election_update(struct raft *raft) { - if (!raft->is_candidate) + /* + * The node might be promoted for current term, in which case + * is_candidate would be true. But it's not enough. If is_cfg_candidate + * is false, the node would give up as soon as new term starts. + */ + if (!raft->is_cfg_candidate) return; /* * Pre-vote protection. Every node must agree that the leader is gone. @@ -615,8 +620,10 @@ raft_process_msg(struct raft *raft, const struct raft_msg *req, uint32_t source) /* * No need for pre-vote checks when the leader * deliberately told us it's resigning. + * Note, the only case when automatic elections are + * allowed is when the node is configured candidate. */ - if (raft->is_candidate) + if (raft->is_cfg_candidate) raft_sm_schedule_new_election(raft); } return 0; @@ -951,7 +958,7 @@ static void raft_sm_schedule_new_election(struct raft *raft) { say_info("RAFT: begin new election round"); - assert(raft->is_candidate); + assert(raft->is_cfg_candidate); /* Everyone is a follower until its vote for self is persisted. */ raft_sm_schedule_new_term(raft, raft->volatile_term + 1); raft_sm_schedule_new_vote(raft, raft->self, raft->vclock); diff --git a/test/replication-luatest/gh_8168_extra_term_bump_test.lua b/test/replication-luatest/gh_8168_extra_term_bump_test.lua new file mode 100644 index 0000000000000000000000000000000000000000..f607b01508dd8c2d419d5815efe9869e4eae8c9d --- /dev/null +++ b/test/replication-luatest/gh_8168_extra_term_bump_test.lua @@ -0,0 +1,46 @@ +local t = require('luatest') +local replica_set = require('luatest.replica_set') +local server = require('luatest.server') + +local g = t.group('gh-8168') + +g.before_each(function(cg) + cg.replica_set = replica_set:new{} + local box_cfg = { + replication = { + server.build_listen_uri('server1'), + server.build_listen_uri('server2'), + }, + election_mode = 'manual', + replication_timeout = 0.1, + } + cg.server1 = cg.replica_set:build_and_add_server{ + alias = 'server1', + box_cfg = box_cfg, + } + box_cfg.election_timeout = 1e-9 + box_cfg.read_only = true + cg.server2 = cg.replica_set:build_and_add_server{ + alias = 'server2', + box_cfg = box_cfg, + } + cg.replica_set:start() +end) + +g.after_each(function(cg) + cg.replica_set:drop() +end) + +g.test_double_term_bump_on_promote = function(cg) + cg.server1:wait_for_election_leader() + local term = cg.server1:get_election_term() + local ok, _ = cg.server2:exec(function() + return pcall(box.ctl.promote) + end) + t.assert(ok, 'Promote succeeded') + t.assert_equals(cg.server2:get_election_term(), term + 1, + 'Promote bumped term once') + t.assert_equals(cg.server2:exec(function() + return box.info.election.state + end), 'leader', 'Server 2 is the leader') +end diff --git a/test/unit/raft.c b/test/unit/raft.c index 923eac192cac5666c63fe685099b730afc850a89..c2934f262aba740b40dd2b5829ff8a60f3ac0765 100644 --- a/test/unit/raft.c +++ b/test/unit/raft.c @@ -1569,7 +1569,7 @@ raft_test_too_long_wal_write(void) static void raft_test_promote_restore(void) { - raft_start_test(12); + raft_start_test(21); struct raft_node node; raft_node_create(&node); @@ -1617,6 +1617,41 @@ raft_test_promote_restore(void) raft_node_restore(&node); ok(!node.raft.is_candidate, "not a candidate"); + /* + * The node doesn't schedule new elections in the next round after + * promotion. + */ + raft_node_cfg_is_candidate(&node, false); + raft_node_cfg_is_enabled(&node, true); + raft_node_cfg_election_quorum(&node, 2); + raft_node_promote(&node); + + is(node.raft.state, RAFT_STATE_CANDIDATE, "became candidate"); + is(node.raft.term, 5, "new term"); + + /* Wait for the election timeout. */ + double ts = raft_time(); + raft_run_next_event(); + ok(raft_time() - ts >= node.raft.election_timeout, + "election timeout passed"); + is(node.raft.state, RAFT_STATE_CANDIDATE, "still a candidate"); + is(node.raft.term, 5, "do not bump term"); + + is(raft_node_send_leader( + &node, + 5 /* Term. */, + 2 /* Source. */ + ), 0, "another leader is accepted"); + + is(raft_node_send_follower( + &node, + 5 /* Term. */, + 2 /* Source. */ + ), 0, "leader resign is accepted"); + + is(node.raft.state, RAFT_STATE_FOLLOWER, "stay follower"); + is(node.raft.term, 5, "do not bump term"); + raft_node_destroy(&node); raft_finish_test(); } diff --git a/test/unit/raft.result b/test/unit/raft.result index d32947a469b51dd228129e564355ae8a65bb452d..2fdca67ef3e0f179852bd02c9be94695cb1f5068 100644 --- a/test/unit/raft.result +++ b/test/unit/raft.result @@ -265,7 +265,7 @@ ok 13 - subtests ok 14 - subtests *** raft_test_too_long_wal_write: done *** *** raft_test_promote_restore *** - 1..12 + 1..21 ok 1 - became leader after promotion ok 2 - restore drops a non-candidate leader to a follower ok 3 - became leader after promotion @@ -278,6 +278,15 @@ ok 14 - subtests ok 10 - still old term ok 11 - not a candidate ok 12 - not a candidate + ok 13 - became candidate + ok 14 - new term + ok 15 - election timeout passed + ok 16 - still a candidate + ok 17 - do not bump term + ok 18 - another leader is accepted + ok 19 - leader resign is accepted + ok 20 - stay follower + ok 21 - do not bump term ok 15 - subtests *** raft_test_promote_restore: done *** *** raft_test_bump_term_before_cfg ***