Skip to content
Snippets Groups Projects
Commit b115dd98 authored by Vladislav Shpilevoy's avatar Vladislav Shpilevoy
Browse files

election: activate raft split vote handling

Raft needs to know cluster size in order to detect and handle
split vote. The patch uses registered server count as cluster
size.

The change is neither documented nor accompanied by a changelog file
because it is an optimization: it can't be observed except in logs or
with a watch.

Closes #5285
parent 3165e01b
No related branches found
No related tags found
No related merge requests found
......@@ -229,7 +229,9 @@ box_raft_update_election_quorum(void)
* be lost.
*/
int quorum = MIN(replication_synchro_quorum, max);
raft_cfg_election_quorum(box_raft(), quorum);
struct raft *raft = box_raft();
raft_cfg_election_quorum(raft, quorum);
raft_cfg_cluster_size(raft, max);
}
void
......
local t = require('luatest')
local cluster = require('test.luatest_helpers.cluster')
local helpers = require('test.luatest_helpers')
local wait_timeout = 120
--
-- gh-5285: a split vote is a situation when nobody can win the leader role in
-- the current term: the number of unused votes is not enough for anyone to
-- reach the quorum. It can be detected to speed up the term bump.
--
local g = t.group('split-vote')
-- Per-test setup: create a new cluster object, build two servers ('node1' and
-- 'node2') that replicate from each other, register them and start the
-- cluster.
g.before_each(function()
    g.cluster = cluster:new({})

    local uri1 = helpers.instance_uri('node1')
    local uri2 = helpers.instance_uri('node2')

    -- A single config table is used for both servers; only `listen` is
    -- switched between the two build_server() calls.
    local cfg = {
        listen = uri1,
        replication = {uri1, uri2},
        -- Short timeout so that a new term starts faster while the first
        -- leader is being elected.
        replication_timeout = 0.1,
        replication_synchro_quorum = 2,
        -- NOTE(review): presumably this huge so that a term is never bumped
        -- by timeout while the test arranges the split vote - confirm.
        election_timeout = 1000000,
    }

    g.node1 = g.cluster:build_server({alias = 'node1', box_cfg = cfg})
    cfg.listen = uri2
    g.node2 = g.cluster:build_server({alias = 'node2', box_cfg = cfg})

    for _, server in ipairs({g.node1, g.node2}) do
        g.cluster:add_server(server)
    end
    g.cluster:start()
end)
-- Per-test teardown: drop the cluster created in before_each.
g.after_each(function()
    local current_cluster = g.cluster
    current_cluster:drop()
end)
-- Scenario: both nodes are disconnected from each other, both become
-- candidates and vote for themselves, then replication is restored so that
-- the split vote can be noticed. Finally the election timeout is lowered and
-- the test waits until each node finds a leader.
function g.test_split_vote(g)
    local nodes = {g.node1, g.node2}

    -- The functions below are executed on the instances via exec(), so they
    -- intentionally do not capture any local variables of the test.

    -- Clear box.cfg.replication and return its previous value.
    local function cut_replication()
        local saved = box.cfg.replication
        box.cfg{replication = {}}
        return saved
    end

    local function become_candidate()
        box.cfg{election_mode = 'candidate'}
    end

    -- True once the instance has voted for itself.
    local function voted_for_self()
        return box.info.election.vote == box.info.id
    end

    local function restore_replication(repl)
        box.cfg{replication = repl}
    end

    local function shorten_election_timeout()
        box.cfg{election_timeout = 1}
    end

    -- Stop the replication so that the nodes can't request votes from each
    -- other. Remember the old settings to put them back later.
    local saved_repl = {}
    for i, node in ipairs(nodes) do
        saved_repl[i] = node:exec(cut_replication)
    end

    -- Both vote for self but don't see the split vote yet.
    for _, node in ipairs(nodes) do
        node:exec(become_candidate)
    end

    -- Wait for the votes to actually happen.
    t.helpers.retrying({timeout = wait_timeout}, function()
        for _, node in ipairs(nodes) do
            assert(node:exec(voted_for_self))
        end
    end)

    -- Now let the nodes notice the split vote.
    for i, node in ipairs(nodes) do
        node:exec(restore_replication, {saved_repl[i]})
    end
    t.helpers.retrying({timeout = wait_timeout}, function()
        local pattern = 'split vote is discovered'
        -- It is enough for the message to appear in either log; the second
        -- log is checked only when the first one has no match.
        local found = g.node1:grep_log(pattern)
        if not found then
            found = g.node2:grep_log(pattern)
        end
        assert(found)
    end)

    -- Ensure a leader is eventually elected. Nothing is broken for good.
    for _, node in ipairs(nodes) do
        node:exec(shorten_election_timeout)
    end
    for _, node in ipairs(nodes) do
        node:wait_election_leader_found()
    end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment