Verified commit 0aee4c20, authored by Yaroslav Dynnikov

test: raft log rollback

Also, fix the code to pass the test:

1. Use `replace` instead of `insert` when appending entries: an entry with
   the same index may already be persisted and has to be overwritten
   (see the sketch after this list).
2. Increase the pool worker queue size. The raft node may send several
   messages within a single tick; without this change the second message
   is dropped (see the second sketch below).
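
To make the first point concrete, here is a minimal sketch of the rollback
scenario. The `LogEntry` and `RaftLog` types are hypothetical, and a
`BTreeMap` stands in for the `raft_log` space; it only illustrates why an
append must overwrite rows that are already persisted.

use std::collections::BTreeMap;

#[derive(Clone, Debug)]
struct LogEntry {
    index: u64,
    term: u64,
    data: Vec<u8>,
}

#[derive(Default)]
struct RaftLog {
    // Stand-in for the `raft_log` space. BTreeMap::insert already overwrites
    // an existing key; Tarantool's `space.insert` would error on a duplicate
    // primary key, which is why the real code switches to `space.replace`.
    rows: BTreeMap<u64, LogEntry>,
}

impl RaftLog {
    fn append(&mut self, entries: &[LogEntry]) {
        for e in entries {
            self.rows.insert(e.index, e.clone());
        }
    }
}

fn main() {
    let mut log = RaftLog::default();
    // The old leader persisted index 2 in term 1, but it never got committed.
    log.append(&[
        LogEntry { index: 1, term: 1, data: b"committed".to_vec() },
        LogEntry { index: 2, term: 1, data: b"uncommitted".to_vec() },
    ]);
    // The new leader re-sends index 2 with a newer term: the stale entry
    // must be overwritten, not rejected as a duplicate.
    log.append(&[LogEntry { index: 2, term: 2, data: b"rolled back".to_vec() }]);
    assert_eq!(log.rows[&2].term, 2);
}

The assertion at the end holds: the stale uncommitted entry at index 2 from
term 1 is replaced by the new leader's entry from term 2.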

The second change is a poorly designed workaround. A constant limit can
hardly be both cost-effective and reliable at the same time, but we don't
have a better solution at the moment.
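
The queue-size issue can be illustrated with a toy model. The `Worker` type
below is hypothetical: it only models a bounded per-worker inbox, not the
real ConnectionPool, but the accounting is the same.

use std::collections::VecDeque;

struct Worker {
    in_flight: Option<&'static str>, // the message currently being sent
    queue: VecDeque<&'static str>,
    queue_len: usize,
}

impl Worker {
    fn new(queue_len: usize) -> Self {
        Self { in_flight: None, queue: VecDeque::new(), queue_len }
    }

    /// Returns `false` if the message had to be dropped.
    fn send(&mut self, msg: &'static str) -> bool {
        if self.in_flight.is_none() {
            self.in_flight = Some(msg);
            true
        } else if self.queue.len() < self.queue_len {
            self.queue.push_back(msg);
            true
        } else {
            false // the queue is full: the message is silently lost
        }
    }
}

fn main() {
    // Two messages produced within a single raft tick.
    let burst = ["MsgAppend to peer 2", "MsgAppend to peer 3"];

    for queue_len in [0, 10] {
        let mut worker = Worker::new(queue_len);
        let delivered = burst.into_iter().filter(|&m| worker.send(m)).count();
        println!("queue_len({queue_len}): delivered {delivered} of {}", burst.len());
    }
}

With `queue_len = 0` only the in-flight message survives the tick and the
second one is dropped; `queue_len = 10` absorbs the whole burst.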

Close https://gitlab.com/picodata/picodata/picodata/-/issues/26
parent 67e479a4
Merge request !53: test: raft log rollback
Pipeline #3617 canceled
@@ -111,7 +111,7 @@ fn raft_main(
     let mut next_tick = Instant::now() + Node::TICK;
     let mut pool = ConnectionPool::builder()
         .handler_name(".raft_interact")
-        .queue_len(0)
+        .queue_len(10)
         .timeout(Node::TICK * 4)
         .build();
@@ -210,7 +210,7 @@ impl Storage {
         let mut space = Storage::space(RAFT_LOG)?;
         for e in entries {
             let row = row::Entry::try_from(e.clone())?;
-            space.insert(&row).map_err(box_err!())?;
+            space.replace(&row).map_err(box_err!())?;
         }
         Ok(())

local t = require('luatest')
local h = require('test.helper')
local g = t.group()
local fio = require('fio')
g.before_all(function()
    g.data_dir = fio.tempdir()

    local peer = {
        '127.0.0.1:13301',
        '127.0.0.1:13302',
        '127.0.0.1:13303',
    }

    g.cluster = {
        i1 = h.Picodata:new({
            name = 'i1',
            data_dir = g.data_dir .. '/i1',
            listen = '127.0.0.1:13301',
            peer = peer,
            env = {PICODATA_RAFT_ID = "1"},
        }),
        i2 = h.Picodata:new({
            name = 'i2',
            data_dir = g.data_dir .. '/i2',
            listen = '127.0.0.1:13302',
            peer = peer,
            env = {PICODATA_RAFT_ID = "2"},
        }),
        i3 = h.Picodata:new({
            name = 'i3',
            data_dir = g.data_dir .. '/i3',
            listen = '127.0.0.1:13303',
            peer = peer,
            env = {PICODATA_RAFT_ID = "3"},
        }),
    }

    for _, node in pairs(g.cluster) do
        node:start()
    end
end)
g.after_all(function()
    for _, node in pairs(g.cluster) do
        node:stop()
    end
    fio.rmtree(g.data_dir)
end)
local function propose_state_change(srv, value)
    -- It's just boilerplate: put a marker value into the raft_state space.
    local code = string.format(
        'box.space.raft_state:put({"test-timeline", %q})',
        value
    )
    return srv:raft_propose_eval(0.1, code)
end
g.test = function()
    -- Speed up node election
    g.cluster.i1:try_promote()
    h.retrying({}, function()
        g.cluster.i2:assert_raft_status("Follower", 1)
        g.cluster.i3:assert_raft_status("Follower", 1)
    end)

    t.assert_equals(
        propose_state_change(g.cluster.i1, "i1 is a leader"),
        true
    )

    -- Simulate a network partition: i1 can't reach i2 and i3.
    g.cluster.i2:stop()
    g.cluster.i3:stop()

    t.assert_equals(
        propose_state_change(g.cluster.i1, "i1 lost the quorum"),
        false
    )

    -- And now i2 + i3 can't reach i1.
    g.cluster.i1:stop()
    g.cluster.i2:start()
    g.cluster.i3:start()

    -- Help i2 become the new leader.
    g.cluster.i2:try_promote()
    h.retrying({}, function()
        g.cluster.i3:assert_raft_status("Follower", 2)
    end)

    t.assert_equals(
        propose_state_change(g.cluster.i2, "i2 takes the leadership"),
        true
    )

    -- Now i1 has an uncommitted, but persisted entry that should be rolled back.
    g.cluster.i1:start()
    h.retrying({}, function()
        g.cluster.i1:assert_raft_status("Follower", 2)
    end)

    t.assert_equals(
        propose_state_change(g.cluster.i1, "i1 is alive again"),
        true
    )
end