From df6cf5ec639aa414e3cd013de3edb31f59fbbe04 Mon Sep 17 00:00:00 2001 From: Serge Petrenko <sergepetrenko@tarantool.org> Date: Tue, 17 Jan 2023 14:18:52 +0300 Subject: [PATCH] raft: fix an assertion failure in raft_stop_candidate There is a false assertion in raft_stop_candidate(): it assumes that the node must always have a running timer whenever it sees the leader. This is not true when the node is busy writing the new term on disk. Cover the mentioned case in the assertion. Closes #8169 NO_DOC=bugfix Co-authored-by: Sergey Ostanevich <sergos@tarantool.org> --- .../gh-8169-raft-assert-on-wal-write.md | 4 ++ src/lib/raft/raft.c | 6 ++- test/unit/raft.c | 45 ++++++++++++++++++- 3 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 changelogs/unreleased/gh-8169-raft-assert-on-wal-write.md diff --git a/changelogs/unreleased/gh-8169-raft-assert-on-wal-write.md b/changelogs/unreleased/gh-8169-raft-assert-on-wal-write.md new file mode 100644 index 0000000000..e7f8752804 --- /dev/null +++ b/changelogs/unreleased/gh-8169-raft-assert-on-wal-write.md @@ -0,0 +1,4 @@ +## bugfix/raft + +* Fixed an assertion failure in case an election candidate is reconfigured to a + voter during an ongoing WAL write (gh-8169). diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c index 87ef5b7871..4dccc7aea2 100644 --- a/src/lib/raft/raft.c +++ b/src/lib/raft/raft.c @@ -1163,9 +1163,11 @@ raft_stop_candidate(struct raft *raft) } else { /* * Leader is seen and node is waiting for its death. Do not stop - * the timer. + * the timer. If there is a write in progress the timer is + * stopped now, but will be re-started once the write completes. 
*/ - assert(raft_ev_timer_is_active(&raft->timer)); + assert(raft_ev_timer_is_active(&raft->timer) || + raft->is_write_in_progress); } raft->state = RAFT_STATE_FOLLOWER; raft_schedule_broadcast(raft); diff --git a/test/unit/raft.c b/test/unit/raft.c index b63183e888..923eac192c 100644 --- a/test/unit/raft.c +++ b/test/unit/raft.c @@ -2347,10 +2347,52 @@ raft_test_resign(void) raft_finish_test(); } +static void +raft_test_candidate_disable_during_wal_write(void) +{ + raft_start_test(2); + /* + * There was a false-positive assertion failure in a special case: the + * node has just received an is_leader notification and is currently + * writing it on disk. At the same time it is configured as voter + * (gh-8169). + */ + struct raft_node node; + raft_node_create(&node); + raft_node_cfg_is_candidate(&node, true); + raft_node_block(&node); + raft_node_send_leader(&node, 2, 2); + ok(raft_node_check_full_state( + &node, + RAFT_STATE_FOLLOWER /* State. */, + 2 /* Leader. */, + 1 /* Term. */, + 0 /* Vote. */, + 2 /* Volatile term. */, + 0 /* Volatile vote. */, + "{}" /* Vclock. */ + ), "Leader is seen, but wal write is in progress"); + raft_node_cfg_is_candidate(&node, false); + raft_node_unblock(&node); + ok(raft_node_check_full_state( + &node, + RAFT_STATE_FOLLOWER /* State. */, + 2 /* Leader. */, + 2 /* Term. */, + 0 /* Vote. */, + 2 /* Volatile term. */, + 0 /* Volatile vote. */, + "{0: 1}" /* Vclock. */ + ), "State is persisted"); + + raft_node_destroy(&node); + raft_finish_test(); +} + static int main_f(va_list ap) { - raft_start_test(19); + raft_start_test(20); (void) ap; fakeev_init(); @@ -2374,6 +2416,7 @@ main_f(va_list ap) raft_test_split_vote(); raft_test_pre_vote(); raft_test_resign(); + raft_test_candidate_disable_during_wal_write(); fakeev_free(); -- GitLab