From df6cf5ec639aa414e3cd013de3edb31f59fbbe04 Mon Sep 17 00:00:00 2001
From: Serge Petrenko <sergepetrenko@tarantool.org>
Date: Tue, 17 Jan 2023 14:18:52 +0300
Subject: [PATCH] raft: fix an assertion failure in raft_stop_candidate

raft_stop_candidate() contains an incorrect assertion: it assumes that the
node always has a running timer whenever it sees the leader.
This is not true while the node is busy writing the new term to disk.

Cover the mentioned case in the assertion.

Closes #8169

NO_DOC=bugfix

Co-authored-by: Sergey Ostanevich <sergos@tarantool.org>
---
 .../gh-8169-raft-assert-on-wal-write.md       |  4 ++
 src/lib/raft/raft.c                           |  6 ++-
 test/unit/raft.c                              | 45 ++++++++++++++++++-
 3 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 changelogs/unreleased/gh-8169-raft-assert-on-wal-write.md

diff --git a/changelogs/unreleased/gh-8169-raft-assert-on-wal-write.md b/changelogs/unreleased/gh-8169-raft-assert-on-wal-write.md
new file mode 100644
index 0000000000..e7f8752804
--- /dev/null
+++ b/changelogs/unreleased/gh-8169-raft-assert-on-wal-write.md
@@ -0,0 +1,4 @@
+## bugfix/raft
+
+* Fixed an assertion failure in case an election candidate is reconfigured to a
+  voter during an ongoing WAL write (gh-8169).
diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c
index 87ef5b7871..4dccc7aea2 100644
--- a/src/lib/raft/raft.c
+++ b/src/lib/raft/raft.c
@@ -1163,9 +1163,11 @@ raft_stop_candidate(struct raft *raft)
 	} else {
 		/*
 		 * Leader is seen and node is waiting for its death. Do not stop
-		 * the timer.
+		 * the timer. If there is a write in progress the timer is
+		 * stopped now, but will be re-started once the write completes.
 		 */
-		assert(raft_ev_timer_is_active(&raft->timer));
+		assert(raft_ev_timer_is_active(&raft->timer) ||
+		       raft->is_write_in_progress);
 	}
 	raft->state = RAFT_STATE_FOLLOWER;
 	raft_schedule_broadcast(raft);
diff --git a/test/unit/raft.c b/test/unit/raft.c
index b63183e888..923eac192c 100644
--- a/test/unit/raft.c
+++ b/test/unit/raft.c
@@ -2347,10 +2347,52 @@ raft_test_resign(void)
 	raft_finish_test();
 }
 
+static void
+raft_test_candidate_disable_during_wal_write(void)
+{
+	raft_start_test(2);
+	/*
+	 * Regression test for gh-8169, a false-positive assertion failure: a
+	 * candidate has just received an is_leader notification and is still
+	 * writing it to disk when it gets reconfigured as a voter
+	 * (is_candidate = false).
+	 */
+	struct raft_node node;
+	raft_node_create(&node);
+	raft_node_cfg_is_candidate(&node, true);
+	raft_node_block(&node);
+	raft_node_send_leader(&node, 2, 2);
+	ok(raft_node_check_full_state(
+		&node,
+		RAFT_STATE_FOLLOWER /* State. */,
+		2 /* Leader. */,
+		1 /* Term. */,
+		0 /* Vote. */,
+		2 /* Volatile term. */,
+		0 /* Volatile vote. */,
+		"{}" /* Vclock. */
+	), "Leader is seen, but wal write is in progress");
+	raft_node_cfg_is_candidate(&node, false);
+	raft_node_unblock(&node);
+	ok(raft_node_check_full_state(
+		&node,
+		RAFT_STATE_FOLLOWER /* State. */,
+		2 /* Leader. */,
+		2 /* Term. */,
+		0 /* Vote. */,
+		2 /* Volatile term. */,
+		0 /* Volatile vote. */,
+		"{0: 1}" /* Vclock. */
+	), "State is persisted");
+
+	raft_node_destroy(&node);
+	raft_finish_test();
+}
+
 static int
 main_f(va_list ap)
 {
-	raft_start_test(19);
+	raft_start_test(20);
 
 	(void) ap;
 	fakeev_init();
@@ -2374,6 +2416,7 @@ main_f(va_list ap)
 	raft_test_split_vote();
 	raft_test_pre_vote();
 	raft_test_resign();
+	raft_test_candidate_disable_during_wal_write();
 
 	fakeev_free();
 
-- 
GitLab