From 6f038f4b4253909f826cb665159044248faba807 Mon Sep 17 00:00:00 2001
From: Ilya Kosarev <i.kosarev@tarantool.org>
Date: Fri, 22 Nov 2019 21:46:47 +0300
Subject: [PATCH] replication: make anon replicas iteration safe

In replicaset_follow() we iterate over the anon replicas list, i.e.
the list of replicas that haven't received a UUID yet. On a successful
connect the replica's link is removed from the anon list. If that
happens immediately, without a yield in the applier, the iteration
breaks. Fix it by using rlist_foreach_entry_safe instead of the plain
rlist_foreach_entry. A relevant test case is added.

Part of #4586
Closes #4576
Closes #4440
---
 src/box/replication.cc                        |  3 +-
 .../box_set_replication_stress.result         | 38 +++++++++++++++++++
 .../box_set_replication_stress.test.lua       | 17 +++++++++
 3 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 test/replication/box_set_replication_stress.result
 create mode 100644 test/replication/box_set_replication_stress.test.lua

diff --git a/src/box/replication.cc b/src/box/replication.cc
index f15e51f4da..81f19aa076 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -802,7 +802,8 @@ replicaset_follow(void)
 		if (replica->applier != NULL)
 			applier_resume(replica->applier);
 	}
-	rlist_foreach_entry(replica, &replicaset.anon, in_anon) {
+	struct replica *tmp;
+	rlist_foreach_entry_safe(replica, &replicaset.anon, in_anon, tmp) {
 		/* Restart appliers that failed to connect. */
 		applier_start(replica->applier);
 	}
diff --git a/test/replication/box_set_replication_stress.result b/test/replication/box_set_replication_stress.result
new file mode 100644
index 0000000000..e683c06437
--- /dev/null
+++ b/test/replication/box_set_replication_stress.result
@@ -0,0 +1,38 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+SERVERS = {'master_quorum1', 'master_quorum2'}
+ | ---
+ | ...
+
+-- Deploy a cluster.
+test_run:create_cluster(SERVERS)
+ | ---
+ | ...
+test_run:wait_fullmesh(SERVERS)
+ | ---
+ | ...
+
+test_run:cmd("switch master_quorum1")
+ | ---
+ | - true
+ | ...
+repl = box.cfg.replication
+ | ---
+ | ...
+for i = 1, 1000 do              \
+    box.cfg{replication = ""}   \
+    box.cfg{replication = repl} \
+end
+ | ---
+ | ...
+test_run:cmd("switch default")
+ | ---
+ | - true
+ | ...
+
+-- Cleanup.
+test_run:drop_cluster(SERVERS)
+ | ---
+ | ...
diff --git a/test/replication/box_set_replication_stress.test.lua b/test/replication/box_set_replication_stress.test.lua
new file mode 100644
index 0000000000..407e91e0f1
--- /dev/null
+++ b/test/replication/box_set_replication_stress.test.lua
@@ -0,0 +1,17 @@
+test_run = require('test_run').new()
+SERVERS = {'master_quorum1', 'master_quorum2'}
+
+-- Deploy a cluster.
+test_run:create_cluster(SERVERS)
+test_run:wait_fullmesh(SERVERS)
+
+test_run:cmd("switch master_quorum1")
+repl = box.cfg.replication
+for i = 1, 1000 do              \
+    box.cfg{replication = ""}   \
+    box.cfg{replication = repl} \
+end
+test_run:cmd("switch default")
+
+-- Cleanup.
+test_run:drop_cluster(SERVERS)
-- 
GitLab