From b6630a4108512d9267fb4c9a1f32473418a39562 Mon Sep 17 00:00:00 2001
From: Valentin Syrovatskiy <>
Date: Tue, 5 Jul 2022 08:36:50 +0000
Subject: [PATCH] refactor: rewrite lua tests in python

 test/couple_test.lua             |  71 ------------
 test/helper.lua                  |   7 --
 test/helper/picodata.lua         | 185 -------------------------------
 test/int/             |  19 +++-
 test/int/ | 104 +++++++++++++++++
 test/int/      |  95 ++++++++++++++++
 test/single_test.lua             |  38 -------
 test/supervisor_test.lua         |  85 --------------
 test/threesome_test.lua          | 159 --------------------------
 9 files changed, 217 insertions(+), 546 deletions(-)
 delete mode 100644 test/couple_test.lua
 delete mode 100644 test/helper.lua
 delete mode 100644 test/helper/picodata.lua
 create mode 100644 test/int/
 create mode 100644 test/int/
 delete mode 100644 test/single_test.lua
 delete mode 100644 test/supervisor_test.lua
 delete mode 100644 test/threesome_test.lua

diff --git a/test/couple_test.lua b/test/couple_test.lua
deleted file mode 100644
index eca4226cdf..0000000000
--- a/test/couple_test.lua
+++ /dev/null
diff --git a/test/helper.lua b/test/helper.lua
deleted file mode 100644
index dfd7d5da1f..0000000000
--- a/test/helper.lua
+++ /dev/null
diff --git a/test/int/ b/test/int/
index f73ddf9870..4efbc601fb 100644
--- a/test/int/
+++ b/test/int/
@@ -9,6 +9,7 @@ import pytest
 import signal
 import subprocess
+from datetime import datetime
 from shutil import rmtree
 from typing import Callable, Generator, Iterator
 from itertools import count
@@ -260,13 +261,16 @@ class Instance:
-    def start(self):
+    def start(self, peers=[]):
         if self.process:
             # Be idempotent
         eprint(f"{self} starting...")
+        if peers != []:
+            self.peers = map(lambda i: i.listen, peers)
         self.process = subprocess.Popen(
             env=self.env or None,
@@ -493,3 +497,16 @@ def cluster(
 def instance(cluster: Cluster) -> Generator[Instance, None, None]:
     yield cluster[0]
+def retrying(fn, timeout=3):
+    """Call ``fn`` repeatedly until it stops raising ``AssertionError``.
+
+    Usage example (``assert`` is a statement and cannot appear inside a
+    lambda, so pass a callable that performs the assertion itself)::
+
+        retrying(lambda: i1.assert_raft_status("Follower"))
+        retrying(lambda: assert_all_pids_down(pids), timeout=5)
+
+    Args:
+        fn: Zero-argument callable. Its return value is passed through.
+        timeout: Retry budget in seconds; fractional values are honoured.
+
+    Returns:
+        Whatever ``fn`` returns on its first successful call.
+
+    Raises:
+        AssertionError: The most recent failure, once ``timeout`` seconds
+            have elapsed. Any other exception raised by ``fn`` propagates
+            immediately without retrying.
+    """
+    # Function-scope import keeps this helper independent of the file's
+    # import block.
+    import time
+
+    # A monotonic clock is immune to wall-clock adjustments, and comparing
+    # the full elapsed time (not a truncated whole-seconds component) makes
+    # the timeout exact instead of overshooting by up to a second.
+    deadline = time.monotonic() + timeout
+    while True:
+        try:
+            return fn()
+        except AssertionError:
+            if time.monotonic() > deadline:
+                # Bare raise keeps the original traceback and avoids
+                # making the exception its own cause.
+                raise
+        # Pause briefly between attempts instead of spinning on the CPU.
+        time.sleep(0.01)
diff --git a/test/int/ b/test/int/
new file mode 100644
index 0000000000..6130db0cb5
--- /dev/null
+++ b/test/int/
@@ -0,0 +1,104 @@
+import pytest
+from conftest import (
+    Cluster,
+    Instance,
+    ReturnError,
+    retrying,
+@pytest.fixture
+def cluster3(cluster: Cluster):
+    """Deploy three instances on the shared ``cluster`` and return it.
+
+    Registered as a pytest fixture because the tests in this module
+    request it by parameter name.
+    """
+    cluster.deploy(instance_count=3)
+    return cluster
+def test_log_rollback(cluster3: Cluster):
+    # Scenario: the Leader can't propose without Followers
+    #   Given a cluster
+    #   When all Followers are killed without graceful shutdown
+    #   And the Leader proposes changes
+    #   Then the proposal fails
+    i1, i2, i3 = cluster3.instances
+    i1.assert_raft_status("Leader")
+    i2.assert_raft_status("Follower")
+    i3.assert_raft_status("Follower")
+    def propose_state_change(srv: Instance, value):
+        # Proposes a code string embedding `value` through `srv`.
+        # The second argument (0.1) is presumably a timeout in seconds --
+        # TODO confirm against Instance.raft_propose_eval.
+        # NOTE(review): the string below contains an unmatched ")" and looks
+        # truncated at the front -- confirm the intended expression.
+        code = '{"test-timeline", "%s"})' % value
+        return srv.raft_propose_eval(code, 0.1)
+    propose_state_change(i1, "i1 is a leader")
+    # Simulate the network partitioning: i1 can't reach i2 and i3.
+    i2.kill()
+    i3.kill()
+    # No operations can be committed, i1 is alone.
+    with pytest.raises(ReturnError, match="timeout"):
+        propose_state_change(i1, "i1 lost the quorum")
+    # And now i2 + i3 can't reach i1.
+    i1.terminate()
+    i2.start(peers=[i3])
+    i3.start(peers=[i2])
+    i2.wait_ready()
+    i3.wait_ready()
+    # Help i2 to become a new leader
+    i2.promote_or_fail()
+    retrying(lambda: i3.assert_raft_status("Follower", i2.raft_id))
+    propose_state_change(i2, "i2 takes the leadership")
+    # Now i1 has an uncommitted, but persisted entry that should be rolled back.
+    i1.start(peers=[i2, i3])
+    i1.wait_ready()
+    retrying(lambda: i1.assert_raft_status("Follower", i2.raft_id))
+    # i1 can propose again once it has rejoined as a follower of i2.
+    propose_state_change(i1, "i1 is alive again")
+def test_leader_disruption(cluster3: Cluster):
+    # Scenario: Follower reconnection on disconnect from the cluster
+    #   Given a cluster
+    #   When any Follower loses network connection with all other cluster nodes
+    #   And this Follower starts a new election
+    #   And the network connection is established again
+    #   Then the Follower becomes a Follower again, as it was before
+    i1, i2, i3 = cluster3.instances
+    i1.assert_raft_status("Leader")
+    i2.assert_raft_status("Follower")
+    i3.assert_raft_status("Follower")
+    # Simulate asymmetric network failure.
+    # Node i3 doesn't receive any messages,
+    # including the heartbeat from the leader.
+    # Then it starts a new election.
+    # NOTE(review): the next line appears to have lost the start of its call
+    # expression (receiver and method name) -- restore before applying.
+"box.schema.func.drop", ".raft_interact")
+    # Speed up election timeout
+    i3.eval(
+        """
+        while picolib.raft_status().raft_state == 'Follower' do
+            picolib.raft_tick(1)
+        end
+        """
+    )
+    i3.assert_raft_status("PreCandidate", None)
+    # Advance the raft log. This makes i1 and i2 reject the RequestPreVote.
+    i1.raft_propose_eval("return", timeout_seconds=1)
+    # Restore normal network operation
+    # NOTE(review): the opening line of the call whose arguments follow
+    # appears to be missing -- restore before applying.
+        "box.schema.func.create",
+        ".raft_interact",
+        {"language": "C", "if_not_exists": True},
+    )
+    # i3 should become the follower again without disrupting i1
+    retrying(lambda: i3.assert_raft_status("Follower", i1.raft_id))
diff --git a/test/int/ b/test/int/
new file mode 100644
index 0000000000..684bf1fa31
--- /dev/null
+++ b/test/int/
@@ -0,0 +1,95 @@
+import signal
+import subprocess
+import os
+import time
+import pytest
+from conftest import (
+    Cluster,
+    Instance,
+    retrying,
+from functools import reduce
+@pytest.fixture
+def instance(cluster: Cluster):
+    """Deploy a one-instance cluster and return that single instance.
+
+    Registered as a pytest fixture because the tests in this module
+    request ``instance`` by parameter name.
+    """
+    cluster.deploy(instance_count=1)
+    # Unpacking into a one-element target also asserts that exactly one
+    # instance was deployed.
+    [i1] = cluster.instances
+    return i1
+def pgrep_tree(pid):
+    """Return ``pid`` followed by the pids of all of its descendants.
+
+    Direct children are listed with ``pgrep -P <pid>`` and each child is
+    expanded recursively, depth first, in the order pgrep reports them.
+
+    Args:
+        pid: Process id at the root of the tree.
+
+    Returns:
+        A flat list of ints whose first element is ``pid`` itself. A
+        process without children yields ``[pid]``.
+    """
+    try:
+        # An argument list (no shell) avoids string-building a command line
+        # and spawning an intermediate shell.
+        output = subprocess.check_output(["pgrep", "-P", str(pid)])
+    except (subprocess.SubprocessError, OSError):
+        # pgrep exits non-zero when nothing matches. OSError covers a
+        # missing pgrep binary, which the shell-based form also treated as
+        # "no children".
+        return [pid]
+    tree = [pid]
+    for child in output.split():
+        tree.extend(pgrep_tree(int(child)))
+    return tree
+def pid_alive(pid):
+    """Check for the existence of a unix pid.
+
+    Sends signal 0, which performs the existence and permission checks
+    without delivering anything to the target.
+
+    Returns:
+        True if a process with this pid exists, False otherwise.
+    """
+    try:
+        os.kill(pid, 0)
+    except PermissionError:
+        # EPERM: the process exists but we may not signal it.
+        return True
+    except OSError:
+        return False
+    return True
+def assert_all_pids_down(pids):
+    """Assert that none of ``pids`` refers to a live process."""
+    assert not any(pid_alive(pid) for pid in pids)
+def test_sigkill(instance: Instance):
+    # Scenario: terminating process should terminate all child processes
+    #   Given an instance
+    #   When the process is terminated
+    #   Then all subprocesses are terminated too
+    assert instance.process
+    # Snapshot the tree before killing: the process itself plus exactly
+    # one descendant is expected.
+    pids = pgrep_tree(instance.process.pid)
+    assert len(pids) == 2
+    instance.kill()
+    # Process teardown is asynchronous, so poll until every pid is gone.
+    retrying(lambda: assert_all_pids_down(pids))
+def test_sigint(instance: Instance):
+    # Scenario: suspending of child process prevents the parent process from interrupting
+    #   Given an instance
+    #   When child process is stopped
+    #   And parent process got SIGINT
+    #   Then parent process keeps living
+    #   When child process is continued
+    #   Then parent process gracefully exits
+    assert instance.process
+    supervisor_pid = instance.process.pid
+    # pgrep_tree lists the root pid first, so index 1 is the child.
+    pids = pgrep_tree(supervisor_pid)
+    child_pid = pids[1]
+    os.kill(child_pid, signal.SIGSTOP)
+    # Signal the supervisor and give it some time to handle the signal.
+    # Without a sleep the next assertion is useless. Unfortunately,
+    # there are no alternatives to sleep, because the signal
+    # delivery is a mystery of the kernel.
+    os.kill(supervisor_pid, signal.SIGINT)
+    time.sleep(0.1)
+    # We've signalled the supervisor. It should forward the signal to
+    # the child and keep waiting. But the child is stopped now,
+    # and can't handle the forwarded signal.
+    # Supervisor must still be alive.
+    assert pid_alive(supervisor_pid)
+    os.kill(child_pid, signal.SIGCONT)
+    instance.process.wait(timeout=1)
+    retrying(lambda: assert_all_pids_down(pids))
diff --git a/test/single_test.lua b/test/single_test.lua
deleted file mode 100644
index dd2b397adf..0000000000
--- a/test/single_test.lua
+++ /dev/null
diff --git a/test/supervisor_test.lua b/test/supervisor_test.lua
deleted file mode 100644
index f856ee0e02..0000000000
--- a/test/supervisor_test.lua
+++ /dev/null
diff --git a/test/threesome_test.lua b/test/threesome_test.lua
deleted file mode 100644
index 352ccc0fa9..0000000000
--- a/test/threesome_test.lua
+++ /dev/null
