From 8b4b4ba880d901d85d8cb7d4cbce7696b29780de Mon Sep 17 00:00:00 2001
From: Yaroslav Dynnikov <yaroslav.dynnikov@gmail.com>
Date: Mon, 17 Oct 2022 11:10:41 +0300
Subject: [PATCH] refactor: replace is_ready flag with grade checking

---
 src/main.rs                      |  2 --
 src/traft/node.rs                | 11 ---------
 test/conftest.py                 | 41 ++++++++++++++++++++------------
 test/int/test_couple.py          | 20 ++++++++--------
 test/int/test_joining.py         |  6 ++---
 test/int/test_network_effects.py |  6 ++---
 test/manual/test_benchmark.py    |  4 ++--
 test/manual/test_scaling.py      |  6 ++---
 test/rand/test_randomized.py     |  4 ++--
 9 files changed, 49 insertions(+), 51 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index b6b7bfcd58..e5597e9436 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -887,8 +887,6 @@ fn postjoin(args: &args::Run, storage: Storage) {
             }
         };
     }
-
-    node.mark_as_ready();
 }
 
 fn main_tarantool(args: args::Tarantool) -> ! {
diff --git a/src/traft/node.rs b/src/traft/node.rs
index 2f66435028..cf58db4fac 100644
--- a/src/traft/node.rs
+++ b/src/traft/node.rs
@@ -103,9 +103,6 @@ pub struct Status {
     pub leader_id: Option<RaftId>,
     /// Current raft state
     pub raft_state: RaftState,
-    /// Whether instance has finished its `postjoin`
-    /// initialization stage
-    pub is_ready: bool,
 }
 
 /// The heart of `traft` module - the Node.
@@ -144,7 +141,6 @@ impl Node {
             id: raft_id,
             leader_id: None,
             raft_state: RaftState::Follower,
-            is_ready: false,
         }));
 
         let node_impl = Rc::new(Mutex::new(node_impl));
@@ -181,13 +177,6 @@ impl Node {
         self.status.get()
     }
 
-    pub fn mark_as_ready(&self) {
-        let mut status = self.status.get();
-        status.is_ready = true;
-        self.status.set(status);
-        event::broadcast(Event::StatusChanged);
-    }
-
     /// Wait for the status to be changed.
     /// **This function yields**
     pub fn wait_status(&self) {
diff --git a/test/conftest.py b/test/conftest.py
index 96b97f28eb..ac2320391c 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -159,7 +159,6 @@ def normalize_net_box_result(func):
 class RaftStatus:
     id: int
     raft_state: str
-    is_ready: bool
     leader_id: int | None = None
 
 
@@ -417,13 +416,25 @@ class Instance:
         assert want == have
 
     @funcy.retry(tries=30, timeout=0.2)
-    def wait_ready(self):
-        status = self._raft_status()
-        assert status.is_ready
-        self.raft_id = status.id
-        with self.connect(timeout=2) as conn:
-            self.instance_id = conn.space("raft_state").select(("instance_id",))[0][1]
-        eprint(f"{self} is ready")
+    def wait_online(self):
+        """Wait until instance attains Online grade
+
+        Raises:
+            AssertionError: if doesn't succeed
+        """
+
+        whoami = self.call("picolib.whoami")
+        assert isinstance(whoami, dict)
+        assert isinstance(whoami["raft_id"], int)
+        assert isinstance(whoami["instance_id"], str)
+        self.raft_id = whoami["raft_id"]
+        self.instance_id = whoami["instance_id"]
+
+        myself = self.call("picolib.peer_info", self.instance_id)
+        assert isinstance(myself, dict)
+        assert myself["current_grade"] == "Online"
+
+        eprint(f"{self} is online")
 
     @funcy.retry(tries=4, timeout=0.1, errors=AssertionError)
     def promote_or_fail(self):
@@ -478,28 +489,28 @@ class Cluster:
 
         for _ in range(instance_count):
             self.add_instance(
-                wait_ready=False, init_replication_factor=init_replication_factor
+                wait_online=False, init_replication_factor=init_replication_factor
             )
 
         for instance in self.instances:
             instance.start()
 
         for instance in self.instances:
-            instance.wait_ready()
+            instance.wait_online()
 
         eprint(f" {self} deployed ".center(80, "="))
         return self.instances
 
     def add_instance(
         self,
-        wait_ready=True,
+        wait_online=True,
         peers=None,
         instance_id: str | bool = True,
         failure_domain=dict(),
         init_replication_factor=1,
     ) -> Instance:
         """Add an `Instance` into the list of instances of the cluster and wait
-        for it to start unless `wait_ready` is `False`.
+        for it to attain Online grade unless `wait_online` is `False`.
 
         `instance_id` specifies how the instance's id is generated in the
         following way:
@@ -544,9 +555,9 @@ class Cluster:
         assert self.base_port <= instance.port <= self.max_port
         self.instances.append(instance)
 
-        if wait_ready:
+        if wait_online:
             instance.start()
-            instance.wait_ready()
+            instance.wait_online()
 
         return instance
 
@@ -558,7 +569,7 @@ class Cluster:
         init_replication_factor=1,
     ):
         instance = self.add_instance(
-            wait_ready=False,
+            wait_online=False,
             peers=peers,
             instance_id=instance_id,
             failure_domain=failure_domain,
diff --git a/test/int/test_couple.py b/test/int/test_couple.py
index 6106fa86fb..35dc444a74 100644
--- a/test/int/test_couple.py
+++ b/test/int/test_couple.py
@@ -60,7 +60,7 @@ def test_restart_follower(cluster2: Cluster):
 
     i1, i2 = cluster2.instances
     i2.restart()
-    i2.wait_ready()
+    i2.wait_online()
     i1.assert_raft_status("Leader")
     i2.assert_raft_status("Follower")
 
@@ -73,7 +73,7 @@ def test_restart_leader(cluster2: Cluster):
 
     i1, _ = cluster2.instances
     i1.restart()
-    i1.wait_ready()
+    i1.wait_online()
     i1.raft_propose_eval("return")
 
 
@@ -87,8 +87,8 @@ def test_restart_both(cluster2: Cluster):
     i2.terminate()
 
     @funcy.retry(tries=20, timeout=0.1)
-    def wait_alive(instance):
-        assert instance._raft_status().is_ready is False
+    def wait_alive(instance: Instance):
+        assert instance._raft_status().leader_id is None
 
     i1.start()
     # This synchronization is necessary for proper test case reproducing.
@@ -97,8 +97,8 @@ def test_restart_both(cluster2: Cluster):
     wait_alive(i1)
     i2.start()
 
-    i1.wait_ready()
-    i2.wait_ready()
+    i1.wait_online()
+    i2.wait_online()
 
     i1.raft_propose_eval("rawset(_G, 'check', true)")
     assert i1.eval("return check") is True
@@ -140,7 +140,7 @@ def test_deactivation(cluster2: Cluster):
     assert is_voter_is_online(i1, i2.raft_id) == (False, False)
 
     i2.start()
-    i2.wait_ready()
+    i2.wait_online()
 
     assert is_voter_is_online(i1, i1.raft_id) == (True, True)
     assert_is_voter_is_online(i2, i2.raft_id, True, True)
@@ -159,8 +159,8 @@ def test_deactivation(cluster2: Cluster):
     i1.start()
     i2.start()
 
-    i1.wait_ready()
-    i2.wait_ready()
+    i1.wait_online()
+    i2.wait_online()
 
     assert is_voter_is_online(i1, i1.raft_id) == (True, True)
     assert is_voter_is_online(i2, i2.raft_id) == (True, True)
@@ -215,7 +215,7 @@ def test_gl127_graceul_shutdown(cluster2: Cluster):
 
     # make sure i1 is leader
     i1.promote_or_fail()
-    i2.wait_ready()
+    i2.wait_online()
 
     global on_shutdown_timed_out
     on_shutdown_timed_out = False
diff --git a/test/int/test_joining.py b/test/int/test_joining.py
index 1dc7e0a13e..edf59cac22 100644
--- a/test/int/test_joining.py
+++ b/test/int/test_joining.py
@@ -237,7 +237,7 @@ def test_rebootstrap_follower(cluster3: Cluster):
 
     i1, i2, i3 = cluster3.instances
     i3.restart(remove_data=True)
-    i3.wait_ready()
+    i3.wait_online()
     i3.assert_raft_status("Follower")
 
     # git.picodata.io: #114
@@ -317,13 +317,13 @@ def test_reconfigure_failure_domains(cluster: Cluster):
 
     i1.failure_domain = dict(planet="Mars", owner="Bob")
     i1.start()
-    i1.wait_ready()
+    i1.wait_online()
     # replicaset doesn't change automatically
     assert replicaset_id(i1) == "r1"
 
     i2.failure_domain = dict(planet="Earth", owner="Jon")
     i2.start()
-    i2.wait_ready()
+    i2.wait_online()
     assert replicaset_id(i2) == "r1"
 
     i2.terminate()
diff --git a/test/int/test_network_effects.py b/test/int/test_network_effects.py
index 6717635c0b..52744976b2 100644
--- a/test/int/test_network_effects.py
+++ b/test/int/test_network_effects.py
@@ -44,8 +44,8 @@ def test_log_rollback(cluster3: Cluster):
     i1.terminate()
     i2.start(peers=[i3])
     i3.start(peers=[i2])
-    i2.wait_ready()
-    i3.wait_ready()
+    i2.wait_online()
+    i3.wait_online()
 
     # Help i2 to become a new leader
     i2.promote_or_fail()
@@ -55,7 +55,7 @@ def test_log_rollback(cluster3: Cluster):
 
     # Now i1 has an uncommitted, but persisted entry that should be rolled back.
     i1.start(peers=[i2, i3])
-    i1.wait_ready()
+    i1.wait_online()
     retrying(lambda: i1.assert_raft_status("Follower", i2.raft_id))
 
     propose_state_change(i1, "i1 is alive again")
diff --git a/test/manual/test_benchmark.py b/test/manual/test_benchmark.py
index 885815fb12..8da3e89955 100644
--- a/test/manual/test_benchmark.py
+++ b/test/manual/test_benchmark.py
@@ -114,7 +114,7 @@ def test_benchmark_nop(cluster, tmpdir, cluster_size, fibers, with_flamegraph):
     def expand_cluster(cluster: Cluster, size: int):
         c = 0
         while len(cluster.instances) < size:
-            cluster.add_instance(wait_ready=False).start()
+            cluster.add_instance(wait_online=False).start()
             c += 1
             if c % 5 == 0:
                 wait_longer(cluster)
@@ -195,7 +195,7 @@ def test_benchmark_nop(cluster, tmpdir, cluster_size, fibers, with_flamegraph):
 @funcy.retry(tries=30, timeout=10)  # type: ignore
 def wait_longer(cluster):
     for instance in cluster.instances:
-        instance.wait_ready()
+        instance.wait_online()
 
 
 def test_summarize_replace_and_nop(fixture_store, with_flamegraph, capsys):
diff --git a/test/manual/test_scaling.py b/test/manual/test_scaling.py
index 0516a3fb63..e886d1160f 100644
--- a/test/manual/test_scaling.py
+++ b/test/manual/test_scaling.py
@@ -10,14 +10,14 @@ from conftest import (
 @funcy.retry(tries=30, timeout=10)
 def wait_longer(cluster):
     for instance in cluster.instances:
-        instance.wait_ready()
+        instance.wait_online()
 
 
 def test_instant(cluster: Cluster):
     t1 = time.time()
     cluster.deploy(instance_count=5)
     for i in range(60):
-        cluster.add_instance(wait_ready=False).start()
+        cluster.add_instance(wait_online=False).start()
     wait_longer(cluster)
     t2 = time.time()
 
@@ -42,7 +42,7 @@ def test_chunked(cluster: Cluster):
     total_size -= len(cluster.instances)
     for j in range(int(total_size / chunk_size)):
         for i in range(chunk_size):
-            cluster.add_instance(wait_ready=False).start()
+            cluster.add_instance(wait_online=False).start()
 
         wait_longer(cluster)
 
diff --git a/test/rand/test_randomized.py b/test/rand/test_randomized.py
index 7721383e5b..4e796037db 100644
--- a/test/rand/test_randomized.py
+++ b/test/rand/test_randomized.py
@@ -7,7 +7,7 @@ STEP_DELAY = 500  # ms
 
 
 def create(c: Cluster, istate):
-    i = c.add_instance(wait_ready=False)
+    i = c.add_instance(wait_online=False)
     istate[i.instance_id] = {"instance": i, "started": False}
     return i, istate
 
@@ -120,4 +120,4 @@ def test_randomized(cluster: Cluster, seed: str, delay: int, capsys):
     for instance_id in istate:
         ist = istate[instance_id]
         if ist["started"]:
-            ist["instance"].wait_ready()
+            ist["instance"].wait_online()
-- 
GitLab