From 8b4b4ba880d901d85d8cb7d4cbce7696b29780de Mon Sep 17 00:00:00 2001 From: Yaroslav Dynnikov <yaroslav.dynnikov@gmail.com> Date: Mon, 17 Oct 2022 11:10:41 +0300 Subject: [PATCH] refactor: replace is_ready flag with grade checking --- src/main.rs | 2 -- src/traft/node.rs | 11 --------- test/conftest.py | 41 ++++++++++++++++++++------------ test/int/test_couple.py | 20 ++++++++-------- test/int/test_joining.py | 6 ++--- test/int/test_network_effects.py | 6 ++--- test/manual/test_benchmark.py | 4 ++-- test/manual/test_scaling.py | 6 ++--- test/rand/test_randomized.py | 4 ++-- 9 files changed, 49 insertions(+), 51 deletions(-) diff --git a/src/main.rs b/src/main.rs index b6b7bfcd58..e5597e9436 100644 --- a/src/main.rs +++ b/src/main.rs @@ -887,8 +887,6 @@ fn postjoin(args: &args::Run, storage: Storage) { } }; } - - node.mark_as_ready(); } fn main_tarantool(args: args::Tarantool) -> ! { diff --git a/src/traft/node.rs b/src/traft/node.rs index 2f66435028..cf58db4fac 100644 --- a/src/traft/node.rs +++ b/src/traft/node.rs @@ -103,9 +103,6 @@ pub struct Status { pub leader_id: Option<RaftId>, /// Current raft state pub raft_state: RaftState, - /// Whether instance has finished its `postjoin` - /// initialization stage - pub is_ready: bool, } /// The heart of `traft` module - the Node. @@ -144,7 +141,6 @@ impl Node { id: raft_id, leader_id: None, raft_state: RaftState::Follower, - is_ready: false, })); let node_impl = Rc::new(Mutex::new(node_impl)); @@ -181,13 +177,6 @@ impl Node { self.status.get() } - pub fn mark_as_ready(&self) { - let mut status = self.status.get(); - status.is_ready = true; - self.status.set(status); - event::broadcast(Event::StatusChanged); - } - /// Wait for the status to be changed. /// **This function yields** pub fn wait_status(&self) { diff --git a/test/conftest.py b/test/conftest.py index 96b97f28eb..ac2320391c 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -159,7 +159,6 @@ def normalize_net_box_result(func): class RaftStatus: id: int raft_state: str - is_ready: bool leader_id: int | None = None @@ -417,13 +416,25 @@ class Instance: assert want == have @funcy.retry(tries=30, timeout=0.2) - def wait_ready(self): - status = self._raft_status() - assert status.is_ready - self.raft_id = status.id - with self.connect(timeout=2) as conn: - self.instance_id = conn.space("raft_state").select(("instance_id",))[0][1] - eprint(f"{self} is ready") + def wait_online(self): + """Wait until instance attains Online grade + + Raises: + AssertionError: if doesn't succeed + """ + + whoami = self.call("picolib.whoami") + assert isinstance(whoami, dict) + assert isinstance(whoami["raft_id"], int) + assert isinstance(whoami["instance_id"], str) + self.raft_id = whoami["raft_id"] + self.instance_id = whoami["instance_id"] + + myself = self.call("picolib.peer_info", self.instance_id) + assert isinstance(myself, dict) + assert myself["current_grade"] == "Online" + + eprint(f"{self} is online") @funcy.retry(tries=4, timeout=0.1, errors=AssertionError) def promote_or_fail(self): @@ -478,28 +489,28 @@ class Cluster: for _ in range(instance_count): self.add_instance( - wait_ready=False, init_replication_factor=init_replication_factor + wait_online=False, init_replication_factor=init_replication_factor ) for instance in self.instances: instance.start() for instance in self.instances: - instance.wait_ready() + instance.wait_online() eprint(f" {self} deployed ".center(80, "=")) return self.instances def add_instance( self, - wait_ready=True, + wait_online=True, peers=None, instance_id: str | bool = True, failure_domain=dict(), init_replication_factor=1, ) -> Instance: """Add an `Instance` into the list of instances of the cluster and wait - for it to start unless `wait_ready` is `False`. + for it to attain Online grade unless `wait_online` is `False`. `instance_id` specifies how the instance's id is generated in the following way: @@ -544,9 +555,9 @@ class Cluster: assert self.base_port <= instance.port <= self.max_port self.instances.append(instance) - if wait_ready: + if wait_online: instance.start() - instance.wait_ready() + instance.wait_online() return instance @@ -558,7 +569,7 @@ class Cluster: init_replication_factor=1, ): instance = self.add_instance( - wait_ready=False, + wait_online=False, peers=peers, instance_id=instance_id, failure_domain=failure_domain, diff --git a/test/int/test_couple.py b/test/int/test_couple.py index 6106fa86fb..35dc444a74 100644 --- a/test/int/test_couple.py +++ b/test/int/test_couple.py @@ -60,7 +60,7 @@ def test_restart_follower(cluster2: Cluster): i1, i2 = cluster2.instances i2.restart() - i2.wait_ready() + i2.wait_online() i1.assert_raft_status("Leader") i2.assert_raft_status("Follower") @@ -73,7 +73,7 @@ def test_restart_leader(cluster2: Cluster): i1, _ = cluster2.instances i1.restart() - i1.wait_ready() + i1.wait_online() i1.raft_propose_eval("return") @@ -87,8 +87,8 @@ def test_restart_both(cluster2: Cluster): i2.terminate() @funcy.retry(tries=20, timeout=0.1) - def wait_alive(instance): - assert instance._raft_status().is_ready is False + def wait_alive(instance: Instance): + assert instance._raft_status().leader_id is None i1.start() # This synchronization is necessary for proper test case reproducing. @@ -97,8 +97,8 @@ def test_restart_both(cluster2: Cluster): wait_alive(i1) i2.start() - i1.wait_ready() - i2.wait_ready() + i1.wait_online() + i2.wait_online() i1.raft_propose_eval("rawset(_G, 'check', true)") assert i1.eval("return check") is True @@ -140,7 +140,7 @@ def test_deactivation(cluster2: Cluster): assert is_voter_is_online(i1, i2.raft_id) == (False, False) i2.start() - i2.wait_ready() + i2.wait_online() assert is_voter_is_online(i1, i1.raft_id) == (True, True) assert_is_voter_is_online(i2, i2.raft_id, True, True) @@ -159,8 +159,8 @@ def test_deactivation(cluster2: Cluster): i1.start() i2.start() - i1.wait_ready() - i2.wait_ready() + i1.wait_online() + i2.wait_online() assert is_voter_is_online(i1, i1.raft_id) == (True, True) assert is_voter_is_online(i2, i2.raft_id) == (True, True) @@ -215,7 +215,7 @@ def test_gl127_graceul_shutdown(cluster2: Cluster): # make sure i1 is leader i1.promote_or_fail() - i2.wait_ready() + i2.wait_online() global on_shutdown_timed_out on_shutdown_timed_out = False diff --git a/test/int/test_joining.py b/test/int/test_joining.py index 1dc7e0a13e..edf59cac22 100644 --- a/test/int/test_joining.py +++ b/test/int/test_joining.py @@ -237,7 +237,7 @@ def test_rebootstrap_follower(cluster3: Cluster): i1, i2, i3 = cluster3.instances i3.restart(remove_data=True) - i3.wait_ready() + i3.wait_online() i3.assert_raft_status("Follower") # git.picodata.io: #114 @@ -317,13 +317,13 @@ def test_reconfigure_failure_domains(cluster: Cluster): i1.failure_domain = dict(planet="Mars", owner="Bob") i1.start() - i1.wait_ready() + i1.wait_online() # replicaset doesn't change automatically assert replicaset_id(i1) == "r1" i2.failure_domain = dict(planet="Earth", owner="Jon") i2.start() - i2.wait_ready() + i2.wait_online() assert replicaset_id(i2) == "r1" i2.terminate() diff --git a/test/int/test_network_effects.py b/test/int/test_network_effects.py index 6717635c0b..52744976b2 100644 --- a/test/int/test_network_effects.py +++ b/test/int/test_network_effects.py @@ -44,8 +44,8 @@ def test_log_rollback(cluster3: Cluster): i1.terminate() i2.start(peers=[i3]) i3.start(peers=[i2]) - i2.wait_ready() - i3.wait_ready() + i2.wait_online() + i3.wait_online() # Help i2 to become a new leader i2.promote_or_fail() @@ -55,7 +55,7 @@ def test_log_rollback(cluster3: Cluster): # Now i1 has an uncommitted, but persisted entry that should be rolled back. i1.start(peers=[i2, i3]) - i1.wait_ready() + i1.wait_online() retrying(lambda: i1.assert_raft_status("Follower", i2.raft_id)) propose_state_change(i1, "i1 is alive again") diff --git a/test/manual/test_benchmark.py b/test/manual/test_benchmark.py index 885815fb12..8da3e89955 100644 --- a/test/manual/test_benchmark.py +++ b/test/manual/test_benchmark.py @@ -114,7 +114,7 @@ def test_benchmark_nop(cluster, tmpdir, cluster_size, fibers, with_flamegraph): def expand_cluster(cluster: Cluster, size: int): c = 0 while len(cluster.instances) < size: - cluster.add_instance(wait_ready=False).start() + cluster.add_instance(wait_online=False).start() c += 1 if c % 5 == 0: wait_longer(cluster) @@ -195,7 +195,7 @@ def test_benchmark_nop(cluster, tmpdir, cluster_size, fibers, with_flamegraph): @funcy.retry(tries=30, timeout=10) # type: ignore def wait_longer(cluster): for instance in cluster.instances: - instance.wait_ready() + instance.wait_online() def test_summarize_replace_and_nop(fixture_store, with_flamegraph, capsys): diff --git a/test/manual/test_scaling.py b/test/manual/test_scaling.py index 0516a3fb63..e886d1160f 100644 --- a/test/manual/test_scaling.py +++ b/test/manual/test_scaling.py @@ -10,14 +10,14 @@ from conftest import ( @funcy.retry(tries=30, timeout=10) def wait_longer(cluster): for instance in cluster.instances: - instance.wait_ready() + instance.wait_online() def test_instant(cluster: Cluster): t1 = time.time() cluster.deploy(instance_count=5) for i in range(60): - cluster.add_instance(wait_ready=False).start() + cluster.add_instance(wait_online=False).start() wait_longer(cluster) t2 = time.time() @@ -42,7 +42,7 @@ def test_chunked(cluster: Cluster): total_size -= len(cluster.instances) for j in range(int(total_size / chunk_size)): for i in range(chunk_size): - cluster.add_instance(wait_ready=False).start() + cluster.add_instance(wait_online=False).start() wait_longer(cluster) diff --git a/test/rand/test_randomized.py b/test/rand/test_randomized.py index 7721383e5b..4e796037db 100644 --- a/test/rand/test_randomized.py +++ b/test/rand/test_randomized.py @@ -7,7 +7,7 @@ STEP_DELAY = 500 # ms def create(c: Cluster, istate): - i = c.add_instance(wait_ready=False) + i = c.add_instance(wait_online=False) istate[i.instance_id] = {"instance": i, "started": False} return i, istate @@ -120,4 +120,4 @@ def test_randomized(cluster: Cluster, seed: str, delay: int, capsys): for instance_id in istate: ist = istate[instance_id] if ist["started"]: - ist["instance"].wait_ready() + ist["instance"].wait_online() -- GitLab