diff --git a/src/governor/mod.rs b/src/governor/mod.rs
index a22e73ae4be71ceaa185ad7b782c93abda511b28..aeb52d64bad5025dc936746661dd951179fe32cf 100644
--- a/src/governor/mod.rs
+++ b/src/governor/mod.rs
@@ -854,6 +854,9 @@ impl Loop {
             }
         }
 
+        governor_status
+            .send_modify(|s| s.step_counter += 1)
+            .expect("status shouldn't ever be borrowed across yields");
         ControlFlow::Continue(())
     }
 
@@ -866,6 +869,7 @@ impl Loop {
         let (waker_tx, waker_rx) = watch::channel(());
         let (governor_status_tx, governor_status_rx) = watch::channel(GovernorStatus {
            governor_loop_status: "initializing",
+           step_counter: 0,
        });
 
        let state = State {
@@ -894,7 +898,8 @@ fn set_status(status: &mut watch::Sender<GovernorStatus>, msg: &'static str) {
     if status.get().governor_loop_status == msg {
         return;
     }
-    tlog!(Debug, "governor_loop_status = '{msg}'");
+    let counter = status.get().step_counter;
+    tlog!(Debug, "governor_loop_status = #{counter} '{msg}'");
     status
         .send_modify(|s| s.governor_loop_status = msg)
         .expect("status shouldn't ever be borrowed across yields");
@@ -926,4 +931,10 @@ pub struct GovernorStatus {
     ///
     /// Is set by governor to explain the reason why it has yielded.
     pub governor_loop_status: &'static str,
+
+    /// Number of times the current instance has successfully executed a
+    /// governor step. Is reset on restart.
+    ///
+    /// This value is only used for testing purposes.
+    pub step_counter: u64,
 }
diff --git a/src/info.rs b/src/info.rs
index 6a07ac04dffe58f4c5f0306c4b5f8f230b9e82ae..0fb70f5665b05c3a88621a177bc0a4762cae5fa0 100644
--- a/src/info.rs
+++ b/src/info.rs
@@ -195,15 +195,18 @@ pub fn proc_raft_info() -> Result<RaftInfo, Error> {
 pub struct InternalInfo<'a> {
     pub main_loop_status: Cow<'a, str>,
     pub governor_loop_status: Cow<'a, str>,
+    pub governor_step_counter: u64,
 }
 
 impl tarantool::tuple::Encode for InternalInfo<'_> {}
 
 impl InternalInfo<'static> {
     pub fn get(node: &node::Node) -> Self {
+        let governor = node.governor_loop.status.get();
         InternalInfo {
             main_loop_status: node.status().main_loop_status.into(),
-            governor_loop_status: node.governor_loop.status.get().governor_loop_status.into(),
+            governor_loop_status: governor.governor_loop_status.into(),
+            governor_step_counter: governor.step_counter,
         }
     }
 }
diff --git a/test/conftest.py b/test/conftest.py
index 170598d11385fb3b65fa74b29382e68353fdf4e0..5161382ae8c4a6d5df66521e5ec3f6ac685b3d8a 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -1437,7 +1437,16 @@ class Instance:
             timeout=timeout + 1,  # this timeout is for network call
         )
 
-    def wait_governor_status(self, expected_status: str, timeout: int | float = 10):
+    def governor_step_counter(self) -> int:
+        info = self.call(".proc_runtime_info")["internal"]
+        return info["governor_step_counter"]
+
+    def wait_governor_status(
+        self,
+        expected_status: str,
+        old_step_counter: int | None = None,
+        timeout: int | float = 10,
+    ):
         assert expected_status != "not a leader", "use another function"
 
         def impl():
@@ -1446,6 +1455,9 @@
             if actual_status == "not a leader":
                 raise NotALeader("not a leader")
 
+            if old_step_counter:
+                assert old_step_counter != info["governor_step_counter"]
+
             assert actual_status == expected_status
 
         Retriable(timeout=timeout, rps=1, fatal=NotALeader).call(impl)
diff --git a/test/int/test_basics.py b/test/int/test_basics.py
index 95a017f42e4b47889b0cb39faf4d3735cbd4cf84..7f837e2d4308ae963fbef66d32c03385dbfda80b 100644
--- a/test/int/test_basics.py
+++ b/test/int/test_basics.py
@@ -741,6 +741,9 @@ def test_proc_runtime_info(instance: Instance):
         internal=dict(
             main_loop_status="idle",
             governor_loop_status="idle",
+            # This is a counter which increases each time governor successfully performs a step.
+            # This value may change in the future if we add or remove some of those steps.
+            governor_step_counter=6,
         ),
         http=dict(
             host=host,
diff --git a/test/int/test_sharding.py b/test/int/test_sharding.py
index 24eef904361eb8ccb5926690b10f284e28e53ea1..957ad312d6936776f964124641befa780d49e34f 100644
--- a/test/int/test_sharding.py
+++ b/test/int/test_sharding.py
@@ -221,26 +221,23 @@ def test_vshard_updates_on_master_change(cluster: Cluster):
     assert replicaset_masters[r1_uuid] == i1.name
     assert replicaset_masters[r2_uuid] == i3.name
 
+    old_step_counter = i1.governor_step_counter()
+
     rows = i1.sql(
         """
         SELECT current_vshard_config_version FROM _pico_tier WHERE name = 'default'
         """
     )
     old_vshard_config_version = rows[0][0]
 
-    cluster.cas(
-        "update",
-        "_pico_replicaset",
-        key=["r1"],
-        ops=[("=", "target_master_name", i2.name)],
-    )
-    index = cluster.cas(
-        "update",
-        "_pico_replicaset",
-        key=["r2"],
-        ops=[("=", "target_master_name", i4.name)],
-    )
-    cluster.raft_wait_index(index)
+    update_target_master = """
+        UPDATE _pico_replicaset SET target_master_name = ? WHERE replicaset_id = ?
+    """
+    i1.sql(update_target_master, i2.name, "r1")
+    i1.sql(update_target_master, i4.name, "r2")
+
+    # Wait until governor performs all the necessary actions
+    i1.wait_governor_status("idle", old_step_counter=old_step_counter)
 
-    # Wait for governor to change current master and update the vshard config.
+    # Make sure vshard config version changed.
     wait_current_vshard_config_changed(i1, old_vshard_config_version)
 
     for i in cluster.instances: