From b76c2fb14e8fa97cfac77d1e5586a336846a8ff4 Mon Sep 17 00:00:00 2001 From: Georgy Moshkin <gmoshkin@picodata.io> Date: Wed, 28 Aug 2024 18:40:53 +0300 Subject: [PATCH] feat: add some parameters to configure timeouts in governor loop --- src/config.rs | 21 +++++++++++++++++++++ src/governor/mod.rs | 25 ++++++++++++++++++------- test/int/test_basics.py | 5 ++++- test/int/test_sql.py | 6 ++++++ 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/config.rs b/src/config.rs index 568959d6d1..999916676c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1482,6 +1482,27 @@ pub struct AlterSystemParameters { #[introspection(sbroad_type = SbroadType::Double)] #[introspection(config_default = (24 * 3600))] pub snapshot_read_view_close_timeout: f64, + + /// Governor proposes operations to raft log and waits for them to be + /// applied to the local raft machine. + /// Governor will only wait for this many seconds before going to the new loop iteration. + #[introspection(sbroad_type = SbroadType::Double)] + #[introspection(config_default = 3.0)] + pub governor_raft_op_timeout: f64, + + /// Governor sets up replication and configuration in the cluster as well as + /// applying global schema changes. It is doing so by making RPC requests to + /// other instances. + /// Governor will only wait for the RPC responses for this many seconds before going to the new loop iteration. + #[introspection(sbroad_type = SbroadType::Double)] + #[introspection(config_default = 3.0)] + pub governor_common_rpc_timeout: f64, + + /// Governor sets up the plugin system by making RPC requests to other instances. + /// Governor will only wait for the RPC responses for this many seconds before going to the new loop iteration. + #[introspection(sbroad_type = SbroadType::Double)] + #[introspection(config_default = 10.0)] + pub governor_plugin_rpc_timeout: f64, } /// A special macro helper for referring to alter system parameters thoroughout diff --git a/src/governor/mod.rs b/src/governor/mod.rs index bed84873c7..95fd79c22d 100644 --- a/src/governor/mod.rs +++ b/src/governor/mod.rs @@ -49,9 +49,6 @@ pub(crate) mod plan; impl Loop { const RETRY_TIMEOUT: Duration = Duration::from_millis(250); - const RAFT_OP_TIMEOUT: Duration = Duration::from_secs(3); - const COMMON_RPC_TIMEOUT: Duration = Duration::from_secs(3); - const PLUGIN_RPC_TIMEOUT: Duration = Duration::from_secs(10); async fn iter_fn( State { @@ -69,9 +66,23 @@ impl Loop { return ControlFlow::Continue(()); } - let raft_op_timeout = Loop::RAFT_OP_TIMEOUT; - let rpc_timeout = Loop::COMMON_RPC_TIMEOUT; - let plugin_rpc_timeout = Loop::PLUGIN_RPC_TIMEOUT; + let v: f64 = storage + .properties + .get_or_alter_system_default(crate::system_parameter_name!(governor_raft_op_timeout)) + .expect("storage should never ever fail"); + let raft_op_timeout = Duration::from_secs_f64(v); + + let v: f64 = storage + .properties + .get_or_alter_system_default(crate::system_parameter_name!(governor_common_rpc_timeout)) + .expect("storage should never ever fail"); + let rpc_timeout = Duration::from_secs_f64(v); + + let v: f64 = storage + .properties + .get_or_alter_system_default(crate::system_parameter_name!(governor_plugin_rpc_timeout)) + .expect("storage should never ever fail"); + let plugin_rpc_timeout = Duration::from_secs_f64(v); let instances = storage .instances @@ -218,7 +229,7 @@ impl Loop { tlog!(Info, "proposing conf_change"; "cc" => ?conf_change); if let Err(e) = node.propose_conf_change_and_wait(term, conf_change) { tlog!(Warning, "failed proposing conf_change: {e}"); - fiber::sleep(Duration::from_secs(1)); + fiber::sleep(Loop::RETRY_TIMEOUT); } } diff --git a/test/int/test_basics.py b/test/int/test_basics.py index d8d0786273..e92ba59a62 100644 --- a/test/int/test_basics.py +++ b/test/int/test_basics.py @@ -396,7 +396,10 @@ Insert(_pico_property, ["max_heartbeat_period",5.0]), Insert(_pico_property, ["max_pg_statements",1024]), Insert(_pico_property, ["max_pg_portals",1024]), Insert(_pico_property, ["snapshot_chunk_max_size",16777216]), -Insert(_pico_property, ["snapshot_read_view_close_timeout",86400]))| +Insert(_pico_property, ["snapshot_read_view_close_timeout",86400]), +Insert(_pico_property, ["governor_raft_op_timeout",3.0]), +Insert(_pico_property, ["governor_common_rpc_timeout",3.0]), +Insert(_pico_property, ["governor_plugin_rpc_timeout",10.0]))| | 0 | 1 |BatchDml( Insert(_pico_user, [0,"guest",0,["md5","md5084e0343a0486ff05530df6c705c8bb4"],1,"user"]), Insert(_pico_privilege, [1,0,"login","universe",0,0]), diff --git a/test/int/test_sql.py b/test/int/test_sql.py index d8fb43c3c3..dc8118159d 100644 --- a/test/int/test_sql.py +++ b/test/int/test_sql.py @@ -398,6 +398,9 @@ def test_read_from_system_tables(cluster: Cluster): assert keys == [ "auto_offline_timeout", "global_schema_version", + "governor_common_rpc_timeout", + "governor_plugin_rpc_timeout", + "governor_raft_op_timeout", "max_heartbeat_period", "max_login_attempts", "max_pg_portals", @@ -5065,6 +5068,9 @@ def test_alter_system_property(cluster: Cluster): ("max_login_attempts", 8), ("max_pg_statements", 4096), ("max_pg_portals", 2048), + ("governor_raft_op_timeout", 10), + ("governor_common_rpc_timeout", 10), + ("governor_plugin_rpc_timeout", 20), ] default_prop = [] -- GitLab