From af02467997af6c9b898ec918b92bd9f83fdd74e6 Mon Sep 17 00:00:00 2001
From: Georgy Moshkin <gmoshkin@picodata.io>
Date: Thu, 20 Oct 2022 15:20:20 +0300
Subject: [PATCH] fix(governor): vshard_bootstrapped flag is set via raft log

---
 src/traft/node.rs         | 14 ++++++++++++++
 src/traft/rpc/sharding.rs | 11 ++---------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/traft/node.rs b/src/traft/node.rs
index a6be8aac09..c7726aadff 100644
--- a/src/traft/node.rs
+++ b/src/traft/node.rs
@@ -1225,6 +1225,7 @@ fn raft_conf_change_loop(status: Rc<Cell<Status>>, storage: Storage) {
             .any(|peer| peer.has_grades(CurrentGrade::Replicated, TargetGrade::Online));
         if need_sharding {
             let res = (|| -> Result<(), Error> {
+                let vshard_bootstrapped = storage.state.vshard_bootstrapped()?;
                 // TODO: filter out Offline & Expelled peers
                 let reqs = peers.iter().map(|peer| {
                     (
@@ -1232,6 +1233,7 @@ fn raft_conf_change_loop(status: Rc<Cell<Status>>, storage: Storage) {
                         sharding::Request {
                             leader_id,
                             term,
+                            bootstrap: !vshard_bootstrapped && peer.raft_id == leader_id,
                             ..Default::default()
                         },
                     )
@@ -1254,6 +1256,18 @@ fn raft_conf_change_loop(status: Rc<Cell<Status>>, storage: Storage) {
                         "instance_id" => &*peer_iid,
                     );
                 }
+
+                if !vshard_bootstrapped {
+                    node.propose_and_wait(
+                        traft::OpDML::replace(
+                            ClusterSpace::State,
+                            &(StateKey::VshardBootstrapped, true),
+                        )?,
+                        // TODO: don't hard-code the timeout
+                        Duration::from_secs(3),
+                    )??;
+                }
+
                 Ok(())
             })();
             if let Err(e) = res {
diff --git a/src/traft/rpc/sharding.rs b/src/traft/rpc/sharding.rs
index 133c303a3f..d396561fdd 100644
--- a/src/traft/rpc/sharding.rs
+++ b/src/traft/rpc/sharding.rs
@@ -1,6 +1,5 @@
 use ::tarantool::{proc, tlua};
 
-use crate::traft::storage::StateKey;
 use crate::traft::{error::Error, node, RaftId, RaftTerm};
 
 #[proc(packed_args)]
@@ -33,15 +32,8 @@ fn proc_sharding(req: Request) -> Result<Response, Error> {
     lua.exec_with("vshard.router.cfg(...)", &cfg)
         .map_err(tlua::LuaError::from)?;
 
-    // TODO: governor should decide who does this, and propose a OpDML entry
-    // afterwards
-    if !storage
-        .state
-        .get(StateKey::VshardBootstrapped)?
-        .unwrap_or(false)
-    {
+    if req.bootstrap {
         lua.exec("vshard.router.bootstrap()")?;
-        storage.state.put(StateKey::VshardBootstrapped, &true)?;
     }
 
     Ok(Response {})
@@ -53,6 +45,7 @@ pub struct Request {
     pub leader_id: RaftId,
     pub term: RaftTerm,
     pub weights: Option<cfg::ReplicasetWeights>,
+    pub bootstrap: bool,
 }
 impl ::tarantool::tuple::Encode for Request {}
 
-- 
GitLab