Skip to content
Snippets Groups Projects
Commit 4c67b472 authored by Georgy Moshkin's avatar Georgy Moshkin :speech_balloon:
Browse files

fix: don't retry if join request failed

parent 5a430a54
No related branches found
No related tags found
1 merge request: !193 "fix: don't retry if join request failed"
......@@ -631,12 +631,22 @@ fn start_join(args: &args::Run, leader_address: String) {
// leader.
let fn_name = stringify_cfunc!(traft::node::raft_join);
let resp: traft::JoinResponse = loop {
match tarantool::net_box_call_or_log(&leader_address, fn_name, &req, Duration::MAX) {
Some(resp) => break resp,
None => {
fiber::sleep(Duration::from_secs(1));
let now = Instant::now();
// TODO: exponential backoff (grow the retry timeout after each failed attempt)
let timeout = Duration::from_secs(1);
match tarantool::net_box_call(&leader_address, fn_name, &req, Duration::MAX) {
Err(TntError::IO(e)) => {
tlog!(Warning, "join request failed: {e}");
fiber::sleep(timeout.saturating_sub(now.elapsed()));
continue;
}
Err(e) => {
tlog!(Error, "join request failed: {e}");
std::process::exit(-1);
}
Ok(resp) => {
break resp;
}
}
};
......
......@@ -422,13 +422,23 @@ class Cluster:
self,
wait_ready=True,
peers=None,
generate_instance_id=True,
instance_id=None,
failure_domain=dict(),
init_replication_factor=1,
) -> Instance:
"""Add an `Instance` into the list of instances of the cluster and wait
for it to start unless `wait_ready` is `False`.
If `instance_id` is not specified (or is set to `None`), this function will
generate a value for it.
Otherwise `instance_id` is passed to the command as the `--instance-id`
parameter.
Passing an empty string (`instance_id = ""`) will force the cluster
leader to choose a value when the instance is joined.
"""
i = 1 + len(self.instances)
instance_id = f"i{i}" if generate_instance_id else ""
instance_id = f"i{i}" if instance_id is None else instance_id
instance = Instance(
binary_path=self.binary_path,
......@@ -452,6 +462,23 @@ class Cluster:
return instance
def fail_to_add_instance(
    self,
    peers=None,
    instance_id=None,
    failure_domain=None,
    init_replication_factor=1,
):
    """Add an instance that is expected to fail to start.

    The instance is created through `add_instance` with `wait_ready=False`,
    immediately removed from `self.instances` again, and then
    `fail_to_start()` is called on it.

    All parameters are forwarded to `add_instance` unchanged, except that
    an omitted `failure_domain` (`None`) is replaced with a fresh empty dict.
    """
    # A new dict is built on every call: a `dict()` default in the signature
    # is evaluated only once, so every call (and every instance created from
    # it) would share one mutable object.
    if failure_domain is None:
        failure_domain = dict()

    instance = self.add_instance(
        wait_ready=False,
        peers=peers,
        instance_id=instance_id,
        failure_domain=failure_domain,
        init_replication_factor=init_replication_factor,
    )
    # Drop the instance from the cluster's list before checking that it
    # fails to start.
    self.instances.remove(instance)
    instance.fail_to_start()
def kill(self):
for instance in self.instances:
instance.kill()
......
......@@ -248,8 +248,10 @@ def test_join_without_explicit_instance_id(cluster: Cluster):
# Then one of the instances became Leader with instance_id=1
# And the second one became Follower with instance_id=2
i1 = cluster.add_instance(generate_instance_id=False)
i2 = cluster.add_instance(generate_instance_id=False)
# start the instances with empty instance_ids which will force the Leader to
# choose ones for them when they join
i1 = cluster.add_instance(instance_id="")
i2 = cluster.add_instance(instance_id="")
i1.assert_raft_status("Leader")
assert i1.instance_id == "i1"
......@@ -327,3 +329,33 @@ def test_reconfigure_failure_domains(cluster: Cluster):
# fail to remove domain subdivision
i1.failure_domain = dict(planet="Mars")
i1.fail_to_start()
def test_fail_to_join(cluster: Cluster):
# Check scenarios in which instances fail to join the cluster for different
# reasons
i1 = cluster.add_instance(failure_domain=dict(owner="Tom"))
# Cluster has a required failure domain,
# so instance without the required failure domain cannot join
# and therefore exits with failure
cluster.fail_to_add_instance(failure_domain=dict())
# An instance with the given instance_id is already present in the cluster
# so this instance cannot join
# and therefore exits with failure
cluster.fail_to_add_instance(
instance_id=i1.instance_id, failure_domain=dict(owner="Jim")
)
joined_instances = i1.eval(
"""
res = {}
for _, t in pairs(box.space.raft_group:select()) do
table.insert(res, { t.instance_id, t.raft_id })
end
return res
"""
)
assert {tuple(i) for i in joined_instances} == {(i1.instance_id, i1.raft_id)}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment