Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
picodata
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Container Registry
Model registry
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
core
picodata
Commits
4c67b472
Commit
4c67b472
authored
2 years ago
by
Georgy Moshkin
Browse files
Options
Downloads
Patches
Plain Diff
fix: don't retry if join request failed
parent
5a430a54
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!193
fix: don't retry if join request failed
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/main.rs
+14
-4
14 additions, 4 deletions
src/main.rs
test/conftest.py
+29
-2
29 additions, 2 deletions
test/conftest.py
test/int/test_joining.py
+34
-2
34 additions, 2 deletions
test/int/test_joining.py
with
77 additions
and
8 deletions
src/main.rs
+
14
−
4
View file @
4c67b472
...
...
@@ -631,12 +631,22 @@ fn start_join(args: &args::Run, leader_address: String) {
// leader.
let
fn_name
=
stringify_cfunc!
(
traft
::
node
::
raft_join
);
let
resp
:
traft
::
JoinResponse
=
loop
{
match
tarantool
::
net_box_call_or_log
(
&
leader_address
,
fn_name
,
&
req
,
Duration
::
MAX
)
{
Some
(
resp
)
=>
break
resp
,
None
=>
{
fiber
::
sleep
(
Duration
::
from_secs
(
1
));
let
now
=
Instant
::
now
();
// TODO: exponential backoff
let
timeout
=
Duration
::
from_secs
(
1
);
match
tarantool
::
net_box_call
(
&
leader_address
,
fn_name
,
&
req
,
Duration
::
MAX
)
{
Err
(
TntError
::
IO
(
e
))
=>
{
tlog!
(
Warning
,
"join request failed: {e}"
);
fiber
::
sleep
(
timeout
.saturating_sub
(
now
.elapsed
()));
continue
;
}
Err
(
e
)
=>
{
tlog!
(
Error
,
"join request failed: {e}"
);
std
::
process
::
exit
(
-
1
);
}
Ok
(
resp
)
=>
{
break
resp
;
}
}
};
...
...
This diff is collapsed.
Click to expand it.
test/conftest.py
+
29
−
2
View file @
4c67b472
...
...
@@ -422,13 +422,23 @@ class Cluster:
self
,
wait_ready
=
True
,
peers
=
None
,
generate_
instance_id
=
Tru
e
,
instance_id
=
Non
e
,
failure_domain
=
dict
(),
init_replication_factor
=
1
,
)
->
Instance
:
"""
Add an `Instance` into the list of instances of the cluster and wait
for it to start unless `wait_ready` is `False`.
If `instance_id` is not specified (or is set to `None`), this function will
generate a value for it.
Otherwise `instance_id` is passed to the command as the `--instance-id`
parameter.
Passing an empty string (`instance_id =
""
`) will force the cluster
leader to choose a value when the instance is joined.
"""
i
=
1
+
len
(
self
.
instances
)
instance_id
=
f
"
i
{
i
}
"
if
generate_
instance_id
else
""
instance_id
=
f
"
i
{
i
}
"
if
instance_id
is
None
else
instance_id
instance
=
Instance
(
binary_path
=
self
.
binary_path
,
...
...
@@ -452,6 +462,23 @@ class Cluster:
return
instance
def
fail_to_add_instance
(
self
,
peers
=
None
,
instance_id
=
None
,
failure_domain
=
dict
(),
init_replication_factor
=
1
,
):
instance
=
self
.
add_instance
(
wait_ready
=
False
,
peers
=
peers
,
instance_id
=
instance_id
,
failure_domain
=
failure_domain
,
init_replication_factor
=
init_replication_factor
,
)
self
.
instances
.
remove
(
instance
)
instance
.
fail_to_start
()
def
kill
(
self
):
for
instance
in
self
.
instances
:
instance
.
kill
()
...
...
This diff is collapsed.
Click to expand it.
test/int/test_joining.py
+
34
−
2
View file @
4c67b472
...
...
@@ -248,8 +248,10 @@ def test_join_without_explicit_instance_id(cluster: Cluster):
# Then one of the instances became Leader with instance_id=1
# And the second one became Follower with instance_id=2
i1
=
cluster
.
add_instance
(
generate_instance_id
=
False
)
i2
=
cluster
.
add_instance
(
generate_instance_id
=
False
)
# start the instances with empty instance_ids which will force the Leader to
# choose ones for them when they join
i1
=
cluster
.
add_instance
(
instance_id
=
""
)
i2
=
cluster
.
add_instance
(
instance_id
=
""
)
i1
.
assert_raft_status
(
"
Leader
"
)
assert
i1
.
instance_id
==
"
i1
"
...
...
@@ -327,3 +329,33 @@ def test_reconfigure_failure_domains(cluster: Cluster):
# fail to remove domain subdivision
i1
.
failure_domain
=
dict
(
planet
=
"
Mars
"
)
i1
.
fail_to_start
()
def
test_fail_to_join
(
cluster
:
Cluster
):
# Check scenarios in which instances fail to join the cluster for different
# reasons
i1
=
cluster
.
add_instance
(
failure_domain
=
dict
(
owner
=
"
Tom
"
))
# Cluster has a required failure domain,
# so instance without the required failure domain cannot join
# and therefore exits with failure
cluster
.
fail_to_add_instance
(
failure_domain
=
dict
())
# An instance with the given instance_id is already present in the cluster
# so this instance cannot join
# and therefore exits with failure
cluster
.
fail_to_add_instance
(
instance_id
=
i1
.
instance_id
,
failure_domain
=
dict
(
owner
=
"
Jim
"
)
)
joined_instances
=
i1
.
eval
(
"""
res = {}
for _, t in pairs(box.space.raft_group:select()) do
table.insert(res, { t.instance_id, t.raft_id })
end
return res
"""
)
assert
{
tuple
(
i
)
for
i
in
joined_instances
}
==
{(
i1
.
instance_id
,
i1
.
raft_id
)}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment