Skip to content
Snippets Groups Projects
Commit 90ebc380 authored by Nikita Zheleztsov's avatar Nikita Zheleztsov Committed by Alexander Turenko
Browse files

config: fix names not always applying after config reload

We need to apply instance/replicaset_name as soon as the instance
becomes RW, so currently we try to do so at every box.status broadcast.
Even though the broadcast happens pretty often, it's not enough.

The bug is reproduced in config-luatest/set_names_reload, which checks
the following situation:

    1. Cluster is recovered from the xlogs without names set.
    2. User forgets to set the UUID for one replica and starts the cluster.
    3. The replica whose UUID has not been set fails to start.
    4. User notices that and updates the config, reloading it on the
       instances that started successfully and starting the failed one.
    5. Master must apply the name for the failed replica.

The test worked all right in the majority of runs, because the box.status
broadcast happens often: e.g. it's broadcast when the master's applier
has synced with the replica. However, under heavy CPU load the test
sometimes failed, when the master failed to subscribe to the replica and
the broadcast didn't happen.

Let's try to set names not only when box.status is broadcast, but also
immediately after reload, since new names that must be set might appear
at that time. Let's also change the test so that it doesn't rely on the
broadcast anymore.

Closes tarantool/tarantool-qa#328

NO_DOC=bugfix
parent a03064bc
No related branches found
No related tags found
No related merge requests found
## bugfix/config
* Increased stability and speed of automatic names applying after config reload.
......@@ -90,8 +90,7 @@ local names_state = {
-- from box.info) and which have not be alerted already.
local function names_alert_missing(config, missing_names)
local msg = 'box_cfg.apply: name %s for %s uuid is missing from the ' ..
'snapshot. It will be automatically set when master switches' ..
' to Read Write mode'
'snapshot. It will be automatically set when possible.'
local replicaset_name = config._configdata._replicaset_name
if missing_names[replicaset_name] ~= nil and
config._alerts[replicaset_name] == nil then
......@@ -158,13 +157,15 @@ local function missing_names_is_empty(missing_names, replicaset_name)
table.equals(missing_names._peers, {})
end
local function names_on_rw_transit()
local function names_try_set_missing()
local configdata = names_state.config._configdata
local replicaset_name = configdata._replicaset_name
local missing_names = configdata:missing_names()
if missing_names_is_empty(missing_names, replicaset_name) then
-- Somebody have done work for us, nothing to update.
if box.info.ro or missing_names_is_empty(missing_names,
replicaset_name) then
-- Somebody have done work for us, nothing to update or we're not
-- a rw, which is possible if the function was invoked after reload.
return
end
......@@ -186,10 +187,10 @@ end
local function names_rw_watcher()
return box.watch('box.status', function(_, status)
-- It's ok, if names_on_rw_transit will be triggered
-- It's ok, if names_try_set_missing will be triggered
-- several times. It's NoOp after first execution.
if status.is_ro == false then
schedule_task(names_on_rw_transit)
schedule_task(names_try_set_missing)
end
end)
end
......@@ -270,6 +271,10 @@ local function names_apply(config, missing_names, schema_version)
-- Even if everything is configured we try to make alerts one
-- more time, as new instances without names may be found.
names_alert_missing(config, missing_names)
-- Don't wait for box.status to change, we may be already rw, set names
-- on reload, if it's possible and needed.
names_try_set_missing()
if names_state.is_configured then
-- All triggers are already configured, nothing to do, but wait.
return
......
......@@ -170,19 +170,25 @@ g.test_names_are_set_after_reload = function(g)
instance_uuid = g.uuids['instance-002']
}
-- Reload config on instance-001 and wait for the name to be assigned
local cfg = yaml.encode(g.config)
opts.config_file = treegen.write_script(g.dir, 'cfg.yaml', cfg)
g.instance_2 = server:new(fun.chain(opts, {alias = 'instance-002'}):tomap())
g.instance_1:exec(function(uuid)
require('config'):reload()
t.helpers.retrying({timeout = 20}, function()
t.assert_equals(box.space._cluster.index.uuid:select(uuid)[1][3],
'instance-002')
end)
end, {g.uuids['instance-002']})
g.instance_1:grep_log(msg:format('instance-002', g.uuids['instance-002']),
1024, {filename = g.dir .. log_postfix})
-- Start instance-002
g.instance_2 = server:new(fun.chain(opts, {alias = 'instance-002'}):tomap())
g.instance_2:start({wait_until_ready = false})
g.instance_1:exec(function()
require('config'):reload()
end)
g.instance_2:wait_until_ready()
g.instance_2:wait_for_vclock_of(g.instance_1)
g.instance_2:exec(check_names, {rs_name, 'instance-002', g.uuids})
g.instance_1:grep_log(msg:format('instance-002', g.uuids['instance-002']),
1024, {filename = g.dir .. log_postfix})
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment