diff --git a/src/box/box.cc b/src/box/box.cc index 80249919efe193ce32d05e8d09be8b027256d1b4..13d9c41eb0363b5245b1b7227b8f61f84ab4a6c1 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -257,7 +257,7 @@ box_wait_ro(bool ro, double timeout) } void -box_set_orphan(bool orphan) +box_do_set_orphan(bool orphan) { if (is_orphan == orphan) return; /* nothing to do */ @@ -266,7 +266,12 @@ box_set_orphan(bool orphan) is_orphan = orphan; fiber_cond_broadcast(&ro_cond); +} +void +box_set_orphan(bool orphan) +{ + box_do_set_orphan(orphan); /* Update the title to reflect the new status. */ if (is_orphan) { say_crit("entering orphan mode"); @@ -699,11 +704,10 @@ box_set_replication(void) box_check_replication(); /* * Try to connect to all replicas within the timeout period. - * The configuration will succeed as long as we've managed - * to connect to at least replication_connect_quorum - * masters. + * Stay in orphan mode in case we fail to connect to at least + * 'replication_connect_quorum' remote instances. */ - box_sync_replication(true); + box_sync_replication(false); /* Follow replica */ replicaset_follow(); /* Wait until appliers are in sync */ diff --git a/src/box/box.h b/src/box/box.h index ddcfbe2e596f423a5e8c241bdd519eadf8b4c1be..ccd527bd54a998de57ff2a5b97e0fa5a3b4dc31a 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -127,6 +127,13 @@ box_wait_ro(bool ro, double timeout); void box_set_orphan(bool orphan); +/** + * Set orphan mode but don't update instance title. + * \sa box_set_orphan + */ +void +box_do_set_orphan(bool orphan); + /** * Iterate over all spaces and save them to the * snapshot file. diff --git a/src/box/replication.cc b/src/box/replication.cc index 28f7acedceda8879d2dd8cde2cb575164399c89a..d691ce4876b96e190ea088d27ffde863c1bfa237 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -610,6 +610,17 @@ replicaset_connect(struct applier **appliers, int count, say_info("connecting to %d replicas", count); + if (!connect_quorum) { + /* + * Enter orphan mode on configuration change and + * only leave it when we manage to sync with + * replicaset_quorum instances. Don't change + * title though, it should be 'loading' during + * local recovery. + */ + box_do_set_orphan(true); + } + /* * Simultaneously connect to remote peers to receive their UUIDs * and fill the resulting set: diff --git a/test/replication/misc.result b/test/replication/misc.result index 0a57edda5914c8cf409656a0d1ca9bf52b7763ce..ae72ce3e444b40b63271bf721f8d7aa02c6a4d0b 100644 --- a/test/replication/misc.result +++ b/test/replication/misc.result @@ -18,10 +18,19 @@ replication_connect_timeout = box.cfg.replication_connect_timeout box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}} --- ... +box.cfg{replication_connect_quorum=2} +--- +... box.cfg{replication = {'127.0.0.1:12345', box.cfg.listen}} --- -- error: 'Incorrect value for option ''replication'': failed to connect to one or - more replicas' +... +box.info.status +--- +- orphan +... +box.info.ro +--- +- true ... -- gh-3606 - Tarantool crashes if box.cfg.replication is updated concurrently fiber = require('fiber') @@ -47,8 +56,16 @@ c:get() --- - true ... -box.cfg{replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout} +box.cfg{replication = "", replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout} +--- +... +box.info.status +--- +- running +... +box.info.ro --- +- false ... -- gh-3111 - Allow to rebootstrap a replica from a read-only master replica_uuid = uuid.new() @@ -729,3 +746,84 @@ test_run:cleanup_cluster() box.schema.user.revoke('guest', 'replication') --- ... +-- +-- gh-4424 Always enter orphan mode on error in replication +-- configuration change. +-- +replication_connect_timeout = box.cfg.replication_connect_timeout +--- +... +replication_connect_quorum = box.cfg.replication_connect_quorum +--- +... +box.cfg{replication="12345", replication_connect_timeout=0.1, replication_connect_quorum=1} +--- +... +box.info.status +--- +- orphan +... +box.info.ro +--- +- true +... +-- reset replication => leave orphan mode +box.cfg{replication=""} +--- +... +box.info.status +--- +- running +... +box.info.ro +--- +- false +... +-- no switch to orphan when quorum == 0 +box.cfg{replication="12345", replication_connect_quorum=0} +--- +... +box.info.status +--- +- running +... +box.info.ro +--- +- false +... +-- we could connect to one out of two replicas. Set orphan. +box.cfg{replication_connect_quorum=2} +--- +... +box.cfg{replication={box.cfg.listen, "12345"}} +--- +... +box.info.status +--- +- orphan +... +box.info.ro +--- +- true +... +-- lower quorum => leave orphan mode +box.cfg{replication_connect_quorum=1} +--- +... +box.info.status +--- +- running +... +box.info.ro +--- +- false +... +box.cfg{replication=""} +--- +... +box.cfg{replication_connect_timeout=replication_connect_timeout} +--- +... +box.cfg{replication_connect_quorum=replication_connect_quorum} +--- +... diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua index 99e9955093e9a159d910296c38033d0c157580b3..16e7e9e42eaec7b74ad103acc90b4a2d12cf49be 100644 --- a/test/replication/misc.test.lua +++ b/test/replication/misc.test.lua @@ -8,7 +8,10 @@ box.schema.user.grant('guest', 'replication') replication_timeout = box.cfg.replication_timeout replication_connect_timeout = box.cfg.replication_connect_timeout box.cfg{replication_timeout=0.05, replication_connect_timeout=0.05, replication={}} +box.cfg{replication_connect_quorum=2} box.cfg{replication = {'127.0.0.1:12345', box.cfg.listen}} +box.info.status +box.info.ro -- gh-3606 - Tarantool crashes if box.cfg.replication is updated concurrently fiber = require('fiber') @@ -19,7 +22,9 @@ f() c:get() c:get() -box.cfg{replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout} +box.cfg{replication = "", replication_timeout = replication_timeout, replication_connect_timeout = replication_connect_timeout} +box.info.status +box.info.ro -- gh-3111 - Allow to rebootstrap a replica from a read-only master replica_uuid = uuid.new() @@ -293,3 +298,37 @@ test_run:cmd("cleanup server replica") test_run:cmd("delete server replica") test_run:cleanup_cluster() box.schema.user.revoke('guest', 'replication') + +-- +-- gh-4424 Always enter orphan mode on error in replication +-- configuration change. +-- +replication_connect_timeout = box.cfg.replication_connect_timeout +replication_connect_quorum = box.cfg.replication_connect_quorum +box.cfg{replication="12345", replication_connect_timeout=0.1, replication_connect_quorum=1} +box.info.status +box.info.ro +-- reset replication => leave orphan mode +box.cfg{replication=""} +box.info.status +box.info.ro +-- no switch to orphan when quorum == 0 +box.cfg{replication="12345", replication_connect_quorum=0} +box.info.status +box.info.ro + +-- we could connect to one out of two replicas. Set orphan. +box.cfg{replication_connect_quorum=2} +box.cfg{replication={box.cfg.listen, "12345"}} +box.info.status +box.info.ro +-- lower quorum => leave orphan mode +box.cfg{replication_connect_quorum=1} +box.info.status +box.info.ro + +box.cfg{replication=""} + + +box.cfg{replication_connect_timeout=replication_connect_timeout} +box.cfg{replication_connect_quorum=replication_connect_quorum}