From 72ed72a28552f17392b66d307cd8bda351185ebf Mon Sep 17 00:00:00 2001 From: Vladimir Davydov <vdavydov.dev@gmail.com> Date: Thu, 15 Feb 2018 14:42:11 +0300 Subject: [PATCH] replication: fix rebootstrap race that results in broken subscription While a node of the cluster is re-bootstrapping (joining again), other nodes may try to re-subscribe to it. They will fail, because the rebootstrapped node hasn't tried to subscribe hence hasn't been added to the _cluster table yet and so is not present in the hash at the subscriber's side for replica_on_applier_reconnect() to look it up. Fix this by making a subscriber create an id-less (REPLICA_ID_NIL) struct replica in this case and reattach the applier to it. It will be assigned an id when it finally subscribes and is registered in _cluster. Fixes 71b3340537e9 replication: reconnect applier on master rebootstrap --- src/box/box.cc | 9 ++-- src/box/replication.cc | 13 ++++-- test/replication/quorum.result | 71 +++++++++++++++++++++++++++++--- test/replication/quorum.test.lua | 35 ++++++++++++++-- 4 files changed, 111 insertions(+), 17 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index fa1eb051db..33d237bcb3 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1152,7 +1152,7 @@ box_register_replica(uint32_t id, const struct tt_uuid *uuid) if (boxk(IPROTO_INSERT, BOX_CLUSTER_ID, "[%u%s]", (unsigned) id, tt_uuid_str(uuid)) != 0) diag_raise(); - assert(replica_by_uuid(uuid) != NULL); + assert(replica_by_uuid(uuid)->id == id); } /** @@ -1166,7 +1166,7 @@ static void box_on_join(const tt_uuid *instance_uuid) { struct replica *replica = replica_by_uuid(instance_uuid); - if (replica != NULL) + if (replica != NULL && replica->id != REPLICA_ID_NIL) return; /* nothing to do - already registered */ box_check_writable_xc(); @@ -1270,7 +1270,8 @@ box_process_join(struct ev_io *io, struct xrow_header *header) * is complete. Fail early if the caller does not have * appropriate access privileges. */ - if (replica_by_uuid(&instance_uuid) == NULL) { + struct replica *replica = replica_by_uuid(&instance_uuid); + if (replica == NULL || replica->id == REPLICA_ID_NIL) { box_check_writable_xc(); struct space *space = space_cache_find_xc(BOX_CLUSTER_ID); access_check_space_xc(space, PRIV_W); @@ -1323,7 +1324,7 @@ box_process_join(struct ev_io *io, struct xrow_header *header) */ box_on_join(&instance_uuid); - struct replica *replica = replica_by_uuid(&instance_uuid); + replica = replica_by_uuid(&instance_uuid); assert(replica != NULL); replica->gc = gc; gc_guard.is_active = false; diff --git a/src/box/replication.cc b/src/box/replication.cc index fc8f900b1d..b1c84d36c9 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -284,10 +284,15 @@ replica_on_applier_reconnect(struct replica *replica) * the new UUID and reassign the applier to it. */ struct replica *orig = replica_by_uuid(&applier->uuid); - if (orig == NULL || orig->applier != NULL) { - tnt_raise(ClientError, ER_INSTANCE_UUID_MISMATCH, - tt_uuid_str(&replica->uuid), - tt_uuid_str(&applier->uuid)); + if (orig == NULL) { + orig = replica_new(); + orig->uuid = applier->uuid; + replica_hash_insert(&replicaset.hash, orig); + } + + if (orig->applier != NULL) { + tnt_raise(ClientError, ER_CFG, "replication", + "duplicate connection to the same replica"); } replica_set_applier(orig, applier); diff --git a/test/replication/quorum.result b/test/replication/quorum.result index 5a29410303..34408d421c 100644 --- a/test/replication/quorum.result +++ b/test/replication/quorum.result @@ -133,7 +133,7 @@ test_run:cmd('switch quorum3') --- - true ... -box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.01) +box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.001) --- - ok ... @@ -141,7 +141,7 @@ test_run:cmd('switch quorum2') --- - true ... -box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.01) +box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.001) --- - ok ... @@ -149,7 +149,7 @@ test_run:cmd('stop server quorum1') --- - true ... -for i = 1, 10 do box.space.test:insert{i} end +for i = 1, 100 do box.space.test:insert{i} end --- ... fiber = require('fiber') @@ -166,9 +166,70 @@ test_run:cmd('switch quorum1') --- - true ... -box.space.test:count() -- 10 +box.space.test:count() -- 100 --- -- 10 +- 100 +... +-- Rebootstrap one node of the cluster and check that others follow. +-- Note, due to ERRINJ_RELAY_TIMEOUT there is a substantial delay +-- between the moment the node starts listening and the moment it +-- completes bootstrap and subscribes. Other nodes will try and +-- fail to subscribe to the restarted node during this period. +-- This is OK - they have to retry until the bootstrap is complete. +test_run:cmd('switch quorum3') +--- +- true +... +box.snapshot() +--- +- ok +... +test_run:cmd('switch quorum2') +--- +- true +... +box.snapshot() +--- +- ok +... +test_run:cmd('switch quorum1') +--- +- true +... +test_run:cmd('restart server quorum1 with cleanup=1') +box.space.test:count() -- 100 +--- +- 100 +... +-- The rebootstrapped replica will be assigned id = 4, +-- because ids 1..3 are busy. +test_run:cmd('switch quorum2') +--- +- true +... +fiber = require('fiber') +--- +... +while box.info.replication[4].upstream.status ~= 'follow' do fiber.sleep(0.001) end +--- +... +box.info.replication[4].upstream.status +--- +- follow +... +test_run:cmd('switch quorum3') +--- +- true +... +fiber = require('fiber') +--- +... +while box.info.replication[4].upstream.status ~= 'follow' do fiber.sleep(0.001) end +--- +... +box.info.replication[4].upstream.status +--- +- follow ... -- Cleanup. test_run:cmd('switch default') diff --git a/test/replication/quorum.test.lua b/test/replication/quorum.test.lua index 192f136951..856006843d 100644 --- a/test/replication/quorum.test.lua +++ b/test/replication/quorum.test.lua @@ -54,18 +54,45 @@ box.info.id == 3 or box.info.replication[3].upstream.status == 'follow' -- Check that box.cfg() doesn't return until the instance -- catches up with all configured replicas. test_run:cmd('switch quorum3') -box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.01) +box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.001) test_run:cmd('switch quorum2') -box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.01) +box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.001) test_run:cmd('stop server quorum1') -for i = 1, 10 do box.space.test:insert{i} end +for i = 1, 100 do box.space.test:insert{i} end fiber = require('fiber') fiber.sleep(0.1) test_run:cmd('start server quorum1') test_run:cmd('switch quorum1') -box.space.test:count() -- 10 +box.space.test:count() -- 100 + +-- Rebootstrap one node of the cluster and check that others follow. +-- Note, due to ERRINJ_RELAY_TIMEOUT there is a substantial delay +-- between the moment the node starts listening and the moment it +-- completes bootstrap and subscribes. Other nodes will try and +-- fail to subscribe to the restarted node during this period. +-- This is OK - they have to retry until the bootstrap is complete. +test_run:cmd('switch quorum3') +box.snapshot() +test_run:cmd('switch quorum2') +box.snapshot() + +test_run:cmd('switch quorum1') +test_run:cmd('restart server quorum1 with cleanup=1') + +box.space.test:count() -- 100 + +-- The rebootstrapped replica will be assigned id = 4, +-- because ids 1..3 are busy. +test_run:cmd('switch quorum2') +fiber = require('fiber') +while box.info.replication[4].upstream.status ~= 'follow' do fiber.sleep(0.001) end +box.info.replication[4].upstream.status +test_run:cmd('switch quorum3') +fiber = require('fiber') +while box.info.replication[4].upstream.status ~= 'follow' do fiber.sleep(0.001) end +box.info.replication[4].upstream.status -- Cleanup. test_run:cmd('switch default') -- GitLab