From eae84efbfbf9e504660eba26c013487c7991459b Mon Sep 17 00:00:00 2001 From: Konstantin Belyavskiy <k.belyavskiy@tarantool.org> Date: Thu, 29 Mar 2018 18:35:51 +0300 Subject: [PATCH] replication: recover missing local data from replica In case of a sudden power loss, if data was not written to the local WAL but was already sent to a remote replica, the local instance can't recover properly and the datasets diverge. Fix it by using the remote replica's data and an LSN comparison to detect and resend the missing rows. Based on @GeorgyKirichenko's proposal and @locker's race-free check. Closes #3210 --- src/box/relay.cc | 16 ++- src/box/wal.cc | 15 ++- test/replication/recover_missing_xlog.result | 113 ++++++++++++++++++ .../replication/recover_missing_xlog.test.lua | 40 +++++++ test/replication/suite.ini | 2 +- 5 files changed, 181 insertions(+), 5 deletions(-) create mode 100644 test/replication/recover_missing_xlog.result create mode 100644 test/replication/recover_missing_xlog.test.lua diff --git a/src/box/relay.cc b/src/box/relay.cc index 2bd05ad5f2..d2ceaf1108 100644 --- a/src/box/relay.cc +++ b/src/box/relay.cc @@ -110,6 +110,11 @@ struct relay { struct vclock recv_vclock; /** Replicatoin slave version. */ uint32_t version_id; + /** + * Local vclock at the moment of subscribe, used to check + * dataset on the other side and send missing data rows if any. + */ + struct vclock local_vclock_at_subscribe; /** Relay endpoint */ struct cbus_endpoint endpoint; @@ -541,6 +546,7 @@ relay_subscribe(int fd, uint64_t sync, struct replica *replica, relay.version_id = replica_version_id; relay.replica = replica; replica_set_relay(replica, &relay); + vclock_copy(&relay.local_vclock_at_subscribe, &replicaset.vclock); int rc = cord_costart(&relay.cord, tt_sprintf("relay_%p", &relay), relay_subscribe_f, &relay); @@ -583,10 +589,16 @@ relay_send_row(struct xstream *stream, struct xrow_header *packet) /* * We're feeding a WAL, thus responding to SUBSCRIBE request. * In that case, only send a row if it is not from the same replica - * (i.e.
don't send replica's own rows back). + * (i.e. don't send replica's own rows back) or if this row is + * missing on the other side (i.e. in case of sudden power-loss, + * data was not written to WAL, so remote master can't recover + * it). In the latter case packet's LSN is less than or equal to + * local master's LSN at the moment it received 'SUBSCRIBE' request. */ if (relay->replica == NULL || - packet->replica_id != relay->replica->id) { + packet->replica_id != relay->replica->id || + packet->lsn <= vclock_get(&relay->local_vclock_at_subscribe, + packet->replica_id)) { relay_send(relay, packet); } } diff --git a/src/box/wal.cc b/src/box/wal.cc index 4576cfe095..099c70caaf 100644 --- a/src/box/wal.cc +++ b/src/box/wal.cc @@ -770,8 +770,19 @@ wal_write(struct journal *journal, struct journal_entry *entry) * and promote vclock. */ if ((*last)->replica_id == instance_id) { - vclock_follow(&replicaset.vclock, instance_id, - (*last)->lsn); + /* + * In master-master configuration, during sudden + * power-loss, if the data have not been written + * to WAL but have already been sent to others, + * they will send the data back. In this case + * vclock has already been promoted by applier. + */ + if (vclock_get(&replicaset.vclock, + instance_id) < (*last)->lsn) { + vclock_follow(&replicaset.vclock, + instance_id, + (*last)->lsn); + } break; } --last; diff --git a/test/replication/recover_missing_xlog.result b/test/replication/recover_missing_xlog.result new file mode 100644 index 0000000000..027f8761ef --- /dev/null +++ b/test/replication/recover_missing_xlog.result @@ -0,0 +1,113 @@ +env = require('test_run') +--- +... +test_run = env.new() +--- +... +SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' } +--- +... +-- Start servers +test_run:create_cluster(SERVERS) +--- +... +-- Wait for full mesh +test_run:wait_fullmesh(SERVERS) +--- +... +test_run:cmd("switch autobootstrap1") +--- +- true +... +for i = 0, 9 do box.space.test:insert{i, 'test' .. 
i} end +--- +... +box.space.test:count() +--- +- 10 +... +test_run:cmd('switch default') +--- +- true +... +vclock1 = test_run:get_vclock('autobootstrap1') +--- +... +vclock2 = test_run:wait_cluster_vclock(SERVERS, vclock1) +--- +... +test_run:cmd("switch autobootstrap2") +--- +- true +... +box.space.test:count() +--- +- 10 +... +box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.01) +--- +- ok +... +test_run:cmd("stop server autobootstrap1") +--- +- true +... +fio = require('fio') +--- +... +-- This test checks ability to recover missing local data +-- from remote replica. See #3210. +-- Delete data on first master and test that after restart, +-- due to difference in vclock it will be able to recover +-- all missing data from replica. +-- Also check that there is no concurrency, i.e. master is +-- in 'read-only' mode unless it receives all data. +fio.unlink(fio.pathjoin(fio.abspath("."), string.format('autobootstrap1/%020d.xlog', 8))) +--- +- true +... +test_run:cmd("start server autobootstrap1") +--- +- true +... +test_run:cmd("switch autobootstrap1") +--- +- true +... +for i = 10, 19 do box.space.test:insert{i, 'test' .. i} end +--- +... +fiber = require('fiber') +--- +... +box.space.test:select() +--- +- - [0, 'test0'] + - [1, 'test1'] + - [2, 'test2'] + - [3, 'test3'] + - [4, 'test4'] + - [5, 'test5'] + - [6, 'test6'] + - [7, 'test7'] + - [8, 'test8'] + - [9, 'test9'] + - [10, 'test10'] + - [11, 'test11'] + - [12, 'test12'] + - [13, 'test13'] + - [14, 'test14'] + - [15, 'test15'] + - [16, 'test16'] + - [17, 'test17'] + - [18, 'test18'] + - [19, 'test19'] +... +-- Cleanup. +test_run:cmd('switch default') +--- +- true +... +test_run:drop_cluster(SERVERS) +--- +... 
diff --git a/test/replication/recover_missing_xlog.test.lua b/test/replication/recover_missing_xlog.test.lua new file mode 100644 index 0000000000..57bc7d31f9 --- /dev/null +++ b/test/replication/recover_missing_xlog.test.lua @@ -0,0 +1,40 @@ +env = require('test_run') +test_run = env.new() + +SERVERS = { 'autobootstrap1', 'autobootstrap2', 'autobootstrap3' } +-- Start servers +test_run:create_cluster(SERVERS) +-- Wait for full mesh +test_run:wait_fullmesh(SERVERS) + +test_run:cmd("switch autobootstrap1") +for i = 0, 9 do box.space.test:insert{i, 'test' .. i} end +box.space.test:count() + +test_run:cmd('switch default') +vclock1 = test_run:get_vclock('autobootstrap1') +vclock2 = test_run:wait_cluster_vclock(SERVERS, vclock1) + +test_run:cmd("switch autobootstrap2") +box.space.test:count() +box.error.injection.set("ERRINJ_RELAY_TIMEOUT", 0.01) +test_run:cmd("stop server autobootstrap1") +fio = require('fio') +-- This test checks ability to recover missing local data +-- from remote replica. See #3210. +-- Delete data on first master and test that after restart, +-- due to difference in vclock it will be able to recover +-- all missing data from replica. +-- Also check that there is no concurrency, i.e. master is +-- in 'read-only' mode unless it receives all data. +fio.unlink(fio.pathjoin(fio.abspath("."), string.format('autobootstrap1/%020d.xlog', 8))) +test_run:cmd("start server autobootstrap1") + +test_run:cmd("switch autobootstrap1") +for i = 10, 19 do box.space.test:insert{i, 'test' .. i} end +fiber = require('fiber') +box.space.test:select() + +-- Cleanup. 
+test_run:cmd('switch default') +test_run:drop_cluster(SERVERS) diff --git a/test/replication/suite.ini b/test/replication/suite.ini index ee76a3b007..b489add581 100644 --- a/test/replication/suite.ini +++ b/test/replication/suite.ini @@ -3,7 +3,7 @@ core = tarantool script = master.lua description = tarantool/box, replication disabled = consistent.test.lua -release_disabled = catch.test.lua errinj.test.lua gc.test.lua before_replace.test.lua quorum.test.lua +release_disabled = catch.test.lua errinj.test.lua gc.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua config = suite.cfg lua_libs = lua/fast_replica.lua long_run = prune.test.lua -- GitLab