diff --git a/src/box/replication.cc b/src/box/replication.cc index bf7b8c225cdea1921050c798c1f1252734ad91f3..48956d2ed6c4be633c943ba55817b8f868cefa85 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -206,6 +206,19 @@ replica_clear_id(struct replica *replica) */ replicaset.replica_by_id[replica->id] = NULL; replica->id = REPLICA_ID_NIL; + /* + * The replica will never resubscribe so we don't need to keep + * WALs for it anymore. Unregister it with the garbage collector + * if the relay thread is stopped. In case the relay thread is + * still running, it may need to access replica->gc so leave the + * job to replica_on_relay_stop, which will be called as soon as + * the relay thread exits. + */ + if (replica->gc != NULL && + relay_get_state(replica->relay) != RELAY_FOLLOW) { + gc_consumer_unregister(replica->gc); + replica->gc = NULL; + } if (replica_is_orphan(replica)) { replica_hash_remove(&replicaset.hash, replica); replica_delete(replica); @@ -759,6 +772,16 @@ replicaset_check_quorum(void) void replica_on_relay_stop(struct replica *replica) { + /* + * If the replica was evicted from the cluster, we don't + * need to keep WALs for it anymore. Unregister it with + * the garbage collector then. See also replica_clear_id. + */ + assert(replica->gc != NULL); + if (replica->id == REPLICA_ID_NIL) { + gc_consumer_unregister(replica->gc); + replica->gc = NULL; + } if (replica_is_orphan(replica)) { replica_hash_remove(&replicaset.hash, replica); replica_delete(replica); diff --git a/test/replication/gc.result b/test/replication/gc.result index e5c5cfccdc9a83d0d56289d4ff97dfc2c43ed917..3f9db26ce07208032382261a6904cd038e00ed02 100644 --- a/test/replication/gc.result +++ b/test/replication/gc.result @@ -369,6 +369,89 @@ replica_set.wait_all(test_run) replica_set.drop_all(test_run) --- ... +-- +-- Check that once a replica is removed from the cluster table, +-- all xlogs kept for it are removed even if it is configured as +-- a replication master (gh-3546). 
+-- +fio = require('fio') +--- +... +fiber = require('fiber') +--- +... +-- Start a replica and set it up as a master for this instance. +test_run:cmd("start server replica") +--- +- true +... +replica_port = test_run:eval('replica', 'return box.cfg.listen')[1] +--- +... +replica_port ~= nil +--- +- true +... +box.cfg{replication = replica_port} +--- +... +-- Stop the replica and write a few WALs. +test_run:cmd("stop server replica") +--- +- true +... +test_run:cmd("cleanup server replica") +--- +- true +... +_ = s:auto_increment{} +--- +... +box.snapshot() +--- +- ok +... +_ = s:auto_increment{} +--- +... +box.snapshot() +--- +- ok +... +_ = s:auto_increment{} +--- +... +box.snapshot() +--- +- ok +... +#fio.glob('./master/*.xlog') == 3 or fio.listdir('./master') +--- +- true +... +-- Delete the replica from the cluster table and check that +-- all xlog files are removed. +test_run:cleanup_cluster() +--- +... +box.snapshot() +--- +- ok +... +t = fiber.time() +--- +... +while #fio.glob('./master/*.xlog') > 0 and fiber.time() - t < 10 do fiber.sleep(0.01) end +--- +... +#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master') +--- +- true +... +-- Restore the config. +box.cfg{replication = {}} +--- +... -- Cleanup. s:drop() --- diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua index a465140c8316bc2c25c93cb212c87790a77259a2..96f11f8d46466095b52ed345511b42604aaed82b 100644 --- a/test/replication/gc.test.lua +++ b/test/replication/gc.test.lua @@ -172,6 +172,42 @@ replica_set.start_all(test_run) replica_set.wait_all(test_run) replica_set.drop_all(test_run) +-- +-- Check that once a replica is removed from the cluster table, +-- all xlogs kept for it are removed even if it is configured as +-- a replication master (gh-3546). +-- +fio = require('fio') +fiber = require('fiber') + +-- Start a replica and set it up as a master for this instance. 
+test_run:cmd("start server replica") +replica_port = test_run:eval('replica', 'return box.cfg.listen')[1] +replica_port ~= nil +box.cfg{replication = replica_port} + +-- Stop the replica and write a few WALs. +test_run:cmd("stop server replica") +test_run:cmd("cleanup server replica") +_ = s:auto_increment{} +box.snapshot() +_ = s:auto_increment{} +box.snapshot() +_ = s:auto_increment{} +box.snapshot() +#fio.glob('./master/*.xlog') == 3 or fio.listdir('./master') + +-- Delete the replica from the cluster table and check that +-- all xlog files are removed. +test_run:cleanup_cluster() +box.snapshot() +t = fiber.time() +while #fio.glob('./master/*.xlog') > 0 and fiber.time() - t < 10 do fiber.sleep(0.01) end +#fio.glob('./master/*.xlog') == 0 or fio.listdir('./master') + +-- Restore the config. +box.cfg{replication = {}} + -- Cleanup. s:drop() box.error.injection.set("ERRINJ_RELAY_REPORT_INTERVAL", 0)