From f9bbfff9ce46e04c9d4073cd175e6867f7a8ae05 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov <vdavydov@tarantool.org> Date: Mon, 25 Oct 2021 20:00:56 +0300 Subject: [PATCH] box: don't delete inprogress files during normal GC *.inprogress files shouldn't be deleted during normal GC, because they may be in use. E.g. GC may happen to run while WAL is rotating xlog; if GC removes the transient xlog.inprogress file created by WAL (see xdir_create_xlog), WAL will fail to rename it and abort the current transaction. Initially, inprogress files were removed only after recovery. For this reason, xdir_collect_inprogress, which is used for deleting inprogress files, accesses FS directly, without COIO. This was changed by commit 5aa243deebbad248566fc421b42cbbe426e83982 ("recovery: build secondary index in hot standby mode") which moved xdir_collect_inprogress from memtx_engine_end_recovery to memtx_engine_collect_garbage so that a hot standby instance doesn't delete inprogress files of the master instance by mistake. To fix this issue, let's move xdir_collect_inprogress back where it belongs, to engine_end_recovery, and introduce a new callback for memtx to build its secondary keys before entering the hot standby mode - engine_begin_hot_standby. Let's also remove engine_collect_garbage from gc_init, which was added there by the aforementioned commit, probably to make tests pass. The bug was reported by the vinyl/deferred_delete test (#5089). Closes #6554 --- ...h-6554-fix-gc-removing-inprogress-xlogs.md | 4 + src/box/blackhole.c | 1 + src/box/box.cc | 4 +- src/box/engine.c | 18 +++ src/box/engine.h | 20 ++- src/box/gc.c | 1 - src/box/memtx_engine.cc | 27 +++- src/box/service_engine.c | 1 + src/box/sysview.c | 1 + src/box/vinyl.c | 1 + src/box/xlog.c | 2 + src/lib/core/errinj.h | 1 + test/box/errinj.result | 1 + ...gh-6554-gc-removes-inprogress-xlogs.result | 118 ++++++++++++++++++ ...-6554-gc-removes-inprogress-xlogs.test.lua | 56 +++++++++ test/box/suite.ini | 2 +- 16 files changed, 250 insertions(+), 8 deletions(-) create mode 100644 changelogs/unreleased/gh-6554-fix-gc-removing-inprogress-xlogs.md create mode 100644 test/box/gh-6554-gc-removes-inprogress-xlogs.result create mode 100644 test/box/gh-6554-gc-removes-inprogress-xlogs.test.lua diff --git a/changelogs/unreleased/gh-6554-fix-gc-removing-inprogress-xlogs.md b/changelogs/unreleased/gh-6554-fix-gc-removing-inprogress-xlogs.md new file mode 100644 index 0000000000..58e055c271 --- /dev/null +++ b/changelogs/unreleased/gh-6554-fix-gc-removing-inprogress-xlogs.md @@ -0,0 +1,4 @@ +## bugfix/core + +* Fixed a bug because of which the garbage collector could remove an xlog file + that is still in use (gh-6554). diff --git a/src/box/blackhole.c b/src/box/blackhole.c index 69f1deba17..b8788580e3 100644 --- a/src/box/blackhole.c +++ b/src/box/blackhole.c @@ -188,6 +188,7 @@ static const struct engine_vtab blackhole_engine_vtab = { /* .bootstrap = */ generic_engine_bootstrap, /* .begin_initial_recovery = */ generic_engine_begin_initial_recovery, /* .begin_final_recovery = */ generic_engine_begin_final_recovery, + /* .begin_hot_standby = */ generic_engine_begin_hot_standby, /* .end_recovery = */ generic_engine_end_recovery, /* .begin_checkpoint = */ generic_engine_begin_checkpoint, /* .wait_checkpoint = */ generic_engine_wait_checkpoint, diff --git a/src/box/box.cc b/src/box/box.cc index 20361db3a3..0aa61b6c1a 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -3401,7 +3401,6 @@ local_recovery(const struct tt_uuid *instance_uuid, diag_raise(); diag_log(); } - engine_end_recovery_xc(); /* * Leave hot standby mode, if any, only after * acquiring the lock. @@ -3409,6 +3408,7 @@ local_recovery(const struct tt_uuid *instance_uuid, if (wal_dir_lock < 0) { title("hot_standby"); say_info("Entering hot standby mode"); + engine_begin_hot_standby_xc(); recovery_follow_local(recovery, &wal_stream.base, "hot_standby", cfg_getd("wal_dir_rescan_delay")); while (true) { @@ -3449,6 +3449,8 @@ local_recovery(const struct tt_uuid *instance_uuid, if (wal_enable() != 0) diag_raise(); + engine_end_recovery_xc(); + /* Check replica set UUID. */ if (!tt_uuid_is_nil(replicaset_uuid) && !tt_uuid_is_equal(replicaset_uuid, &REPLICASET_UUID)) { diff --git a/src/box/engine.c b/src/box/engine.c index 88ed928613..e7375e3c8a 100644 --- a/src/box/engine.c +++ b/src/box/engine.c @@ -113,6 +113,17 @@ engine_begin_final_recovery(void) return 0; } +int +engine_begin_hot_standby(void) +{ + struct engine *engine; + engine_foreach(engine) { + if (engine->vtab->begin_hot_standby(engine) != 0) + return -1; + } + return 0; +} + int engine_end_recovery(void) { @@ -348,6 +359,13 @@ generic_engine_begin_final_recovery(struct engine *engine) return 0; } +int +generic_engine_begin_hot_standby(struct engine *engine) +{ + (void)engine; + return 0; +} + int generic_engine_end_recovery(struct engine *engine) { diff --git a/src/box/engine.h b/src/box/engine.h index c4da01e139..8e3381f8b5 100644 --- a/src/box/engine.h +++ b/src/box/engine.h @@ -147,6 +147,11 @@ struct engine_vtab { * of WAL catch up durin join on slave side */ int (*begin_final_recovery)(struct engine *); + /** + * Notify the engine that the instance is about to enter + * the hot standby mode to complete recovery from WALs. + */ + int (*begin_hot_standby)(struct engine *); /** * Inform the engine about the end of recovery from the * binary log. @@ -337,9 +342,14 @@ engine_begin_initial_recovery(const struct vclock *recovery_vclock); int engine_begin_final_recovery(void); +/** + * Called before entering the hot standby mode. + */ +int +engine_begin_hot_standby(void); + /** * Called at the end of recovery. - * Build secondary keys in all spaces. */ int engine_end_recovery(void); @@ -395,6 +405,7 @@ int generic_engine_bootstrap(struct engine *); int generic_engine_begin_initial_recovery(struct engine *, const struct vclock *); int generic_engine_begin_final_recovery(struct engine *); +int generic_engine_begin_hot_standby(struct engine *); int generic_engine_end_recovery(struct engine *); int generic_engine_begin_checkpoint(struct engine *, bool); int generic_engine_wait_checkpoint(struct engine *, const struct vclock *); @@ -478,6 +489,13 @@ engine_begin_final_recovery_xc(void) diag_raise(); } +static inline void +engine_begin_hot_standby_xc(void) +{ + if (engine_begin_hot_standby() != 0) + diag_raise(); +} + static inline void engine_end_recovery_xc(void) { diff --git a/src/box/gc.c b/src/box/gc.c index 10f8999231..9ebe28421b 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -117,7 +117,6 @@ gc_init(void) gc_tree_new(&gc.consumers); fiber_cond_create(&gc.cleanup_cond); checkpoint_schedule_cfg(&gc.checkpoint_schedule, 0, 0); - engine_collect_garbage(&gc.vclock); gc.cleanup_fiber = fiber_new("gc", gc_cleanup_fiber_f); if (gc.cleanup_fiber == NULL) diff --git a/src/box/memtx_engine.cc b/src/box/memtx_engine.cc index de918c3353..c51f52a7ae 100644 --- a/src/box/memtx_engine.cc +++ b/src/box/memtx_engine.cc @@ -356,15 +356,33 @@ memtx_engine_begin_final_recovery(struct engine *engine) return 0; } +static int +memtx_engine_begin_hot_standby(struct engine *engine) +{ + struct memtx_engine *memtx = (struct memtx_engine *)engine; + /* + * Build secondary indexes before entering the hot standby mode + * to quickly switch to the hot standby instance after the master + * instance exits. + */ + if (memtx->state != MEMTX_OK) { + assert(memtx->state == MEMTX_FINAL_RECOVERY); + memtx->state = MEMTX_OK; + if (space_foreach(memtx_build_secondary_keys, memtx) != 0) + return -1; + } + return 0; +} + static int memtx_engine_end_recovery(struct engine *engine) { struct memtx_engine *memtx = (struct memtx_engine *)engine; /* - * Recovery is started with enabled keys when: - * - either of force_recovery - * is false + * Secondary keys have already been built in the following cases: + * - force_recovery is set * - it's a replication join + * - instance was in the hot standby mode */ if (memtx->state != MEMTX_OK) { assert(memtx->state == MEMTX_FINAL_RECOVERY); @@ -372,6 +390,7 @@ memtx_engine_end_recovery(struct engine *engine) if (space_foreach(memtx_build_secondary_keys, memtx) != 0) return -1; } + xdir_collect_inprogress(&memtx->snap_dir); return 0; } @@ -864,7 +883,6 @@ memtx_engine_collect_garbage(struct engine *engine, const struct vclock *vclock) struct memtx_engine *memtx = (struct memtx_engine *)engine; xdir_collect_garbage(&memtx->snap_dir, vclock_sum(vclock), XDIR_GC_ASYNC); - xdir_collect_inprogress(&memtx->snap_dir); } static int @@ -1044,6 +1062,7 @@ static const struct engine_vtab memtx_engine_vtab = { /* .bootstrap = */ memtx_engine_bootstrap, /* .begin_initial_recovery = */ memtx_engine_begin_initial_recovery, /* .begin_final_recovery = */ memtx_engine_begin_final_recovery, + /* .begin_hot_standby = */ memtx_engine_begin_hot_standby, /* .end_recovery = */ memtx_engine_end_recovery, /* .begin_checkpoint = */ memtx_engine_begin_checkpoint, /* .wait_checkpoint = */ memtx_engine_wait_checkpoint, diff --git a/src/box/service_engine.c b/src/box/service_engine.c index 5a33a735a8..216d2abc74 100644 --- a/src/box/service_engine.c +++ b/src/box/service_engine.c @@ -106,6 +106,7 @@ static const struct engine_vtab service_engine_vtab = { /* .bootstrap = */ generic_engine_bootstrap, /* .begin_initial_recovery = */ generic_engine_begin_initial_recovery, /* .begin_final_recovery = */ generic_engine_begin_final_recovery, + /* .begin_hot_standby = */ generic_engine_begin_hot_standby, /* .end_recovery = */ generic_engine_end_recovery, /* .begin_checkpoint = */ generic_engine_begin_checkpoint, /* .wait_checkpoint = */ generic_engine_wait_checkpoint, diff --git a/src/box/sysview.c b/src/box/sysview.c index 00c320b6fc..05aa9ab643 100644 --- a/src/box/sysview.c +++ b/src/box/sysview.c @@ -578,6 +578,7 @@ static const struct engine_vtab sysview_engine_vtab = { /* .bootstrap = */ generic_engine_bootstrap, /* .begin_initial_recovery = */ generic_engine_begin_initial_recovery, /* .begin_final_recovery = */ generic_engine_begin_final_recovery, + /* .begin_hot_standby = */ generic_engine_begin_hot_standby, /* .end_recovery = */ generic_engine_end_recovery, /* .begin_checkpoint = */ generic_engine_begin_checkpoint, /* .wait_checkpoint = */ generic_engine_wait_checkpoint, diff --git a/src/box/vinyl.c b/src/box/vinyl.c index c885f4b29f..fabe05849d 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -4521,6 +4521,7 @@ static const struct engine_vtab vinyl_engine_vtab = { /* .bootstrap = */ vinyl_engine_bootstrap, /* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery, /* .begin_final_recovery = */ vinyl_engine_begin_final_recovery, + /* .begin_hot_standby = */ generic_engine_begin_hot_standby, /* .end_recovery = */ vinyl_engine_end_recovery, /* .begin_checkpoint = */ vinyl_engine_begin_checkpoint, /* .wait_checkpoint = */ vinyl_engine_wait_checkpoint, diff --git a/src/box/xlog.c b/src/box/xlog.c index 03d2757132..9c6cd59b38 100644 --- a/src/box/xlog.c +++ b/src/box/xlog.c @@ -771,6 +771,8 @@ xlog_rename(struct xlog *l) memcpy(new_filename, filename, suffix - filename); new_filename[suffix - filename] = '\0'; + ERROR_INJECT_SLEEP(ERRINJ_XLOG_RENAME_DELAY); + if (rename(filename, new_filename) != 0) { say_syserror("can't rename %s to %s", filename, new_filename); diag_set(SystemError, "failed to rename '%s' file", diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index 12d2814e35..f3f74c6674 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -166,6 +166,7 @@ struct errinj { _(ERRINJ_XLOG_GARBAGE, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_XLOG_META, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_XLOG_READ, ERRINJ_INT, {.iparam = -1}) \ + _(ERRINJ_XLOG_RENAME_DELAY, ERRINJ_BOOL, {.bparam = false}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index 529da15781..7cf1d7a639 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -136,6 +136,7 @@ evals - ERRINJ_XLOG_GARBAGE: false - ERRINJ_XLOG_META: false - ERRINJ_XLOG_READ: -1 + - ERRINJ_XLOG_RENAME_DELAY: false ... errinj.set("some-injection", true) --- diff --git a/test/box/gh-6554-gc-removes-inprogress-xlogs.result b/test/box/gh-6554-gc-removes-inprogress-xlogs.result new file mode 100644 index 0000000000..e76505bdab --- /dev/null +++ b/test/box/gh-6554-gc-removes-inprogress-xlogs.result @@ -0,0 +1,118 @@ +-- test-run result file version 2 +test_run = require('test_run').new() + | --- + | ... +fiber = require('fiber') + | --- + | ... +fio = require('fio') + | --- + | ... + +-- +-- gh-6554: GC removes xlog.inprogress files. +-- +assert(box.cfg.wal_dir == box.cfg.memtx_dir) + | --- + | - true + | ... + +function count_inprogress() \ + return #fio.glob(box.cfg.wal_dir .. '/*.xlog.inprogress') \ +end + | --- + | ... + +-- Run GC after each checkpoint. +checkpoint_count = box.cfg.checkpoint_count + | --- + | ... +box.cfg{checkpoint_count = 1} + | --- + | ... + +s = box.schema.create_space('test') + | --- + | ... +_ = s:create_index('pk') + | --- + | ... +box.snapshot() + | --- + | - ok + | ... + +-- Suspend GC. +files = box.backup.start() + | --- + | ... + +-- Create a checkpoint. +_ = s:replace{1} + | --- + | ... +box.snapshot() + | --- + | - ok + | ... + +-- Block a writer on xlog.inprogress -> xlog rename. +box.error.injection.set('ERRINJ_XLOG_RENAME_DELAY', true) + | --- + | - ok + | ... +c = fiber.channel() + | --- + | ... +_ = fiber.create(function() local r = pcall(s.replace, s, {1}) c:put(r) end) + | --- + | ... +_ = test_run:wait_cond(function() return count_inprogress() > 0 end) + | --- + | ... +assert(count_inprogress() == 1) + | --- + | - true + | ... + +-- Resume GC and wait for it to delete old files. +box.backup.stop() + | --- + | ... +for _, f in ipairs(files) do \ + test_run:wait_cond(function() \ + return not fio.path.exists(f) \ + end) \ +end + | --- + | ... + +-- The xlog.inprogress file shouldn't be deleted by GC. +assert(count_inprogress() == 1) + | --- + | - true + | ... + +-- Resume the blocked writer and check that it succeeds. +box.error.injection.set('ERRINJ_XLOG_RENAME_DELAY', false) + | --- + | - ok + | ... +assert(c:get() == true) + | --- + | - true + | ... + +-- The xlog.inprogress file was renamed. +assert(count_inprogress() == 0) + | --- + | - true + | ... + +-- Cleanup. +s:drop() + | --- + | ... +box.cfg{checkpoint_count = checkpoint_count} + | --- + | ... diff --git a/test/box/gh-6554-gc-removes-inprogress-xlogs.test.lua b/test/box/gh-6554-gc-removes-inprogress-xlogs.test.lua new file mode 100644 index 0000000000..59651ace60 --- /dev/null +++ b/test/box/gh-6554-gc-removes-inprogress-xlogs.test.lua @@ -0,0 +1,56 @@ +test_run = require('test_run').new() +fiber = require('fiber') +fio = require('fio') + +-- +-- gh-6554: GC removes xlog.inprogress files. +-- +assert(box.cfg.wal_dir == box.cfg.memtx_dir) + +function count_inprogress() \ + return #fio.glob(box.cfg.wal_dir .. '/*.xlog.inprogress') \ +end + +-- Run GC after each checkpoint. +checkpoint_count = box.cfg.checkpoint_count +box.cfg{checkpoint_count = 1} + +s = box.schema.create_space('test') +_ = s:create_index('pk') +box.snapshot() + +-- Suspend GC. +files = box.backup.start() + +-- Create a checkpoint. +_ = s:replace{1} +box.snapshot() + +-- Block a writer on xlog.inprogress -> xlog rename. +box.error.injection.set('ERRINJ_XLOG_RENAME_DELAY', true) +c = fiber.channel() +_ = fiber.create(function() local r = pcall(s.replace, s, {1}) c:put(r) end) +_ = test_run:wait_cond(function() return count_inprogress() > 0 end) +assert(count_inprogress() == 1) + +-- Resume GC and wait for it to delete old files. +box.backup.stop() +for _, f in ipairs(files) do \ + test_run:wait_cond(function() \ + return not fio.path.exists(f) \ + end) \ +end + +-- The xlog.inprogress file shouldn't be deleted by GC. +assert(count_inprogress() == 1) + +-- Resume the blocked writer and check that it succeeds. +box.error.injection.set('ERRINJ_XLOG_RENAME_DELAY', false) +assert(c:get() == true) + +-- The xlog.inprogress file was renamed. +assert(count_inprogress() == 0) + +-- Cleanup. +s:drop() +box.cfg{checkpoint_count = checkpoint_count} diff --git a/test/box/suite.ini b/test/box/suite.ini index a53ba4d41c..c3c1b75a2f 100644 --- a/test/box/suite.ini +++ b/test/box/suite.ini @@ -5,7 +5,7 @@ script = box.lua disabled = rtree_errinj.test.lua tuple_bench.test.lua long_run = huge_field_map_long.test.lua alter-primary-index-tuple-leak-long.test.lua config = engine.cfg -release_disabled = errinj.test.lua errinj_index.test.lua rtree_errinj.test.lua upsert_errinj.test.lua iproto_stress.test.lua gh-4648-func-load-unload.test.lua net.box_discard_console_request_gh-6249.test.lua gh-5998-one-tx-for-ddl-errinj.test.lua net.box_closing_without_lost_gh-6338.test.lua net.box_iproto_id.test.lua net.box_error_extension_feature.test.lua gh-6092-invalid-listen-uri.test.lua +release_disabled = errinj.test.lua errinj_index.test.lua rtree_errinj.test.lua upsert_errinj.test.lua iproto_stress.test.lua gh-4648-func-load-unload.test.lua net.box_discard_console_request_gh-6249.test.lua gh-5998-one-tx-for-ddl-errinj.test.lua net.box_closing_without_lost_gh-6338.test.lua net.box_iproto_id.test.lua net.box_error_extension_feature.test.lua gh-6092-invalid-listen-uri.test.lua gh-6554-gc-removes-inprogress-xlogs.test.lua lua_libs = lua/fifo.lua lua/utils.lua lua/bitset.lua lua/index_random_test.lua lua/push.lua lua/identifier.lua lua/txn_proxy.lua use_unix_sockets = True use_unix_sockets_iproto = True -- GitLab