From 5512053fa281bd50de7ba764f47bfac12589424d Mon Sep 17 00:00:00 2001 From: Vladimir Davydov <vdavydov.dev@gmail.com> Date: Wed, 5 Apr 2017 11:54:14 +0300 Subject: [PATCH] box: gc: do not remove files being backed up Remember the lsn of the last checkpoint when backup starts and make sure that box.internal.gc doesn't delete any files that are needed to recover from this checkpoint or a newer one until backup is stopped. --- src/box/CMakeLists.txt | 1 + src/box/box.cc | 41 ++++---- src/box/box.h | 6 -- src/box/engine.cc | 3 + src/box/engine.h | 25 +++-- src/box/gc.c | 202 +++++++++++++++++++++++++++++++++++++++ src/box/gc.h | 96 +++++++++++++++++++ src/box/lua/init.c | 3 +- src/box/memtx_engine.cc | 20 +--- src/box/memtx_engine.h | 7 +- src/box/recovery.h | 15 --- test/box/backup.result | 5 + test/box/backup.test.lua | 4 + 13 files changed, 358 insertions(+), 70 deletions(-) create mode 100644 src/box/gc.c create mode 100644 src/box/gc.h diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index e717860ee6..74a320e7a2 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -63,6 +63,7 @@ add_library(box STATIC request.c txn.cc box.cc + gc.c user_def.c user.cc authentication.cc diff --git a/src/box/box.cc b/src/box/box.cc index 93c2999271..6ff8eb012a 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -67,6 +67,7 @@ #include "xrow_io.h" #include "authentication.h" #include "path_lock.h" +#include "gc.h" static char status[64] = "unknown"; @@ -79,6 +80,12 @@ static void title(const char *new_status) bool box_snapshot_is_in_progress = false; bool box_backup_is_in_progress = false; + +/* + * vclock of the checkpoint that is currently being backed up. + */ +static struct vclock box_backup_vclock; + /** * The instance is in read-write mode: the local checkpoint * and all write ahead logs are processed. For a replica, @@ -1197,7 +1204,7 @@ box_process_join(struct ev_io *io, struct xrow_header *header) * as a replica. Our best effort is to not crash in such * case: raise ER_MISSING_SNAPSHOT. */ - if (recovery_last_checkpoint(&start_vclock) < 0) + if (gc_last_checkpoint(&start_vclock) < 0) tnt_raise(ClientError, ER_MISSING_SNAPSHOT); /* Respond to JOIN request with start_vclock. */ @@ -1355,6 +1362,7 @@ box_free(void) tuple_free(); port_free(); #endif + gc_free(); engine_shutdown(); wal_thread_stop(); } @@ -1537,6 +1545,9 @@ box_cfg_xc(void) rmean_box = rmean_new(iproto_type_strs, IPROTO_TYPE_STAT_MAX); rmean_error = rmean_new(rmean_error_strings, RMEAN_ERROR_LAST); + if (gc_init(cfg_gets("memtx_dir")) < 0) + diag_raise(); + engine_init(); schema_init(); @@ -1554,7 +1565,7 @@ box_cfg_xc(void) struct vclock checkpoint_vclock; vclock_create(&checkpoint_vclock); - int64_t lsn = recovery_last_checkpoint(&checkpoint_vclock); + int64_t lsn = gc_last_checkpoint(&checkpoint_vclock); /* * Lock the write ahead log directory to avoid multiple * instances running in the same dir. @@ -1590,7 +1601,7 @@ box_cfg_xc(void) * recovery of system spaces issue DDL events in * other engines. */ - memtx->recoverSnapshot(); + memtx->recoverSnapshot(&checkpoint_vclock); struct recovery *recovery; recovery = recovery_new(cfg_gets("wal_dir"), @@ -1739,13 +1750,6 @@ box_snapshot() return rc; } -void -box_gc(int64_t lsn) -{ - wal_collect_garbage(lsn); - engine_collect_garbage(lsn); -} - int box_backup_start(box_backup_cb cb, void *cb_arg) { @@ -1753,21 +1757,26 @@ box_backup_start(box_backup_cb cb, void *cb_arg) diag_set(ClientError, ER_BACKUP_IN_PROGRESS); return -1; } - struct vclock vclock; - if (recovery_last_checkpoint(&vclock) < 0) { + if (gc_ref_last_checkpoint(&box_backup_vclock) < 0) { diag_set(ClientError, ER_MISSING_SNAPSHOT); return -1; } - int rc = engine_backup(&vclock, cb, cb_arg); - if (rc == 0) - box_backup_is_in_progress = true; + box_backup_is_in_progress = true; + int rc = engine_backup(&box_backup_vclock, cb, cb_arg); + if (rc != 0) { + gc_unref_checkpoint(&box_backup_vclock); + box_backup_is_in_progress = false; + } return rc; } void box_backup_stop(void) { - box_backup_is_in_progress = false; + if (box_backup_is_in_progress) { + gc_unref_checkpoint(&box_backup_vclock); + box_backup_is_in_progress = false; + } } const char * diff --git a/src/box/box.h b/src/box/box.h index 9305c5b895..d6cd9bba88 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -98,12 +98,6 @@ extern uint32_t snapshot_version; */ int box_snapshot(void); -/** - * Remove files that are not needed to recover - * from snapshot with @lsn or newer. - */ -void box_gc(int64_t lsn); - typedef int (*box_backup_cb)(const char *path, void *arg); /** diff --git a/src/box/engine.cc b/src/box/engine.cc index c8b9515318..91f560b746 100644 --- a/src/box/engine.cc +++ b/src/box/engine.cc @@ -39,6 +39,7 @@ #include "small/rlist.h" #include "scoped_guard.h" #include "vclock.h" +#include "gc.h" #include <stdlib.h> #include <string.h> #include <errinj.h> @@ -348,6 +349,8 @@ engine_commit_checkpoint(struct vclock *vclock) if (engine->waitCheckpoint(vclock) < 0) return -1; } + if (gc_add_checkpoint(vclock) < 0) + return -1; /* remove previous snapshot reference */ engine_foreach(engine) { engine->commitCheckpoint(vclock); diff --git a/src/box/engine.h b/src/box/engine.h index 31903f2c0a..941d837dac 100644 --- a/src/box/engine.h +++ b/src/box/engine.h @@ -44,11 +44,13 @@ enum engine_flags { extern struct rlist engines; -struct Handler; - typedef int engine_backup_cb(const char *path, void *arg); +#if defined(__cplusplus) + +struct Handler; + /** Engine instance */ class Engine { public: @@ -302,6 +304,16 @@ engine_begin_final_recovery(); void engine_end_recovery(); +/** + * Feed snapshot data as join events to the replicas. + * (called on the master). + */ +void +engine_join(struct vclock *vclock, struct xstream *stream); + +extern "C" { +#endif /* defined(__cplusplus) */ + int engine_begin_checkpoint(); @@ -320,11 +332,8 @@ engine_collect_garbage(int64_t lsn); int engine_backup(struct vclock *vclock, engine_backup_cb cb, void *cb_arg); -/** - * Feed snapshot data as join events to the replicas. - * (called on the master). - */ -void -engine_join(struct vclock *vclock, struct xstream *stream); +#if defined(__cplusplus) +} /* extern "C" */ +#endif /* defined(__cplusplus) */ #endif /* TARANTOOL_BOX_ENGINE_H_INCLUDED */ diff --git a/src/box/gc.c b/src/box/gc.c new file mode 100644 index 0000000000..67a5e80119 --- /dev/null +++ b/src/box/gc.c @@ -0,0 +1,202 @@ +/* + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "gc.h" + +#include <stdint.h> +#include <stdlib.h> + +#include "diag.h" +#include "errcode.h" +#include "vclock.h" +#include "xlog.h" + +#include "engine.h" /* engine_collect_garbage() */ +#include "replication.h" /* INSTANCE_UUID */ +#include "wal.h" /* wal_collect_garbage() */ + +/** Checkpoint info. */ +struct checkpoint_info { + /** Checkpoint vclock, linked in gc_state.checkpoints. */ + struct vclock vclock; + /** + * Number of active users of this checkpoint. + * A checkpoint can't be collected unless @refs is 0. + */ + int refs; +}; + +/** Garbage collection state. */ +struct gc_state { + /** Max signature garbage collection has been called for. */ + int64_t signature; + /** Uncollected checkpoints, see checkpoint_info. */ + vclockset_t checkpoints; +}; +static struct gc_state gc; + +int +gc_init(const char *snap_dirname) +{ + gc.signature = -1; + vclockset_new(&gc.checkpoints); + + struct xdir dir; + xdir_create(&dir, snap_dirname, SNAP, &INSTANCE_UUID); + if (xdir_scan(&dir) < 0) + goto fail; + + for (struct vclock *vclock = vclockset_first(&dir.index); + vclock != NULL; vclock = vclockset_next(&dir.index, vclock)) { + if (gc_add_checkpoint(vclock) < 0) + goto fail; + } + xdir_destroy(&dir); + return 0; +fail: + xdir_destroy(&dir); + gc_free(); + return -1; +} + +void +gc_free(void) +{ + struct vclock *vclock = vclockset_first(&gc.checkpoints); + while (vclock != NULL) { + struct vclock *next = vclockset_next(&gc.checkpoints, vclock); + vclockset_remove(&gc.checkpoints, vclock); + struct checkpoint_info *cpt = container_of(vclock, + struct checkpoint_info, vclock); + free(cpt); + vclock = next; + } +} + +int +gc_add_checkpoint(const struct vclock *vclock) +{ + struct checkpoint_info *cpt; + cpt = (struct checkpoint_info *)malloc(sizeof(*cpt)); + if (cpt == NULL) { + diag_set(OutOfMemory, sizeof(*cpt), + "malloc", "struct checkpoint_info"); + return -1; + } + + struct vclock *prev = vclockset_last(&gc.checkpoints); + /* + * Do not allow to remove the last checkpoint, + * because we need it for recovery. + */ + cpt->refs = 1; + vclock_copy(&cpt->vclock, vclock); + vclockset_insert(&gc.checkpoints, &cpt->vclock); + + if (prev != NULL) { + assert(vclock_compare(vclock, prev) > 0); + gc_unref_checkpoint(prev); + } + return 0; +} + +int64_t +gc_last_checkpoint(struct vclock *vclock) +{ + struct vclock *last = vclockset_last(&gc.checkpoints); + if (last == NULL) + return -1; + vclock_copy(vclock, last); + return vclock_sum(last); +} + +int64_t +gc_ref_last_checkpoint(struct vclock *vclock) +{ + struct vclock *last = vclockset_last(&gc.checkpoints); + if (last == NULL) + return -1; + struct checkpoint_info *cpt = container_of(last, + struct checkpoint_info, vclock); + /* The last checkpoint is always pinned. */ + assert(cpt->refs > 0); + cpt->refs++; + vclock_copy(vclock, last); + return vclock_sum(last); +} + +void +gc_unref_checkpoint(struct vclock *vclock) +{ + struct vclock *cpt_vclock = vclockset_search(&gc.checkpoints, vclock); + assert(cpt_vclock != NULL); + struct checkpoint_info *cpt = container_of(cpt_vclock, + struct checkpoint_info, vclock); + assert(cpt->refs > 0); + cpt->refs--; + /* Retry gc when a checkpoint is unpinned. */ + if (cpt->refs == 0) + gc_run(gc.signature); + +} + +void +gc_run(int64_t signature) +{ + if (gc.signature < signature) + gc.signature = signature; + + int64_t gc_signature = -1; + + struct vclock *vclock = vclockset_first(&gc.checkpoints); + while (vclock != NULL) { + if (vclock_sum(vclock) >= signature) + break; /* all eligible checkpoints removed */ + + struct checkpoint_info *cpt = container_of(vclock, + struct checkpoint_info, vclock); + if (cpt->refs > 0) + break; /* checkpoint still in use */ + + struct vclock *next = vclockset_next(&gc.checkpoints, vclock); + vclockset_remove(&gc.checkpoints, vclock); + free(cpt); + vclock = next; + + /* Include this checkpoint to gc. */ + gc_signature = (vclock != NULL ? + vclock_sum(vclock) : signature); + } + + if (gc_signature >= 0) { + wal_collect_garbage(gc_signature); + engine_collect_garbage(gc_signature); + } +} diff --git a/src/box/gc.h b/src/box/gc.h new file mode 100644 index 0000000000..337b339fef --- /dev/null +++ b/src/box/gc.h @@ -0,0 +1,96 @@ +#ifndef TARANTOOL_BOX_GC_H_INCLUDED +#define TARANTOOL_BOX_GC_H_INCLUDED +/* + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdint.h> + +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + +struct vclock; + +/** + * Initialize the garbage collection state. + * @snap_dirname is a path to the snapshot directory. + * Return 0 on success, -1 on failure. + */ +int +gc_init(const char *snap_dirname); + +/** + * Destroy the garbage collection state. + */ +void +gc_free(void); + +/** + * Add a new checkpoint to the garbage collection state. + * Returns 0 on success, -1 on OOM. + */ +int +gc_add_checkpoint(const struct vclock *vclock); + +/** + * Get the last checkpoint vclock and return its signature. + * Returns -1 if there are no checkpoints. + */ +int64_t +gc_last_checkpoint(struct vclock *vclock); + +/** + * Pin the last checkpoint so that it cannot be removed by garbage + * collection. The checkpoint vclock is returned in @vclock. + * Returns the checkpoint signature or -1 if there are no checkpoints. + */ +int64_t +gc_ref_last_checkpoint(struct vclock *vclock); + +/** + * Unpin a checkpoint that was pinned with gc_pin_last_checkpoint() + * and retry garbage collection if necessary. + */ +void +gc_unref_checkpoint(struct vclock *vclock); + +/** + * Invoke garbage collection in order to remove files left from + * checkpoints older than @signature. + */ +void +gc_run(int64_t signature); + +#if defined(__cplusplus) +} /* extern "C" */ +#endif /* defined(__cplusplus) */ + +#endif /* TARANTOOL_BOX_GC_H_INCLUDED */ diff --git a/src/box/lua/init.c b/src/box/lua/init.c index 5662cb20be..ee4a58085c 100644 --- a/src/box/lua/init.c +++ b/src/box/lua/init.c @@ -38,6 +38,7 @@ #include "box/box.h" #include "box/txn.h" +#include "box/gc.h" #include "box/lua/error.h" #include "box/lua/tuple.h" @@ -107,7 +108,7 @@ static int lbox_gc(struct lua_State *L) { int64_t lsn = luaL_checkint64(L, 1); - box_gc(lsn); + gc_run(lsn); return 0; } diff --git a/src/box/memtx_engine.cc b/src/box/memtx_engine.cc index f1d72415f2..fd97810087 100644 --- a/src/box/memtx_engine.cc +++ b/src/box/memtx_engine.cc @@ -143,22 +143,12 @@ MemtxEngine::~MemtxEngine() memtx_tuple_free(); } -int64_t -MemtxEngine::lastCheckpoint(struct vclock *vclock) -{ - return xdir_last_vclock(&m_snap_dir, vclock); -} - void -MemtxEngine::recoverSnapshot() +MemtxEngine::recoverSnapshot(const struct vclock *vclock) { - struct vclock vclock; - if (lastCheckpoint(&vclock) < 0) - return; - /* Process existing snapshot */ say_info("recovery start"); - int64_t signature = vclock.signature; + int64_t signature = vclock_sum(vclock); const char *filename = xdir_format_filename(&m_snap_dir, signature, NONE); @@ -1007,9 +997,3 @@ memtx_index_extent_reserve(int num) memtx_index_num_reserved_extents++; } } - -int -recovery_last_checkpoint(struct vclock *vclock) -{ - return ((MemtxEngine *)engine_find("memtx"))->lastCheckpoint(vclock); -} diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h index 6fc768366b..3c4a6452a4 100644 --- a/src/box/memtx_engine.h +++ b/src/box/memtx_engine.h @@ -108,12 +108,7 @@ struct MemtxEngine: public Engine { { m_snap_io_rate_limit = new_limit * 1024 * 1024; } - /** - * Return LSN of the most recent snapshot or -1 if there is - * no snapshot. - */ - int64_t lastCheckpoint(struct vclock *vclock); - void recoverSnapshot(); + void recoverSnapshot(const struct vclock *vclock); private: void recoverSnapshotRow(struct xrow_header *row); diff --git a/src/box/recovery.h b/src/box/recovery.h index 572f5258b6..b72a77ec5d 100644 --- a/src/box/recovery.h +++ b/src/box/recovery.h @@ -81,21 +81,6 @@ recovery_finalize(struct recovery *r, struct xstream *stream); } /* extern "C" */ #endif /* defined(__cplusplus) */ -/** - * The write ahead log doesn't store the last checkpoint: - * it is represented by the last valid snapshot of memtx engine. - * This is legacy from the time the entire box was single-engine. - * - * @param[out] vclock vclock of the last checkpoint - * @retval signature of the last checkpoint, or -1 - * in case of fresh boot - * - * The function may throw XlogError exception. - * It is implemented in memtx_engine.cc - */ -int -recovery_last_checkpoint(struct vclock *vclock); - /** * Find out if there are new .xlog files since the current * vclock, and read them all up. diff --git a/test/box/backup.result b/test/box/backup.result index 0dcd1a8cd3..f13c347fc9 100644 --- a/test/box/backup.result +++ b/test/box/backup.result @@ -63,6 +63,11 @@ box.snapshot() _ = test_run:cmd("setopt delimiter ''"); --- ... +-- Make sure that garbage collection is disabled +-- while backup is in progress. +box.internal.gc(box.info.cluster.signature) +--- +... -- Prepare backup directory backup_dir = fio.pathjoin(fio.cwd(), 'backup') --- diff --git a/test/box/backup.test.lua b/test/box/backup.test.lua index 65882542aa..869fb1d78c 100644 --- a/test/box/backup.test.lua +++ b/test/box/backup.test.lua @@ -42,6 +42,10 @@ end box.snapshot() _ = test_run:cmd("setopt delimiter ''"); +-- Make sure that garbage collection is disabled +-- while backup is in progress. +box.internal.gc(box.info.cluster.signature) + -- Prepare backup directory backup_dir = fio.pathjoin(fio.cwd(), 'backup') _ = os.execute(string.format('rm -rf %s', backup_dir)) -- GitLab