diff --git a/changelogs/unreleased/gh-10292-vy-log-force-recovery.md b/changelogs/unreleased/gh-10292-vy-log-force-recovery.md new file mode 100644 index 0000000000000000000000000000000000000000..7684dd5bc42a0e4576b228e494a469a85c8947c9 --- /dev/null +++ b/changelogs/unreleased/gh-10292-vy-log-force-recovery.md @@ -0,0 +1,4 @@ +## bugfix/vinyl + +* The `force_recovery` mode can now be used to suppress "Invalid VYLOG file" + recovery errors (gh-10292). diff --git a/src/box/vy_log.c b/src/box/vy_log.c index bba4ac35caba5b7ea3817f0a9b04743581b774f3..da05b119674c27ef583fd94b853b8e2d2b4f4b03 100644 --- a/src/box/vy_log.c +++ b/src/box/vy_log.c @@ -153,6 +153,11 @@ struct vy_log { struct vclock last_checkpoint; /** Recovery context. */ struct vy_recovery *recovery; + /** + * Set if the last checkpoint is known to contain bad records. + * May be set only in the force_recovery mode. + */ + bool has_errors; /** * Latch that syncs log writers against readers. * Needed so that we don't miss any records during @@ -1105,8 +1110,10 @@ vy_log_begin_recovery(const struct vclock *vclock, bool force_recovery) * rebootstrap section, checkpoint (and hence rebootstrap) * failed, and we need to mark rebootstrap as aborted. */ - struct vy_recovery *recovery; - recovery = vy_recovery_new(-1, VY_RECOVERY_ABORT_REBOOTSTRAP); + int flags = VY_RECOVERY_ABORT_REBOOTSTRAP; + if (force_recovery) + flags |= VY_RECOVERY_IGNORE_ERRORS; + struct vy_recovery *recovery = vy_recovery_new(-1, flags); if (recovery == NULL) return NULL; @@ -1124,6 +1131,7 @@ vy_log_begin_recovery(const struct vclock *vclock, bool force_recovery) vy_log.next_id = recovery->max_id + 1; vy_log.recovery = recovery; + vy_log.has_errors = recovery->has_errors; return recovery; } @@ -1194,8 +1202,17 @@ vy_log_rotate(const struct vclock *vclock) */ latch_lock(&vy_log.latch); + /* + * If the old vylog was loaded with errors in the force_recovery mode, + * we have to ignore them so that we can load it again to make a new + * checkpoint. + */ + int flags = 0; + if (vy_log.has_errors) + flags |= VY_RECOVERY_IGNORE_ERRORS; + struct vy_recovery *recovery; - recovery = vy_recovery_new_locked(prev_signature, 0); + recovery = vy_recovery_new_locked(prev_signature, flags); if (recovery == NULL) goto fail; @@ -1218,6 +1235,9 @@ vy_log_rotate(const struct vclock *vclock) /* Add the new vclock to the xdir so that we can track it. */ xdir_add_vclock(&vy_log.dir, vclock); + /* Hopefully, the new checkpoint doesn't have errors. */ + vy_log.has_errors = false; + latch_unlock(&vy_log.latch); say_verbose("done rotating vylog"); return 0; @@ -2162,7 +2182,8 @@ vy_recovery_commit_rebootstrap(struct vy_recovery *recovery) * Fill index_id_hash with LSM trees recovered from vylog. */ static int -vy_recovery_build_index_id_hash(struct vy_recovery *recovery) +vy_recovery_build_index_id_hash(struct vy_recovery *recovery, + bool force_recovery) { struct mh_i64ptr_t *h = recovery->index_id_hash; struct vy_lsm_recovery_info *lsm; @@ -2208,7 +2229,7 @@ vy_recovery_build_index_id_hash(struct vy_recovery *recovery) tt_sprintf("LSM tree %u/%u created twice", (unsigned)space_id, (unsigned)index_id)); - return -1; + goto bad_record; } if (hashed_lsm->create_lsn < 0 || hashed_lsm->prepared != NULL) { @@ -2216,9 +2237,16 @@ vy_recovery_build_index_id_hash(struct vy_recovery *recovery) tt_sprintf("LSM tree %u/%u prepared twice", (unsigned)space_id, (unsigned)index_id)); - return -1; + goto bad_record; } hashed_lsm->prepared = lsm; + continue; +bad_record: + if (!force_recovery) + return -1; + diag_log(); + say_error("skipping invalid vylog record"); + recovery->has_errors = true; } return 0; } @@ -2241,6 +2269,7 @@ vy_recovery_new_f(va_list ap) recovery->slice_hash = NULL; recovery->max_id = -1; recovery->in_rebootstrap = false; + recovery->has_errors = false; recovery->index_id_hash = mh_i64ptr_new(); recovery->lsm_hash = mh_i64ptr_new(); @@ -2264,12 +2293,13 @@ vy_recovery_new_f(va_list ap) int rc; struct xrow_header row; size_t region_svp = region_used(&fiber()->gc); - while ((rc = xlog_cursor_next(&cursor, &row, false)) == 0) { + bool force_recovery = (flags & VY_RECOVERY_IGNORE_ERRORS) != 0; + while ((rc = xlog_cursor_next(&cursor, &row, force_recovery)) == 0) { struct vy_log_record record; region_truncate(&fiber()->gc, region_svp); rc = vy_log_record_decode(&record, &row); if (rc < 0) - break; + goto bad_record; say_verbose("load vylog record: %s", vy_log_record_str(&record)); if (record.type == VY_LOG_SNAPSHOT) { @@ -2279,7 +2309,14 @@ vy_recovery_new_f(va_list ap) } rc = vy_recovery_process_record(recovery, &record); if (rc < 0) + goto bad_record; + continue; +bad_record: + if (!force_recovery) break; + diag_log(); + say_error("skipping invalid vylog record"); + recovery->has_errors = true; } region_truncate(&fiber()->gc, region_svp); if (rc < 0) @@ -2294,7 +2331,7 @@ vy_recovery_new_f(va_list ap) vy_recovery_commit_rebootstrap(recovery); } - if (vy_recovery_build_index_id_hash(recovery) != 0) + if (vy_recovery_build_index_id_hash(recovery, force_recovery) != 0) goto fail_free; out: say_verbose("done loading vylog"); @@ -2525,6 +2562,38 @@ vy_log_create(const struct vclock *vclock, struct vy_recovery *recovery) if (vy_log_append_record(&xlog, &record) != 0) goto err_write_xlog; + ERROR_INJECT(ERRINJ_VY_LOG_WRITE_BAD_RECORDS, { + /* LSM tree deleted but not registered. */ + vy_log_record_init(&record); + record.type = VY_LOG_DROP_LSM; + record.lsm_id = 100501; + if (vy_log_append_record(&xlog, &record) != 0) + goto err_write_xlog; + + /* Missing key definition. */ + vy_log_record_init(&record); + record.type = VY_LOG_PREPARE_LSM; + record.lsm_id = 100502; + record.space_id = 12345; + if (vy_log_append_record(&xlog, &record) != 0) + goto err_write_xlog; + + /* LSM tree prepared twice. */ + vy_log_record_init(&record); + record.type = VY_LOG_PREPARE_LSM; + record.lsm_id = 100503; + record.space_id = 12345; + struct key_part_def part_def = key_part_def_default; + part_def.type = FIELD_TYPE_UNSIGNED; + record.key_parts = &part_def; + record.key_part_count = 1; + if (vy_log_append_record(&xlog, &record) != 0) + goto err_write_xlog; + record.lsm_id = 100504; + if (vy_log_append_record(&xlog, &record) != 0) + goto err_write_xlog; + }); + ERROR_INJECT(ERRINJ_VY_LOG_FILE_RENAME, { diag_set(ClientError, ER_INJECTION, "vinyl log file rename"); xlog_close(&xlog, false); diff --git a/src/box/vy_log.h b/src/box/vy_log.h index 0820ad1c7970f0e7214262ae1821d53ac12567cc..df92edd93ae03ee961d6a334c64cbd6f3d74465d 100644 --- a/src/box/vy_log.h +++ b/src/box/vy_log.h @@ -315,6 +315,11 @@ struct vy_recovery { * seen matching VY_LOG_ABORT_REBOOTSTRAP. */ bool in_rebootstrap; + /** + * Set there were errors encountered and skipped during recovery. + * May be set only if the VY_RECOVERY_IGNORE_ERRORS flag was used. + */ + bool has_errors; }; /** LSM tree info stored in a recovery context. */ @@ -583,6 +588,10 @@ enum vy_recovery_flag { * there's no VY_LOG_ABORT_REBOOTSTRAP record. */ VY_RECOVERY_ABORT_REBOOTSTRAP = 1 << 1, + /** + * Log and skip invalid rows (force recovery mode). + */ + VY_RECOVERY_IGNORE_ERRORS = 1 << 2, }; /** diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index 75fbb6d2d5d582d2768ec526a6f39812bcb9fedd..c10b5b3bb463d2b2f4432ddb402c8e97397a1c00 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -156,6 +156,7 @@ struct errinj { _(ERRINJ_VY_LOG_FILE_RENAME, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_VY_LOG_FLUSH, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_VY_LOG_FLUSH_DELAY, ERRINJ_BOOL, {.bparam = false}) \ + _(ERRINJ_VY_LOG_WRITE_BAD_RECORDS, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_VY_POINT_ITER_WAIT, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_VY_QUOTA_DELAY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_VY_READ_PAGE, ERRINJ_BOOL, {.bparam = false}) \ diff --git a/test/vinyl-luatest/gh_10292_vylog_force_recovery_test.lua b/test/vinyl-luatest/gh_10292_vylog_force_recovery_test.lua new file mode 100644 index 0000000000000000000000000000000000000000..a9613ddc765dc1421707137673fdf5de098b45db --- /dev/null +++ b/test/vinyl-luatest/gh_10292_vylog_force_recovery_test.lua @@ -0,0 +1,66 @@ +local fio = require('fio') +local server = require('luatest.server') +local t = require('luatest') + +local g = t.group() + +g.before_each(function(cg) + t.tarantool.skip_if_not_debug() + cg.server = server:new() + cg.server:start() +end) + +g.after_each(function(cg) + cg.server:drop() +end) + +g.test_force_recovery = function(cg) + cg.server:exec(function() + local s = box.schema.space.create('test', {engine = 'vinyl'}) + s:create_index('pk') + + -- Create a vylog file with bad records. + box.error.injection.set('ERRINJ_VY_LOG_WRITE_BAD_RECORDS', true) + box.snapshot() + box.error.injection.set('ERRINJ_VY_LOG_WRITE_BAD_RECORDS', false) + + -- Checkpoint should fail because of the bad vylog file. + s:insert({1}) + t.assert_error_covers({ + type = 'ClientError', + code = box.error.INVALID_VYLOG_FILE, + }, box.snapshot) + end) + + -- The server should fail to restart because of the bad vylog file. + cg.server:restart(nil, {wait_until_ready = false}) + t.assert(cg.server:grep_log('INVALID_VYLOG_FILE', nil, { + filename = fio.pathjoin(cg.server.workdir, cg.server.alias .. '.log'), + })) + + -- force_recovery should help. + cg.server:restart({box_cfg = {force_recovery = true}}) + local patterns = { + 'failed to process vylog record: drop_lsm{lsm_id=100501, }', + 'INVALID_VYLOG_FILE.* LSM tree 100501 deleted but not registered', + 'failed to decode vylog record: %[15, {0: 100502, 6: 12345}%]', + 'INVALID_VYLOG_FILE.* Bad record: missing key definition', + 'INVALID_VYLOG_FILE.* LSM tree 12345/0 prepared twice', + 'skipping invalid vylog record', + } + for _, pattern in ipairs(patterns) do + t.assert(cg.server:grep_log(pattern), pattern) + end + cg.server:exec(function() + t.assert_equals(box.space.test:select(), {{1}}) + box.snapshot() + end) + + -- force_recovery isn't required after a checkpoint is created. + cg.server:restart({box_cfg = {force_recovery = false}}) + cg.server:exec(function() + t.assert_equals(box.space.test:select(), {{1}}) + box.space.test:drop() + box.snapshot() + end) +end