From a532e375fe3703a210a58e1cc4b57fdc608d34c2 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov <vdavydov@tarantool.org> Date: Wed, 10 May 2023 18:29:17 +0300 Subject: [PATCH] box: exclude uncommitted alter records from snapshot With MVCC off (box.cfg.memtx_use_mvcc_engine = false), a memtx space read view may include a dirty (not committed to WAL) record. To prevent such records from being written to a snapshot, we sync WAL after creating a read view for a snapshot. The problem is that it doesn't work for long (yielding) DDL operations, such as building a new index, because such operations yield before waiting on WAL. As a result, a dirty DDL record may make it to a snapshot even though it may fail eventually. To fix that, let's keep track of all yielding DDL statements and exclude them from a read view using the memtx snapshot cleaner. Closes #8530 NO_DOC=bug fix --- .../gh-8530-alter-space-snapshot-fix.md | 4 + src/box/alter.cc | 7 ++ src/box/memtx_tx.c | 11 ++- src/box/space.c | 2 + src/box/space.h | 21 +++++ .../gh_8530_alter_space_snapshot_test.lua | 77 +++++++++++++++++++ 6 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 changelogs/unreleased/gh-8530-alter-space-snapshot-fix.md create mode 100644 test/box-luatest/gh_8530_alter_space_snapshot_test.lua diff --git a/changelogs/unreleased/gh-8530-alter-space-snapshot-fix.md b/changelogs/unreleased/gh-8530-alter-space-snapshot-fix.md new file mode 100644 index 0000000000..e89a7110e9 --- /dev/null +++ b/changelogs/unreleased/gh-8530-alter-space-snapshot-fix.md @@ -0,0 +1,4 @@ +## bugfix/core + +* Fixed a bug because of which a dirty (not committed to WAL) DDL record could + be written to a snapshot and cause a recovery failure (gh-8530). diff --git a/src/box/alter.cc b/src/box/alter.cc index 678ab95853..068ac6ac46 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -903,6 +903,13 @@ alter_space_rollback(struct trigger *trigger, void * /* event */) static void alter_space_do(struct txn_stmt *stmt, struct alter_space *alter) { + struct space_alter_stmt alter_stmt; + alter_stmt.old_tuple = stmt->old_tuple; + alter_stmt.new_tuple = stmt->new_tuple; + rlist_add_entry(&stmt->space->alter_stmts, &alter_stmt, link); + auto alter_stmt_guard = make_scoped_guard([&] { + rlist_del_entry(&alter_stmt, link); + }); /** * AlterSpaceOp::prepare() may perform a potentially long * lasting operation that may yield, e.g. building of a new diff --git a/src/box/memtx_tx.c b/src/box/memtx_tx.c index 2a555faaf4..640464a1f4 100644 --- a/src/box/memtx_tx.c +++ b/src/box/memtx_tx.c @@ -3344,7 +3344,8 @@ memtx_tx_snapshot_cleaner_create(struct memtx_tx_snapshot_cleaner *cleaner, struct space *space) { cleaner->ht = NULL; - if (space == NULL || rlist_empty(&space->memtx_stories)) + if (rlist_empty(&space->memtx_stories) && + rlist_empty(&space->alter_stmts)) return; struct mh_snapshot_cleaner_t *ht = mh_snapshot_cleaner_new(); struct memtx_story *story; @@ -3361,7 +3362,13 @@ memtx_tx_snapshot_cleaner_create(struct memtx_tx_snapshot_cleaner *cleaner, entry.to = clean; mh_snapshot_cleaner_put(ht, &entry, NULL, 0); } - + struct space_alter_stmt *alter_stmt; + rlist_foreach_entry(alter_stmt, &space->alter_stmts, link) { + struct memtx_tx_snapshot_cleaner_entry entry; + entry.from = alter_stmt->new_tuple; + entry.to = alter_stmt->old_tuple; + mh_snapshot_cleaner_put(ht, &entry, NULL, 0); + } cleaner->ht = ht; } diff --git a/src/box/space.c b/src/box/space.c index 3b34f87a4b..e64186f913 100644 --- a/src/box/space.c +++ b/src/box/space.c @@ -330,6 +330,7 @@ space_create(struct space *space, struct engine *engine, } space->constraint_ids = mh_strnptr_new(); rlist_create(&space->memtx_stories); + rlist_create(&space->alter_stmts); return 0; fail_free_indexes: @@ -389,6 +390,7 @@ space_new_ephemeral(struct space_def *def, struct rlist *key_list) void space_delete(struct space *space) { + assert(rlist_empty(&space->alter_stmts)); memtx_tx_on_space_delete(space); for (uint32_t j = 0; j <= space->index_id_max; j++) { struct index *index = space->index_map[j]; diff --git a/src/box/space.h b/src/box/space.h index d1d8a5fe70..afcddebc5f 100644 --- a/src/box/space.h +++ b/src/box/space.h @@ -230,6 +230,17 @@ struct space { * List of all tx stories in the space. */ struct rlist memtx_stories; + /** + * List of currently running long (yielding) space alter operations + * triggered by statements applied to this space (see alter_space_do), + * linked by space_alter_stmt::link. + * + * We must exclude such statements from snapshot because they haven't + * reached WAL yet and may actually fail. With MVCC off, such + * statements would be visible from a read view so we have to keep + * track of them separately. + */ + struct rlist alter_stmts; /** Space upgrade state or NULL. */ struct space_upgrade *upgrade; /** @@ -239,6 +250,16 @@ struct space { struct space_wal_ext *wal_ext; }; +/** Space alter statement. */ +struct space_alter_stmt { + /** Link in space::alter_stmts. */ + struct rlist link; + /** Old tuple. */ + struct tuple *old_tuple; + /** New tuple. */ + struct tuple *new_tuple; +}; + /** * Detach constraints from space. They can be reattached or deleted then. */ diff --git a/test/box-luatest/gh_8530_alter_space_snapshot_test.lua b/test/box-luatest/gh_8530_alter_space_snapshot_test.lua new file mode 100644 index 0000000000..cddc401f03 --- /dev/null +++ b/test/box-luatest/gh_8530_alter_space_snapshot_test.lua @@ -0,0 +1,77 @@ +local server = require('luatest.server') +local t = require('luatest') + +local g = t.group('gh_8530_alter_space_snapshot', { + {memtx_use_mvcc_engine = true}, + {memtx_use_mvcc_engine = false}, +}) + +g.before_all(function(cg) + t.tarantool.skip_if_not_debug() + cg.server = server:new({ + box_cfg = { + memtx_use_mvcc_engine = cg.params.memtx_use_mvcc_engine, + }, + }) + cg.server:start() +end) + +g.after_all(function(cg) + cg.server:drop() +end) + +g.before_each(function(cg) + cg.server:exec(function() + local s = box.schema.create_space('test') + s:create_index('primary') + s:insert({1, 10}) + s:insert({2, 20}) + s:insert({3, 30}) + end) +end) + +g.after_each(function(cg) + cg.server:exec(function() + box.space.test:drop() + end) +end) + +-- Check that a new index record that hasn't been written to WAL doesn't make +-- it to a snapshot. +g.test_build_index = function(cg) + cg.server:exec(function() + local fiber = require('fiber') + local s = box.space.test + box.error.injection.set('ERRINJ_BUILD_INDEX_DELAY', true) + local f = fiber.create(s.create_index, s, 'secondary', + {parts = {2, 'unsigned'}}) + fiber.sleep(0.01) + box.snapshot() + t.assert_equals(f:status(), 'suspended') + end) + cg.server:restart() + cg.server:exec(function() + local s = box.space.test + t.assert_equals(s.index.secondary, nil) + end) +end + +-- Check that a space format record that hasn't been written to WAL doesn't +-- make it to a snapshot. +g.test_change_format = function(cg) + cg.server:exec(function() + local fiber = require('fiber') + local s = box.space.test + box.error.injection.set('ERRINJ_CHECK_FORMAT_DELAY', true) + local f = fiber.create(s.format, s, + {{'a', 'unsigned'}, {'b', 'unsigned'}}) + fiber.sleep(0.01) + box.snapshot() + t.assert_equals(f:status(), 'suspended') + end) + cg.server:restart() + cg.server:exec(function() + local s = box.space.test + t.assert_equals(s:format(), {}) + end) +end -- GitLab