From 36838c7cb8ce2ffa0dc64355dd18a5a40be16173 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov <vdavydov.dev@gmail.com> Date: Sat, 17 Feb 2018 14:42:01 +0300 Subject: [PATCH] vinyl: skip needless checks for duplicates on INSERT If a unique index includes all parts of another unique index, we can skip the check for duplicates for it on INSERT. Let's mark all such indexes with a special flag on CREATE/ALTER and optimize out the check if the flag is set. If there are two indexes that index the same set of fields, check uniqueness for the one with a lower id, because it is likelier to have have a "warmer" cache. Closes #3154 --- src/box/key_def.cc | 12 ++++++++ src/box/key_def.h | 9 ++++++ src/box/vinyl.c | 34 ++++++++++++++++++---- src/box/vy_index.c | 1 + src/box/vy_index.h | 7 +++++ test/vinyl/misc.result | 63 ++++++++++++++++++++++++++++++++++++++++ test/vinyl/misc.test.lua | 25 ++++++++++++++++ 7 files changed, 146 insertions(+), 5 deletions(-) diff --git a/src/box/key_def.cc b/src/box/key_def.cc index f7f6c753a1..b4da3aed0d 100644 --- a/src/box/key_def.cc +++ b/src/box/key_def.cc @@ -481,6 +481,18 @@ key_def_find(const struct key_def *key_def, uint32_t fieldno) return NULL; } +bool +key_def_contains(const struct key_def *first, const struct key_def *second) +{ + const struct key_part *part = second->parts; + const struct key_part *end = part + second->part_count; + for (; part != end; part++) { + if (key_def_find(first, part->fieldno) == NULL) + return false; + } + return true; +} + struct key_def * key_def_merge(const struct key_def *first, const struct key_def *second) { diff --git a/src/box/key_def.h b/src/box/key_def.h index 201863f5a6..5982f5a470 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -260,6 +260,15 @@ key_def_decode_parts_160(struct key_part_def *parts, uint32_t part_count, const struct key_part * key_def_find(const struct key_def *key_def, uint32_t fieldno); +/** + * Check if key definition @a first contains all parts of + * key definition @a second. + * @retval true if @a first is a superset of @a second + * @retval false otherwise + */ +bool +key_def_contains(const struct key_def *first, const struct key_def *second); + /** * Allocate a new key_def with a set union of key parts from * first and second key defs. Parts of the new key_def consist diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 1ceeb847cf..57cf7998e3 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -1115,6 +1115,7 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space) /* Set possibly changed opts. */ pk->opts = new_index_def->opts; + pk->check_is_unique = true; /* Set new formats. */ tuple_format_unref(pk->disk_format); @@ -1138,6 +1139,7 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space) index->pk = pk; new_index_def = space_index_def(new_space, i); index->opts = new_index_def->opts; + index->check_is_unique = index->opts.is_unique; tuple_format_unref(index->mem_format_with_colmask); tuple_format_unref(index->mem_format); tuple_format_unref(index->upsert_format); @@ -1150,6 +1152,27 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space) tuple_format_ref(index->upsert_format); vy_index_validate_formats(index); } + + /* + * Check if there are unique indexes that are contained + * by other unique indexes. For them, we can skip check + * for duplicates on INSERT. Prefer indexes with higher + * ids for uniqueness check optimization as they are + * likelier to have a "colder" cache. + */ + for (int i = new_space->index_count - 1; i >= 0; i--) { + struct vy_index *index = vy_index(new_space->index[i]); + if (!index->check_is_unique) + continue; + for (int j = 0; j < (int)new_space->index_count; j++) { + struct vy_index *other = vy_index(new_space->index[j]); + if (other != index && other->check_is_unique && + key_def_contains(index->key_def, other->key_def)) { + index->check_is_unique = false; + break; + } + } + } return; fail: /* FIXME: space_vtab::commit_alter() must not fail. */ @@ -1343,8 +1366,8 @@ vy_index_get(struct vy_index *index, struct vy_tx *tx, * @retval -1 Memory error or the key is found. */ static inline int -vy_check_dup_key(struct vy_env *env, struct vy_tx *tx, struct space *space, - struct vy_index *index, struct tuple *key) +vy_check_is_unique(struct vy_env *env, struct vy_tx *tx, struct space *space, + struct vy_index *index, struct tuple *key) { struct tuple *found; /* @@ -1387,7 +1410,8 @@ vy_insert_primary(struct vy_env *env, struct vy_tx *tx, struct space *space, * A primary index is always unique and the new tuple must not * conflict with existing tuples. */ - if (vy_check_dup_key(env, tx, space, pk, stmt) != 0) + if (pk->check_is_unique && + vy_check_is_unique(env, tx, space, pk, stmt) != 0) return -1; return vy_tx_set(tx, pk, stmt); } @@ -1416,7 +1440,7 @@ vy_insert_secondary(struct vy_env *env, struct vy_tx *tx, struct space *space, * conflict with existing tuples. If the index is not * unique a conflict is impossible. */ - if (index->opts.is_unique && + if (index->check_is_unique && !key_update_can_be_skipped(index->key_def->column_mask, vy_stmt_column_mask(stmt)) && (!index->key_def->is_nullable || @@ -1425,7 +1449,7 @@ vy_insert_secondary(struct vy_env *env, struct vy_tx *tx, struct space *space, index->env->key_format); if (key == NULL) return -1; - int rc = vy_check_dup_key(env, tx, space, index, key); + int rc = vy_check_is_unique(env, tx, space, index, key); tuple_unref(key); if (rc != 0) return -1; diff --git a/src/box/vy_index.c b/src/box/vy_index.c index 0596c4edef..2ca1667267 100644 --- a/src/box/vy_index.c +++ b/src/box/vy_index.c @@ -239,6 +239,7 @@ vy_index_new(struct vy_index_env *index_env, struct vy_cache_env *cache_env, index->space_id = index_def->space_id; index->id = index_def->iid; index->opts = index_def->opts; + index->check_is_unique = index->opts.is_unique; vy_index_read_set_new(&index->read_set); index_env->index_count++; diff --git a/src/box/vy_index.h b/src/box/vy_index.h index 47fa335f2e..4368f6a592 100644 --- a/src/box/vy_index.h +++ b/src/box/vy_index.h @@ -158,6 +158,13 @@ struct vy_index { struct key_def *cmp_def; /** Key definition passed by the user. */ struct key_def *key_def; + /** + * If the following flag is set the index is unique and + * it must be checked for duplicates on INSERT. Otherwise, + * the check can be skipped, either because this index + * is not unique or it is a part of another unique index. + */ + bool check_is_unique; /** * Tuple format for tuples of this index created when * reading pages from disk. diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result index a8db51e5ab..8c4bbbb43a 100644 --- a/test/vinyl/misc.result +++ b/test/vinyl/misc.result @@ -112,3 +112,66 @@ tx2.rollback - tx1.rollback -- 1 s:drop() --- ... +-- +-- gh-3158: check of duplicates is skipped if the index +-- is contained by another unique index which is checked. +-- +s = box.schema.create_space('test', {engine = 'vinyl'}) +--- +... +i1 = s:create_index('i1', {unique = true, parts = {1, 'unsigned', 2, 'unsigned'}}) +--- +... +i2 = s:create_index('i2', {unique = true, parts = {2, 'unsigned', 1, 'unsigned'}}) +--- +... +i3 = s:create_index('i3', {unique = true, parts = {3, 'unsigned', 4, 'unsigned', 5, 'unsigned'}}) +--- +... +i4 = s:create_index('i4', {unique = true, parts = {5, 'unsigned', 4, 'unsigned'}}) +--- +... +i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned', 1, 'unsigned'}}) +--- +... +i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}}) +--- +... +i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}}) +--- +... +s:insert{1, 1, 1, 1, 1, 1} +--- +- [1, 1, 1, 1, 1, 1] +... +i1:info().lookup -- 1 +--- +- 1 +... +i2:info().lookup -- 0 +--- +- 0 +... +i3:info().lookup -- 0 +--- +- 0 +... +i4:info().lookup -- 1 +--- +- 1 +... +i5:info().lookup -- 0 +--- +- 0 +... +i6:info().lookup -- 0 +--- +- 0 +... +i7:info().lookup -- 1 +--- +- 1 +... +s:drop() +--- +... diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua index 8b76a66b74..93dfb4bfe6 100644 --- a/test/vinyl/misc.test.lua +++ b/test/vinyl/misc.test.lua @@ -48,3 +48,28 @@ tx2 = box.info.vinyl().tx tx2.commit - tx1.commit -- 0 tx2.rollback - tx1.rollback -- 1 s:drop() + +-- +-- gh-3158: check of duplicates is skipped if the index +-- is contained by another unique index which is checked. +-- +s = box.schema.create_space('test', {engine = 'vinyl'}) +i1 = s:create_index('i1', {unique = true, parts = {1, 'unsigned', 2, 'unsigned'}}) +i2 = s:create_index('i2', {unique = true, parts = {2, 'unsigned', 1, 'unsigned'}}) +i3 = s:create_index('i3', {unique = true, parts = {3, 'unsigned', 4, 'unsigned', 5, 'unsigned'}}) +i4 = s:create_index('i4', {unique = true, parts = {5, 'unsigned', 4, 'unsigned'}}) +i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned', 1, 'unsigned'}}) +i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}}) +i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}}) + +s:insert{1, 1, 1, 1, 1, 1} + +i1:info().lookup -- 1 +i2:info().lookup -- 0 +i3:info().lookup -- 0 +i4:info().lookup -- 1 +i5:info().lookup -- 0 +i6:info().lookup -- 0 +i7:info().lookup -- 1 + +s:drop() -- GitLab