From 36838c7cb8ce2ffa0dc64355dd18a5a40be16173 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov.dev@gmail.com>
Date: Sat, 17 Feb 2018 14:42:01 +0300
Subject: [PATCH] vinyl: skip needless checks for duplicates on INSERT

If a unique index includes all parts of another unique index, we can
skip the check for duplicates for it on INSERT. Let's mark all such
indexes with a special flag on CREATE/ALTER and optimize out the check
if the flag is set. If there are two indexes that index the same set
of fields, check uniqueness for the one with a lower id, because it
is likelier to have have a "warmer" cache.

Closes #3154
---
 src/box/key_def.cc       | 12 ++++++++
 src/box/key_def.h        |  9 ++++++
 src/box/vinyl.c          | 34 ++++++++++++++++++----
 src/box/vy_index.c       |  1 +
 src/box/vy_index.h       |  7 +++++
 test/vinyl/misc.result   | 63 ++++++++++++++++++++++++++++++++++++++++
 test/vinyl/misc.test.lua | 25 ++++++++++++++++
 7 files changed, 146 insertions(+), 5 deletions(-)

diff --git a/src/box/key_def.cc b/src/box/key_def.cc
index f7f6c753a1..b4da3aed0d 100644
--- a/src/box/key_def.cc
+++ b/src/box/key_def.cc
@@ -481,6 +481,18 @@ key_def_find(const struct key_def *key_def, uint32_t fieldno)
 	return NULL;
 }
 
+bool
+key_def_contains(const struct key_def *first, const struct key_def *second)
+{
+	const struct key_part *part = second->parts;
+	const struct key_part *end = part + second->part_count;
+	for (; part != end; part++) {
+		if (key_def_find(first, part->fieldno) == NULL)
+			return false;
+	}
+	return true;
+}
+
 struct key_def *
 key_def_merge(const struct key_def *first, const struct key_def *second)
 {
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 201863f5a6..5982f5a470 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -260,6 +260,15 @@ key_def_decode_parts_160(struct key_part_def *parts, uint32_t part_count,
 const struct key_part *
 key_def_find(const struct key_def *key_def, uint32_t fieldno);
 
+/**
+ * Check if key definition @a first contains all parts of
+ * key definition @a second.
+ * @retval true if @a first is a superset of @a second
+ * @retval false otherwise
+ */
+bool
+key_def_contains(const struct key_def *first, const struct key_def *second);
+
 /**
  * Allocate a new key_def with a set union of key parts from
  * first and second key defs. Parts of the new key_def consist
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 1ceeb847cf..57cf7998e3 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1115,6 +1115,7 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 
 	/* Set possibly changed opts. */
 	pk->opts = new_index_def->opts;
+	pk->check_is_unique = true;
 
 	/* Set new formats. */
 	tuple_format_unref(pk->disk_format);
@@ -1138,6 +1139,7 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 		index->pk = pk;
 		new_index_def = space_index_def(new_space, i);
 		index->opts = new_index_def->opts;
+		index->check_is_unique = index->opts.is_unique;
 		tuple_format_unref(index->mem_format_with_colmask);
 		tuple_format_unref(index->mem_format);
 		tuple_format_unref(index->upsert_format);
@@ -1150,6 +1152,27 @@ vinyl_space_commit_alter(struct space *old_space, struct space *new_space)
 		tuple_format_ref(index->upsert_format);
 		vy_index_validate_formats(index);
 	}
+
+	/*
+	 * Check if there are unique indexes that are contained
+	 * by other unique indexes. For them, we can skip check
+	 * for duplicates on INSERT. Prefer indexes with higher
+	 * ids for uniqueness check optimization as they are
+	 * likelier to have a "colder" cache.
+	 */
+	for (int i = new_space->index_count - 1; i >= 0; i--) {
+		struct vy_index *index = vy_index(new_space->index[i]);
+		if (!index->check_is_unique)
+			continue;
+		for (int j = 0; j < (int)new_space->index_count; j++) {
+			struct vy_index *other = vy_index(new_space->index[j]);
+			if (other != index && other->check_is_unique &&
+			    key_def_contains(index->key_def, other->key_def)) {
+				index->check_is_unique = false;
+				break;
+			}
+		}
+	}
 	return;
 fail:
 	/* FIXME: space_vtab::commit_alter() must not fail. */
@@ -1343,8 +1366,8 @@ vy_index_get(struct vy_index *index, struct vy_tx *tx,
  * @retval -1 Memory error or the key is found.
  */
 static inline int
-vy_check_dup_key(struct vy_env *env, struct vy_tx *tx, struct space *space,
-		 struct vy_index *index, struct tuple *key)
+vy_check_is_unique(struct vy_env *env, struct vy_tx *tx, struct space *space,
+		   struct vy_index *index, struct tuple *key)
 {
 	struct tuple *found;
 	/*
@@ -1387,7 +1410,8 @@ vy_insert_primary(struct vy_env *env, struct vy_tx *tx, struct space *space,
 	 * A primary index is always unique and the new tuple must not
 	 * conflict with existing tuples.
 	 */
-	if (vy_check_dup_key(env, tx, space, pk, stmt) != 0)
+	if (pk->check_is_unique &&
+	    vy_check_is_unique(env, tx, space, pk, stmt) != 0)
 		return -1;
 	return vy_tx_set(tx, pk, stmt);
 }
@@ -1416,7 +1440,7 @@ vy_insert_secondary(struct vy_env *env, struct vy_tx *tx, struct space *space,
 	 * conflict with existing tuples. If the index is not
 	 * unique a conflict is impossible.
 	 */
-	if (index->opts.is_unique &&
+	if (index->check_is_unique &&
 	    !key_update_can_be_skipped(index->key_def->column_mask,
 				       vy_stmt_column_mask(stmt)) &&
 	    (!index->key_def->is_nullable ||
@@ -1425,7 +1449,7 @@ vy_insert_secondary(struct vy_env *env, struct vy_tx *tx, struct space *space,
 							index->env->key_format);
 		if (key == NULL)
 			return -1;
-		int rc = vy_check_dup_key(env, tx, space, index, key);
+		int rc = vy_check_is_unique(env, tx, space, index, key);
 		tuple_unref(key);
 		if (rc != 0)
 			return -1;
diff --git a/src/box/vy_index.c b/src/box/vy_index.c
index 0596c4edef..2ca1667267 100644
--- a/src/box/vy_index.c
+++ b/src/box/vy_index.c
@@ -239,6 +239,7 @@ vy_index_new(struct vy_index_env *index_env, struct vy_cache_env *cache_env,
 	index->space_id = index_def->space_id;
 	index->id = index_def->iid;
 	index->opts = index_def->opts;
+	index->check_is_unique = index->opts.is_unique;
 	vy_index_read_set_new(&index->read_set);
 
 	index_env->index_count++;
diff --git a/src/box/vy_index.h b/src/box/vy_index.h
index 47fa335f2e..4368f6a592 100644
--- a/src/box/vy_index.h
+++ b/src/box/vy_index.h
@@ -158,6 +158,13 @@ struct vy_index {
 	struct key_def *cmp_def;
 	/** Key definition passed by the user. */
 	struct key_def *key_def;
+	/**
+	 * If the following flag is set the index is unique and
+	 * it must be checked for duplicates on INSERT. Otherwise,
+	 * the check can be skipped, either because this index
+	 * is not unique or it is a part of another unique index.
+	 */
+	bool check_is_unique;
 	/**
 	 * Tuple format for tuples of this index created when
 	 * reading pages from disk.
diff --git a/test/vinyl/misc.result b/test/vinyl/misc.result
index a8db51e5ab..8c4bbbb43a 100644
--- a/test/vinyl/misc.result
+++ b/test/vinyl/misc.result
@@ -112,3 +112,66 @@ tx2.rollback - tx1.rollback -- 1
 s:drop()
 ---
 ...
+--
+-- gh-3158: check of duplicates is skipped if the index
+-- is contained by another unique index which is checked.
+--
+s = box.schema.create_space('test', {engine = 'vinyl'})
+---
+...
+i1 = s:create_index('i1', {unique = true, parts = {1, 'unsigned', 2, 'unsigned'}})
+---
+...
+i2 = s:create_index('i2', {unique = true, parts = {2, 'unsigned', 1, 'unsigned'}})
+---
+...
+i3 = s:create_index('i3', {unique = true, parts = {3, 'unsigned', 4, 'unsigned', 5, 'unsigned'}})
+---
+...
+i4 = s:create_index('i4', {unique = true, parts = {5, 'unsigned', 4, 'unsigned'}})
+---
+...
+i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned', 1, 'unsigned'}})
+---
+...
+i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}})
+---
+...
+i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}})
+---
+...
+s:insert{1, 1, 1, 1, 1, 1}
+---
+- [1, 1, 1, 1, 1, 1]
+...
+i1:info().lookup -- 1
+---
+- 1
+...
+i2:info().lookup -- 0
+---
+- 0
+...
+i3:info().lookup -- 0
+---
+- 0
+...
+i4:info().lookup -- 1
+---
+- 1
+...
+i5:info().lookup -- 0
+---
+- 0
+...
+i6:info().lookup -- 0
+---
+- 0
+...
+i7:info().lookup -- 1
+---
+- 1
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/misc.test.lua b/test/vinyl/misc.test.lua
index 8b76a66b74..93dfb4bfe6 100644
--- a/test/vinyl/misc.test.lua
+++ b/test/vinyl/misc.test.lua
@@ -48,3 +48,28 @@ tx2 = box.info.vinyl().tx
 tx2.commit - tx1.commit -- 0
 tx2.rollback - tx1.rollback -- 1
 s:drop()
+
+--
+-- gh-3158: check of duplicates is skipped if the index
+-- is contained by another unique index which is checked.
+--
+s = box.schema.create_space('test', {engine = 'vinyl'})
+i1 = s:create_index('i1', {unique = true, parts = {1, 'unsigned', 2, 'unsigned'}})
+i2 = s:create_index('i2', {unique = true, parts = {2, 'unsigned', 1, 'unsigned'}})
+i3 = s:create_index('i3', {unique = true, parts = {3, 'unsigned', 4, 'unsigned', 5, 'unsigned'}})
+i4 = s:create_index('i4', {unique = true, parts = {5, 'unsigned', 4, 'unsigned'}})
+i5 = s:create_index('i5', {unique = true, parts = {4, 'unsigned', 5, 'unsigned', 1, 'unsigned'}})
+i6 = s:create_index('i6', {unique = true, parts = {4, 'unsigned', 6, 'unsigned', 5, 'unsigned'}})
+i7 = s:create_index('i7', {unique = true, parts = {6, 'unsigned'}})
+
+s:insert{1, 1, 1, 1, 1, 1}
+
+i1:info().lookup -- 1
+i2:info().lookup -- 0
+i3:info().lookup -- 0
+i4:info().lookup -- 1
+i5:info().lookup -- 0
+i6:info().lookup -- 0
+i7:info().lookup -- 1
+
+s:drop()
-- 
GitLab