From 2be2e75caed2085256ff9117b9181c21c1ebb7f2 Mon Sep 17 00:00:00 2001
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Date: Fri, 13 Jan 2023 00:29:43 +0100
Subject: [PATCH] schema: allow _cluster update after join

The function replica_check_id() is called on any change in
_cluster: insert, delete, update. It was supposed to check if the
replica ID is valid - not nil, not out of range (VCLOCK_MAX).

But it was also raising an error when the ID matched this
instance's ID unless the instance was joining. That happened even
if a _cluster tuple was updated without changing the ID at all.
For example, if one would just do
_cluster:replace(_cluster:get(box.info.id)).

Better do the check in the only place where the mutation can
happen - on deletion. Since replica ID is a primary key in
_cluster, it can't be updated there. Only inserted or deleted.

This commit is backported to 2.11, since we want to allow using
persistent names as early as we can in order to simplify the upgrade
process. We also bump the schema version in the following commit in
order to distinguish this version from overs 2.11.X, where persistent
names doesn't work.

Closes #10549

NO_DOC=bugfix and refactoring
NO_CHANGELOG=cannot happen without touching system spaces
NO_TEST=too insignificant for an own test

(cherry picked from commit cb8f4715c58aeaa5008888b66a328f200198a93c)
---
 src/box/alter.cc                    | 16 ++++++++++++++++
 src/box/replication.cc              | 16 ----------------
 test/replication-py/cluster.result  |  8 ++++++--
 test/replication-py/cluster.test.py |  3 +++
 4 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/src/box/alter.cc b/src/box/alter.cc
index 59527c5ec7..191e17e255 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -4248,6 +4248,22 @@ on_replace_dd_cluster(struct trigger *trigger, void *event)
 			return -1;
 		if (replica_check_id(replica_id) != 0)
 			return -1;
+		/*
+		 * It's okay to update the instance id (drop, then insert) while
+		 * it is joining to a cluster as long as the id is set by the
+		 * time bootstrap is complete, which is checked in box_cfg()
+		 * anyway.
+		 *
+		 * For example, the replica could be deleted from the _cluster
+		 * space on the master manually before rebootstrap, in which
+		 * case it will replay this operation during the final join
+		 * stage.
+		 */
+		if (!replicaset.is_joining && replica_id == instance_id) {
+			diag_set(ClientError, ER_LOCAL_INSTANCE_ID_IS_READ_ONLY,
+				 (unsigned)replica_id);
+			return -1;
+		}
 		tt_uuid replica_uuid;
 		if (tuple_field_uuid(old_tuple, BOX_CLUSTER_FIELD_UUID,
 				    &replica_uuid) != 0)
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 389b8b214a..b28bf8519d 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -232,21 +232,6 @@ replica_check_id(uint32_t replica_id)
 			  (unsigned) replica_id);
 		return -1;
 	}
-	/*
-	 * It's okay to update the instance id while it is joining to
-	 * a cluster as long as the id is set by the time bootstrap is
-	 * complete, which is checked in box_cfg() anyway.
-	 *
-	 * For example, the replica could be deleted from the _cluster
-	 * space on the master manually before rebootstrap, in which
-	 * case it will replay this operation during the final join
-	 * stage.
-	 */
-	if (!replicaset.is_joining && replica_id == instance_id) {
-		diag_set(ClientError, ER_LOCAL_INSTANCE_ID_IS_READ_ONLY,
-			  (unsigned) replica_id);
-		return -1;
-	}
 	return 0;
 }
 
@@ -380,7 +365,6 @@ replica_clear_id(struct replica *replica)
 	--replicaset.registered_count;
 	gc_delay_unref();
 	if (replica->id == instance_id) {
-		/* See replica_check_id(). */
 		assert(replicaset.is_joining);
 		instance_id = REPLICA_ID_NIL;
 		box_broadcast_id();
diff --git a/test/replication-py/cluster.result b/test/replication-py/cluster.result
index f68a6af7c6..b96940e698 100644
--- a/test/replication-py/cluster.result
+++ b/test/replication-py/cluster.result
@@ -27,14 +27,18 @@ gh-434: Assertion if replace _cluster tuple for local server
 -------------------------------------------------------------
 box.space._cluster:replace{1, require('uuid').NULL:str()}
 ---
-- error: The local instance id 1 is read-only
+- error: 'Invalid UUID: 00000000-0000-0000-0000-000000000000'
 ...
 box.space._cluster:replace{1, require('uuid').str()}
 ---
-- error: The local instance id 1 is read-only
+- error: Space _cluster does not support updates of instance uuid
 ...
 box.space._cluster:update(1, {{'=', 3, 'test'}})
 ---
+- [1, '<master uuid>', 'test']
+...
+box.space._cluster:delete(1)
+---
 - error: The local instance id 1 is read-only
 ...
 -------------------------------------------------------------
diff --git a/test/replication-py/cluster.test.py b/test/replication-py/cluster.test.py
index ee915b591e..1f36511ede 100644
--- a/test/replication-py/cluster.test.py
+++ b/test/replication-py/cluster.test.py
@@ -89,6 +89,9 @@ server.admin("box.space._cluster:replace{1, require('uuid').str()}")
 # Update of tail is OK
 server.admin("box.space._cluster:update(1, {{'=', 3, 'test'}})")
 
+# Self-delete is not ok.
+server.admin("box.space._cluster:delete(1)")
+
 print("-------------------------------------------------------------")
 print("gh-1140: Assertion if replace _cluster tuple for remote server")
 print("-------------------------------------------------------------")
-- 
GitLab