From 44b28d1c715a2cfc0d9f47ad88d6cec1e655cf6e Mon Sep 17 00:00:00 2001
From: Georgy Moshkin <gmoshkin@picodata.io>
Date: Fri, 2 Jun 2023 16:06:11 +0300
Subject: [PATCH] fix: unfinished ddl over snapshot

---
 src/storage.rs       | 10 +++------
 test/int/test_ddl.py | 53 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/src/storage.rs b/src/storage.rs
index eeeb466a88..2db24dfcb9 100644
--- a/src/storage.rs
+++ b/src/storage.rs
@@ -596,13 +596,9 @@ impl Clusterwide {
 
         for space_def in self.spaces.iter()? {
             if !space_def.operable {
-                // This means a ddl operation wasn't committed yet. We probably
-                // don't want unfinished ddl artifacts comming over the snapshot
-                // so this will likely never happen.
-                // TODO: or do we actually want this working?
-                crate::warn_or_panic!(
-                    "unfinished ddl operation arrived via snapshot: {space_def:?}"
-                );
+                // If it so happens, that we receive an unfinished schema change via snapshot,
+                // which will somehow get finished without the governor sending us
+                // a proc_apply_schema_change rpc, it will be a very sad day.
                 continue;
             }
 
diff --git a/test/int/test_ddl.py b/test/int/test_ddl.py
index 77eeeeefc3..6b9153ec02 100644
--- a/test/int/test_ddl.py
+++ b/test/int/test_ddl.py
@@ -369,6 +369,59 @@ def test_ddl_create_sharded_space(cluster: Cluster):
     assert i2.call("box.space._index:get", [space_id, 1]) == tt_bucket_id_def
 
 
+def test_ddl_create_space_unfinished_from_snapshot(cluster: Cluster):
+    i1, i2, i3 = cluster.deploy(instance_count=3)
+
+    # Put i3 to sleep, so that schema change get's blocked.
+    i3.terminate()
+
+    # Start schema change.
+    space_id = 732
+    index = i1.propose_create_space(
+        dict(
+            id=space_id,
+            name="some space name",
+            format=[dict(name="id", type="unsigned", is_nullable=False)],
+            primary_key=[dict(field="id")],
+            distribution=dict(
+                kind="sharded_implicitly", sharding_key=["id"], sharding_fn="murmur3"
+            ),
+        ),
+        wait_index=False,
+    )
+
+    # Schema change is blocked.
+    with pytest.raises(ReturnError, match="timeout"):
+        i1.raft_wait_index(index, timeout=3)
+
+    # Space is created but is not operable.
+    assert i1.call("box.space._space:get", space_id) is not None
+    assert not i1.eval("return box.space._pico_space:get(...).operable", space_id)
+    assert i2.call("box.space._space:get", space_id) is not None
+    assert not i2.eval("return box.space._pico_space:get(...).operable", space_id)
+
+    # Compact raft log to trigger snapshot with an unfinished schema change.
+    i1.raft_compact_log()
+    i2.raft_compact_log()
+
+    # Add a new replicaset who will boot from snapshot.
+    i4 = cluster.add_instance(wait_online=True)
+
+    # TODO: test readonly replica doing the same
+
+    # It has received an unfinished schema change.
+    assert not i4.eval("return box.space._pico_space:get(...).operable", space_id)
+
+    # Wake the instance, who was blocking the schema change.
+    i3.start()
+    i3.wait_online()
+
+    # The schema change finalized.
+    for i in cluster.instances:
+        assert i.call("box.space._space:get", space_id) is not None
+        assert i.eval("return box.space._pico_space:get(...).operable", space_id)
+
+
 def test_ddl_create_space_partial_failure(cluster: Cluster):
     # i2 & i3 are for quorum
     i1, i2, i3, i4, i5 = cluster.deploy(instance_count=5)
-- 
GitLab