From 44b28d1c715a2cfc0d9f47ad88d6cec1e655cf6e Mon Sep 17 00:00:00 2001 From: Georgy Moshkin <gmoshkin@picodata.io> Date: Fri, 2 Jun 2023 16:06:11 +0300 Subject: [PATCH] fix: unfinished ddl over snapshot --- src/storage.rs | 10 +++------ test/int/test_ddl.py | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/src/storage.rs b/src/storage.rs index eeeb466a88..2db24dfcb9 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -596,13 +596,9 @@ impl Clusterwide { for space_def in self.spaces.iter()? { if !space_def.operable { - // This means a ddl operation wasn't committed yet. We probably - // don't want unfinished ddl artifacts comming over the snapshot - // so this will likely never happen. - // TODO: or do we actually want this working? - crate::warn_or_panic!( - "unfinished ddl operation arrived via snapshot: {space_def:?}" - ); + // If it so happens, that we receive an unfinished schema change via snapshot, + // which will somehow get finished without the governor sending us + // a proc_apply_schema_change rpc, it will be a very sad day. continue; } diff --git a/test/int/test_ddl.py b/test/int/test_ddl.py index 77eeeeefc3..6b9153ec02 100644 --- a/test/int/test_ddl.py +++ b/test/int/test_ddl.py @@ -369,6 +369,59 @@ def test_ddl_create_sharded_space(cluster: Cluster): assert i2.call("box.space._index:get", [space_id, 1]) == tt_bucket_id_def +def test_ddl_create_space_unfinished_from_snapshot(cluster: Cluster): + i1, i2, i3 = cluster.deploy(instance_count=3) + + # Put i3 to sleep, so that schema change get's blocked. + i3.terminate() + + # Start schema change. + space_id = 732 + index = i1.propose_create_space( + dict( + id=space_id, + name="some space name", + format=[dict(name="id", type="unsigned", is_nullable=False)], + primary_key=[dict(field="id")], + distribution=dict( + kind="sharded_implicitly", sharding_key=["id"], sharding_fn="murmur3" + ), + ), + wait_index=False, + ) + + # Schema change is blocked. + with pytest.raises(ReturnError, match="timeout"): + i1.raft_wait_index(index, timeout=3) + + # Space is created but is not operable. + assert i1.call("box.space._space:get", space_id) is not None + assert not i1.eval("return box.space._pico_space:get(...).operable", space_id) + assert i2.call("box.space._space:get", space_id) is not None + assert not i2.eval("return box.space._pico_space:get(...).operable", space_id) + + # Compact raft log to trigger snapshot with an unfinished schema change. + i1.raft_compact_log() + i2.raft_compact_log() + + # Add a new replicaset who will boot from snapshot. + i4 = cluster.add_instance(wait_online=True) + + # TODO: test readonly replica doing the same + + # It has received an unfinished schema change. + assert not i4.eval("return box.space._pico_space:get(...).operable", space_id) + + # Wake the instance, who was blocking the schema change. + i3.start() + i3.wait_online() + + # The schema change finalized. + for i in cluster.instances: + assert i.call("box.space._space:get", space_id) is not None + assert i.eval("return box.space._pico_space:get(...).operable", space_id) + + def test_ddl_create_space_partial_failure(cluster: Cluster): # i2 & i3 are for quorum i1, i2, i3, i4, i5 = cluster.deploy(instance_count=5) -- GitLab