From 3a220dadf9ff820c65398f767b2441808fd6b563 Mon Sep 17 00:00:00 2001
From: Sergey Ostanevich <sergos@tarantool.org>
Date: Mon, 24 Apr 2023 18:10:36 +0300
Subject: [PATCH] flaky: fix the qsync_advanced test

Add necessary wait for replication to appear on the replica, enforce
correct txn isolation to avoid memtx/vinyl discrepancy.
Remove the test from the fragile list.

Closes tarantool/tarantool-qa#292

NO_DOC=test fix
NO_CHANGELOG=test fix
---
 test/replication/qsync_advanced.result   | 62 +++++++++++++++++++-----
 test/replication/qsync_advanced.test.lua | 35 +++++++++----
 test/replication/suite.ini               |  3 --
 3 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/test/replication/qsync_advanced.result b/test/replication/qsync_advanced.result
index 72ac0c3269..067abac0cf 100644
--- a/test/replication/qsync_advanced.result
+++ b/test/replication/qsync_advanced.result
@@ -37,6 +37,13 @@ disable_sync_mode = function()
 end;
  | ---
  | ...
+enable_sync_mode = function()
+    local s = box.space._space:get(box.space.sync.id)
+    local new_s = s:update({{'=', 6, {is_sync=true}}})
+    box.space._space:replace(new_s)
+end;
+ | ---
+ | ...
 test_run:cmd("setopt delimiter ''");
  | ---
  | - true
@@ -80,11 +87,14 @@ box.space.sync:insert{1} -- success
  | ---
  | - [1]
  | ...
-test_run:cmd('switch replica')
+test_run:switch('replica')
  | ---
  | - true
  | ...
-box.space.sync:select{} -- 1
+box.begin{txn_isolation='read-committed'} t = box.space.sync:select{} box.commit()
+ | ---
+ | ...
+t
  | ---
  | - - [1]
  | ...
@@ -117,6 +127,9 @@ box.space.sync:insert{1}
  | ---
  | - error: Quorum collection for a synchronous transaction is timed out
  | ...
+test_run:wait_lsn('replica', 'default')
+ | ---
+ | ...
 test_run:switch('replica')
  | ---
  | - true
@@ -173,7 +186,10 @@ test_run:switch('replica')
  | ---
  | - true
  | ...
-box.space.sync:select{} -- 1, 2, 3
+box.begin{txn_isolation='read-committed'} t = box.space.sync:select{} box.commit()
+ | ---
+ | ...
+t
  | ---
  | - - [1]
  |   - [2]
@@ -400,6 +416,9 @@ box.space.sync:select{} -- 1, 2, 3, 4, 5
  |   - [4]
  |   - [5]
  | ...
+test_run:wait_lsn('replica', 'default')
+ | ---
+ | ...
 test_run:cmd('switch replica')
  | ---
  | - true
@@ -445,6 +464,9 @@ box.cfg{
 }
  | ---
  | ...
+test_run:wait_fullmesh({'default', 'replica'})
+ | ---
+ | ...
 _ = box.schema.space.create('sync', {is_sync=true, engine=engine})
  | ---
  | ...
@@ -501,7 +523,10 @@ test_run:switch('default')
  | ---
  | - true
  | ...
-box.space.sync:select{} -- 1, 2
+box.begin{txn_isolation='read-committed'} t = box.space.sync:select{} box.commit()
+ | ---
+ | ...
+t
  | ---
  | - - [1]
  |   - [2]
@@ -580,7 +605,10 @@ test_run:switch('replica')
  | ---
  | - true
  | ...
-box.space.sync:select{} -- 1
+box.begin{txn_isolation='read-committed'} t = box.space.sync:select{} box.commit()
+ | ---
+ | ...
+t
  | ---
  | - - [1]
  | ...
@@ -623,6 +651,10 @@ test_run:switch('replica')
  | ---
  | - true
  | ...
+box.space.sync:select{} -- 1
+ | ---
+ | - - [1]
+ | ...
 box.error.injection.set('ERRINJ_WAL_IO', true)
  | ---
  | - ok
@@ -709,6 +741,9 @@ box.space.sync:insert{1} -- success
  | ---
  | - [1]
  | ...
+test_run:wait_lsn('replica', 'default')
+ | ---
+ | ...
 test_run:cmd('switch replica')
  | ---
  | - true
@@ -722,7 +757,7 @@ test_run:switch('default')
  | - true
  | ...
 -- Enable synchronous mode.
-disable_sync_mode()
+enable_sync_mode()
  | ---
  | ...
 -- Space is in sync mode now.
@@ -733,28 +768,29 @@ box.space.sync:insert{2} -- success
  | ---
  | - [2]
  | ...
-box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=1000}
+box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=0.01}
  | ---
  | ...
-box.space.sync:insert{3} -- success
+box.space.sync:insert{3} -- fail
  | ---
- | - [3]
+ | - error: Quorum collection for a synchronous transaction is timed out
  | ...
-box.space.sync:select{} -- 1, 2, 3
+box.space.sync:select{} -- 1, 2
  | ---
  | - - [1]
  |   - [2]
- |   - [3]
+ | ...
+test_run:wait_lsn('replica', 'default') -- needed to propagate ROLLBACK
+ | ---
  | ...
 test_run:cmd('switch replica')
  | ---
  | - true
  | ...
-box.space.sync:select{} -- 1, 2, 3
+box.space.sync:select{} -- 1, 2
  | ---
  | - - [1]
  |   - [2]
- |   - [3]
  | ...
 -- Testcase cleanup.
 test_run:switch('default')
diff --git a/test/replication/qsync_advanced.test.lua b/test/replication/qsync_advanced.test.lua
index 37c285b8d3..179cf3dd0e 100644
--- a/test/replication/qsync_advanced.test.lua
+++ b/test/replication/qsync_advanced.test.lua
@@ -15,6 +15,11 @@ disable_sync_mode = function()
     local new_s = s:update({{'=', 6, {is_sync=false}}})
     box.space._space:replace(new_s)
 end;
+enable_sync_mode = function()
+    local s = box.space._space:get(box.space.sync.id)
+    local new_s = s:update({{'=', 6, {is_sync=true}}})
+    box.space._space:replace(new_s)
+end;
 test_run:cmd("setopt delimiter ''");
 
 box.schema.user.grant('guest', 'replication')
@@ -33,8 +38,9 @@ _ = box.space.sync:create_index('pk')
 box.ctl.promote()
 -- Testcase body.
 box.space.sync:insert{1} -- success
-test_run:cmd('switch replica')
-box.space.sync:select{} -- 1
+test_run:switch('replica')
+box.begin{txn_isolation='read-committed'} t = box.space.sync:select{} box.commit()
+t
 -- Testcase cleanup.
 test_run:switch('default')
 box.space.sync:drop()
@@ -47,6 +53,7 @@ _ = box.schema.space.create('sync', {is_sync=true, engine=engine})
 _ = box.space.sync:create_index('pk')
 -- Testcase body.
 box.space.sync:insert{1}
+test_run:wait_lsn('replica', 'default')
 test_run:switch('replica')
 box.space.sync:select{} -- none
 -- Testcase cleanup.
@@ -66,7 +73,8 @@ box.space.sync:insert{2}
 box.space.sync:insert{3}
 box.space.sync:select{} -- 1, 2, 3
 test_run:switch('replica')
-box.space.sync:select{} -- 1, 2, 3
+box.begin{txn_isolation='read-committed'} t = box.space.sync:select{} box.commit()
+t
 -- Testcase cleanup.
 test_run:switch('default')
 box.space.sync:drop()
@@ -143,6 +151,7 @@ box.space.sync:insert{4} -- success
 box.cfg{replication_synchro_quorum=NUM_INSTANCES}
 box.space.sync:insert{5} -- success
 box.space.sync:select{} -- 1, 2, 3, 4, 5
+test_run:wait_lsn('replica', 'default')
 test_run:cmd('switch replica')
 box.space.sync:select{} -- 1, 2, 3, 4, 5
 -- Testcase cleanup.
@@ -163,6 +172,7 @@ box.cfg{
     replication_synchro_timeout = 1000,                                         \
     replication = replica_url,                                                  \
 }
+test_run:wait_fullmesh({'default', 'replica'})
 _ = box.schema.space.create('sync', {is_sync=true, engine=engine})
 _ = box.space.sync:create_index('pk')
 -- Testcase body.
@@ -179,7 +189,8 @@ box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000}
 box.space.sync:insert{2}
 box.space.sync:select{} -- 1, 2
 test_run:switch('default')
-box.space.sync:select{} -- 1, 2
+box.begin{txn_isolation='read-committed'} t = box.space.sync:select{} box.commit()
+t
 -- Revert cluster configuration.
 test_run:switch('default')
 box.cfg{read_only=false}
@@ -205,7 +216,8 @@ box.space.sync:insert{2}
 box.error.injection.set('ERRINJ_WAL_IO', false)
 box.space.sync:select{} -- 1
 test_run:switch('replica')
-box.space.sync:select{} -- 1
+box.begin{txn_isolation='read-committed'} t = box.space.sync:select{} box.commit()
+t
 -- Testcase cleanup.
 test_run:switch('default')
 box.space.sync:drop()
@@ -222,6 +234,7 @@ _ = box.space.sync:create_index('pk')
 box.space.sync:insert{1}
 box.space.sync:select{} -- 1
 test_run:switch('replica')
+box.space.sync:select{} -- 1
 box.error.injection.set('ERRINJ_WAL_IO', true)
 test_run:switch('default')
 box.space.sync:insert{2}
@@ -255,19 +268,21 @@ test_run:cmd('start server replica with wait=True, wait_load=True')
 _ = box.schema.space.create('sync', {engine=engine})
 _ = box.space.sync:create_index('pk')
 box.space.sync:insert{1} -- success
+test_run:wait_lsn('replica', 'default')
 test_run:cmd('switch replica')
 box.space.sync:select{} -- 1
 test_run:switch('default')
 -- Enable synchronous mode.
-disable_sync_mode()
+enable_sync_mode()
 -- Space is in sync mode now.
 box.cfg{replication_synchro_quorum=NUM_INSTANCES, replication_synchro_timeout=1000}
 box.space.sync:insert{2} -- success
-box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=1000}
-box.space.sync:insert{3} -- success
-box.space.sync:select{} -- 1, 2, 3
+box.cfg{replication_synchro_quorum=BROKEN_QUORUM, replication_synchro_timeout=0.01}
+box.space.sync:insert{3} -- fail
+box.space.sync:select{} -- 1, 2
+test_run:wait_lsn('replica', 'default') -- needed to propagate ROLLBACK
 test_run:cmd('switch replica')
-box.space.sync:select{} -- 1, 2, 3
+box.space.sync:select{} -- 1, 2
 -- Testcase cleanup.
 test_run:switch('default')
 box.space.sync:drop()
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index 9436179d6d..71b800b9d1 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -45,9 +45,6 @@ fragile = {
         "on_schema_init.test.lua": {
             "issues": [ "gh-5291" ]
         },
-        "qsync_advanced.test.lua": {
-            "issues": [ "gh-5340" ]
-        },
         "replicaset_ro_mostly.test.lua": {
             "issues": [ "gh-5342" ]
         },
-- 
GitLab