From 17acae1f991b716dc38f892d8cbc2d230192600b Mon Sep 17 00:00:00 2001
From: Serge Petrenko <sergepetrenko@tarantool.org>
Date: Mon, 25 Mar 2019 20:40:02 +0300
Subject: [PATCH] test: fix long_row_timeout.test.lua failure in parallel mode

The test used to write big rows (20 MB in size), so when run in parallel
mode, it put a high load on the disk and CPU, which made appliers time
out multiple times during reads and caused the test to fail
occasionally.
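So, instead of writing huge rows in the test, introduce a new error
injection, ERRINJ_SIO_READ_MAX, which limits sio_read() to reading at
most the configured number of bytes per call. This keeps the test
relevant and makes it a lot more lightweight. Since the test now depends
on an error injection, also add it to the release_disabled list.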

Closes #4062
---
 src/lib/core/errinj.h                      |  1 +
 src/lib/core/sio.c                         |  5 +++++
 test/box/errinj.result                     |  4 +++-
 test/replication/long_row_timeout.result   | 18 +++++-------------
 test/replication/long_row_timeout.test.lua | 13 +++++--------
 test/replication/suite.ini                 |  2 +-
 6 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index 41783cc74d..99891c5b9e 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -125,6 +125,7 @@ struct errinj {
 	_(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \
 	_(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \
+	_(ERRINJ_SIO_READ_MAX, ERRINJ_INT, {.iparam = -1}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/src/lib/core/sio.c b/src/lib/core/sio.c
index 8f25b8159a..ecd6d4128c 100644
--- a/src/lib/core/sio.c
+++ b/src/lib/core/sio.c
@@ -41,6 +41,7 @@
 #include "trivia/util.h"
 #include "exception.h"
 #include "uri/uri.h"
+#include "errinj.h"
 
 const char *
 sio_socketname(int fd)
@@ -222,6 +223,10 @@ sio_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
 ssize_t
 sio_read(int fd, void *buf, size_t count)
 {
+	struct errinj *inj = errinj(ERRINJ_SIO_READ_MAX, ERRINJ_INT);
+	if (inj != NULL && inj->iparam > 0)
+		count = MIN(count, (size_t)inj->iparam);
+
 	ssize_t n = read(fd, buf, count);
 	if (n < 0 && !sio_wouldblock(errno)) {
 		/*
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8e76b21b36..2bc41ac5b6 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -42,6 +42,8 @@ errinj.info()
     state: false
   ERRINJ_PORT_DUMP:
     state: false
+  ERRINJ_RELAY_BREAK_LSN:
+    state: -1
   ERRINJ_WAL_IO:
     state: false
   ERRINJ_WAL_FALLOCATE:
@@ -66,7 +68,7 @@ errinj.info()
     state: 0
   ERRINJ_XLOG_META:
     state: false
-  ERRINJ_RELAY_BREAK_LSN:
+  ERRINJ_SIO_READ_MAX:
     state: -1
   ERRINJ_VY_INDEX_FILE_RENAME:
     state: false
diff --git a/test/replication/long_row_timeout.result b/test/replication/long_row_timeout.result
index 5b5a46d510..9284d9c2ae 100644
--- a/test/replication/long_row_timeout.result
+++ b/test/replication/long_row_timeout.result
@@ -25,32 +25,27 @@ box.info.replication[2].downstream.status
 ---
 - follow
 ...
-default_memtx_max_tuple_size = box.cfg.memtx_max_tuple_size
----
-...
+-- make applier incapable of reading rows in one go, so that it
+-- yields a couple of times.
 test_run:cmd('switch replica')
 ---
 - true
 ...
-box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024}
+box.error.injection.set("ERRINJ_SIO_READ_MAX", 1)
 ---
+- ok
 ...
 test_run:cmd('switch default')
 ---
 - true
 ...
-box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024}
----
-...
--- insert some big rows which cannot be read in one go, so applier yields
--- on read a couple of times.
 s = box.schema.space.create('test')
 ---
 ...
 _ = s:create_index('pk')
 ---
 ...
-for i = 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * 1024)} collectgarbage('collect') end
+for i = 1,5 do box.space.test:replace{1, digest.urandom(1024)} collectgarbage('collect') end
 ---
 ...
 -- replication_disconnect_timeout is 4 * replication_timeout, check that
@@ -100,9 +95,6 @@ test_run:cmd('delete server replica')
 test_run:cleanup_cluster()
 ---
 ...
-box.cfg{memtx_max_tuple_size = default_memtx_max_tuple_size}
----
-...
 box.schema.user.revoke('guest', 'replication')
 ---
 ...
diff --git a/test/replication/long_row_timeout.test.lua b/test/replication/long_row_timeout.test.lua
index 6e1d38b112..7fc52e3e22 100644
--- a/test/replication/long_row_timeout.test.lua
+++ b/test/replication/long_row_timeout.test.lua
@@ -10,17 +10,15 @@ test_run:cmd('create server replica with rpl_master=default, script="replication
 test_run:cmd('start server replica')
 box.info.replication[2].downstream.status
 
-default_memtx_max_tuple_size = box.cfg.memtx_max_tuple_size
+
+-- make applier incapable of reading rows in one go, so that it
+-- yields a couple of times.
 test_run:cmd('switch replica')
-box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024}
+box.error.injection.set("ERRINJ_SIO_READ_MAX", 1)
 test_run:cmd('switch default')
-box.cfg{memtx_max_tuple_size = 21 * 1024 * 1024}
-
--- insert some big rows which cannot be read in one go, so applier yields
--- on read a couple of times.
 s = box.schema.space.create('test')
 _ = s:create_index('pk')
-for i = 1,5 do box.space.test:replace{1, digest.urandom(20 * 1024 * 1024)} collectgarbage('collect') end
+for i = 1,5 do box.space.test:replace{1, digest.urandom(1024)} collectgarbage('collect') end
 -- replication_disconnect_timeout is 4 * replication_timeout, check that
 -- replica doesn't time out too early.
 test_run:cmd('setopt delimiter ";"')
@@ -42,7 +40,6 @@ test_run:cmd('stop server replica')
 test_run:cmd('cleanup server replica')
 test_run:cmd('delete server replica')
 test_run:cleanup_cluster()
-box.cfg{memtx_max_tuple_size = default_memtx_max_tuple_size}
 box.schema.user.revoke('guest', 'replication')
 
 -- Rotate xlogs so as not to replicate the huge rows in
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index 6e9e3edd0e..dd5b01405b 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -3,7 +3,7 @@ core = tarantool
 script =  master.lua
 description = tarantool/box, replication
 disabled = consistent.test.lua
-release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua
+release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua
 config = suite.cfg
 lua_libs = lua/fast_replica.lua lua/rlimit.lua
 use_unix_sockets = True
-- 
GitLab