From 62ba7ba762f13a2273f6314121b0352b19c893b6 Mon Sep 17 00:00:00 2001
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Date: Fri, 6 Apr 2018 16:01:31 +0300
Subject: [PATCH] netbox: don't cancel pending requests on schema change

When a schema version change is detected, there is no reason to
cancel and retry already sent requests. They can be already
executed on a server, and their retrying leads to multiple
execution.

A request must be retried only if a server responded with
WRONG_SCHEMA_VERSION error exactly to this request.

Closes #3325
---
 src/box/lua/net_box.lua  | 26 +++++-------------
 test/box/errinj.result   | 57 ++++++++++++++++++++++++++++++++++++++++
 test/box/errinj.test.lua | 24 +++++++++++++++++
 3 files changed, 88 insertions(+), 19 deletions(-)

diff --git a/src/box/lua/net_box.lua b/src/box/lua/net_box.lua
index 8df70e578e..4ed2b375d0 100644
--- a/src/box/lua/net_box.lua
+++ b/src/box/lua/net_box.lua
@@ -179,29 +179,18 @@ local function create_transport(host, port, user, password, callback,
     local recv_buf         = buffer.ibuf(buffer.READAHEAD)
 
     -- STATE SWITCHING --
-    local function set_state(new_state, new_errno, new_error, schema_version)
+    local function set_state(new_state, new_errno, new_error)
         state = new_state
         last_errno = new_errno
         last_error = new_error
         callback('state_changed', new_state, new_errno, new_error)
         state_cond:broadcast()
-        if state ~= 'active' then
-            -- cancel all requests but the ones bearing the particular
-            -- schema id; if schema id was omitted or we aren't fetching
-            -- schema, cancel everything
-            if not schema_version or state ~= 'fetch_schema' then
-                schema_version = -1
-            end
-            local next_id, next_request = next(requests)
-            while next_id do
-                local id, request = next_id, next_request
-                next_id, next_request = next(requests, id)
-                if request.schema_version ~= schema_version then
-                    requests[id] = nil -- this marks the request as completed
-                    request.errno  = new_errno
-                    request.response = new_error
-                end
+        if state == 'error' or state == 'error_reconnect' then
+            for _, request in pairs(requests) do
+                request.errno = new_errno
+                request.response = new_error
             end
+            requests = {}
         end
     end
 
@@ -543,8 +532,7 @@ local function create_transport(host, port, user, password, callback,
             local body
             body, body_end = decode(body_rpos)
             set_state('fetch_schema',
-                      E_WRONG_SCHEMA_VERSION, body[IPROTO_ERROR_KEY],
-                      response_schema_version)
+                      E_WRONG_SCHEMA_VERSION, body[IPROTO_ERROR_KEY])
             return iproto_schema_sm(schema_version)
         end
         return iproto_sm(schema_version)
diff --git a/test/box/errinj.result b/test/box/errinj.result
index 8e4d57429b..2ece180bbf 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -1090,6 +1090,63 @@ errinj.set("ERRINJ_IPROTO_TX_DELAY", false)
 s:drop()
 ---
 ...
+--
+-- gh-3325: do not cancel already sent requests, when a schema
+-- change is detected.
+--
+s = box.schema.create_space('test')
+---
+...
+pk = s:create_index('pk')
+---
+...
+s:replace{1, 1}
+---
+- [1, 1]
+...
+cn = net_box.connect(box.cfg.listen)
+---
+...
+errinj.set("ERRINJ_WAL_DELAY", true)
+---
+- ok
+...
+ok = nil
+---
+...
+err = nil
+---
+...
+test_run:cmd('setopt delimiter ";"')
+---
+- true
+...
+f = fiber.create(function()
+  local str = 'box.space.test:create_index("sk", {parts = {{2, "integer"}}})'
+  ok, err = pcall(cn.eval, cn, str)
+end)
+test_run:cmd('setopt delimiter ""');
+---
+...
+cn.space.test:get{1}
+---
+- [1, 1]
+...
+errinj.set("ERRINJ_WAL_DELAY", false)
+---
+- ok
+...
+ok, err
+---
+- true
+- null
+...
+cn:close()
+---
+...
+s:drop()
+---
+...
 box.schema.user.revoke('guest', 'read,write,execute','universe')
 ---
 ...
diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua
index d97cd81f8d..2ef574fb1d 100644
--- a/test/box/errinj.test.lua
+++ b/test/box/errinj.test.lua
@@ -365,4 +365,28 @@ for i = 1, 200 do ch:get() end
 errinj.set("ERRINJ_IPROTO_TX_DELAY", false)
 
 s:drop()
+
+--
+-- gh-3325: do not cancel already sent requests, when a schema
+-- change is detected.
+--
+s = box.schema.create_space('test')
+pk = s:create_index('pk')
+s:replace{1, 1}
+cn = net_box.connect(box.cfg.listen)
+errinj.set("ERRINJ_WAL_DELAY", true)
+ok = nil
+err = nil
+test_run:cmd('setopt delimiter ";"')
+f = fiber.create(function()
+  local str = 'box.space.test:create_index("sk", {parts = {{2, "integer"}}})'
+  ok, err = pcall(cn.eval, cn, str)
+end)
+test_run:cmd('setopt delimiter ""');
+cn.space.test:get{1}
+errinj.set("ERRINJ_WAL_DELAY", false)
+ok, err
+cn:close()
+s:drop()
+
 box.schema.user.revoke('guest', 'read,write,execute','universe')
-- 
GitLab