From 87a2989b1a52eeb15d9111ec76589cde122a7315 Mon Sep 17 00:00:00 2001
From: Alexandr Lyapunov <aleks@tarantool.org>
Date: Tue, 11 Jul 2017 18:51:45 +0300
Subject: [PATCH] vinyl: fix rollback of prepared TX

Now if a prepared transaction is aborted, it expects to be the
latest prepared TX in TX manager. It's not true in two cases:

- The transaction failed during preparation. The TX is in partially
  prepared state and must rollback all the changes it made in mems
  but the preparation was not finished and thus the TX could not be
  considered as the latest in TX manager.

- It's a cascading rollback with more than on TX. It would be
  graceful for the latest aborted TX to set the previous TX as
  the latest after the abortion. But the TX does not know the
  previous prepared TX and simply set to NULL appropriate pointer;
  when the time comes for the previous TX to be aborted it
  does not see itself as the latest.

The TX must not expect itself to be the latest but must handle
the last_prepared_tx pointer only if it is the latest.

Fix it and add tests.

Fix #2588 (case 1)
Fix #2591 (case 2)
---
 src/box/vy_tx.c            | 23 ++++++++++++++++++++---
 test/vinyl/errinj.result   | 30 ++++++++++++++++++++++++++++++
 test/vinyl/errinj.test.lua | 12 ++++++++++++
 test/vinyl/gh.result       | 14 ++++++++++++++
 test/vinyl/gh.test.lua     |  6 ++++++
 5 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index a0430fdcf3..8967fd2f20 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -589,9 +589,26 @@ vy_tx_rollback_after_prepare(struct vy_tx *tx)
 
 	struct tx_manager *xm = tx->xm;
 
-	/* Expect cascading rollback in the reverse order. */
-	assert(xm->last_prepared_tx == tx);
-	xm->last_prepared_tx = NULL;
+	/*
+	 * There are two reasons of rollback_after_prepare:
+	 * 1) Fail in the middle of vy_tx_prepare call.
+	 * 2) Cascading rollback after WAL fail.
+	 *
+	 * If a TX is the latest prepared TX and the it is rollbacked,
+	 * it's certainly the case (2) and we should set xm->last_prepared_tx
+	 * to the previous prepared TX, if any.
+	 * But doesn't know the previous TX.
+	 * On the other hand we may expect that cascading rollback will
+	 * concern all the prepared TXs, all of them will be rollbacked
+	 * and xm->last_prepared_tx must be set to NULL in the end.
+	 * Thus we can set xm->last_prepared_tx to NULL now and it will be
+	 * correct in the end of the cascading rollback.
+	 *
+	 * We must not change xm->last_prepared_tx in all other cases,
+	 * it will be changed by the corresponding TX.
+	 */
+	if (xm->last_prepared_tx == tx)
+		xm->last_prepared_tx = NULL;
 
 	struct txv *v;
 	stailq_foreach_entry(v, &tx->log, next_in_log) {
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 40d7975272..1752bc220b 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -878,3 +878,33 @@ box.snapshot()
 s:drop()
 ---
 ...
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('i1', {parts = {1, 'unsigned'}})
+---
+...
+c = 10
+---
+...
+errinj.set("ERRINJ_WAL_WRITE_DISK", true)
+---
+- ok
+...
+for i = 1,10 do fiber.create(function() pcall(s.replace, s, {i}) c = c - 1 end) end
+---
+...
+while c ~= 0 do fiber.sleep(0.001) end
+---
+...
+s:select{}
+---
+- []
+...
+errinj.set("ERRINJ_WAL_WRITE_DISK", false)
+---
+- ok
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index bddc2936b6..3a599218f5 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -353,3 +353,15 @@ errinj.set('ERRINJ_WAL_IO', false)
 _ = s:create_index('pk')
 box.snapshot()
 s:drop()
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('i1', {parts = {1, 'unsigned'}})
+
+c = 10
+errinj.set("ERRINJ_WAL_WRITE_DISK", true)
+for i = 1,10 do fiber.create(function() pcall(s.replace, s, {i}) c = c - 1 end) end
+while c ~= 0 do fiber.sleep(0.001) end
+s:select{}
+errinj.set("ERRINJ_WAL_WRITE_DISK", false)
+
+s:drop()
diff --git a/test/vinyl/gh.result b/test/vinyl/gh.result
index 15570d6118..2ca16889e7 100644
--- a/test/vinyl/gh.result
+++ b/test/vinyl/gh.result
@@ -640,3 +640,17 @@ s.index.i1:count() == s.index.i2:count()
 s:drop()
 ---
 ...
+-- https://github.com/tarantool/tarantool/issues/2588
+s = box.schema.space.create('vinyl', { engine = 'vinyl' })
+---
+...
+i = box.space.vinyl:create_index('primary')
+---
+...
+s:replace({1, string.rep('x', 10 * 1024 * 1024)})
+---
+- error: Failed to allocate 10485799 bytes in lsregion_alloc for mem_stmt
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/gh.test.lua b/test/vinyl/gh.test.lua
index c14bab4323..9756d153b8 100644
--- a/test/vinyl/gh.test.lua
+++ b/test/vinyl/gh.test.lua
@@ -266,3 +266,9 @@ test_run:cmd("setopt delimiter ''");
 
 s.index.i1:count() == s.index.i2:count()
 s:drop()
+
+-- https://github.com/tarantool/tarantool/issues/2588
+s = box.schema.space.create('vinyl', { engine = 'vinyl' })
+i = box.space.vinyl:create_index('primary')
+s:replace({1, string.rep('x', 10 * 1024 * 1024)})
+s:drop()
-- 
GitLab