From 4de5ef85864dbe64b99b0f2f5171efdd3c482267 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@tarantool.org>
Date: Wed, 11 Jan 2023 14:33:28 +0300
Subject: [PATCH] lua: gracefully fail space on/before replace trigger if txn
 was aborted

lbox_push_event_f and lbox_push_event_f callback functions used for
passing the statement between txn and space on/before replace Lua
triggers don't assume that the transaction may be aborted by yield
after the current statement began (this may happen if a trigger callback
yields). In this case, all statements in txn would be rolled back and
txn_current_stmt would return NULL, leading to a crash.

Let's fix this by checking if the transaction is still active and
raising an error immediately if it isn't, thus skipping Lua triggers.

Notes:
 - We merged lbox_pop_txn_stmt_and_check_format into lbox_pop_txn_stmt,
   because the latter is only called by the former.
 - Since lbox_push_event_f callback may now fail, we have to update
   lbox_trigger_run to handle it.

Closes #8027

NO_DOC=bug fix

(cherry picked from commit 1a678a5e88b0c0ba7565d0027bf819e5c47f8999)
---
 .../gh-8027-txn-handle-abort-in-trigger.md    |  5 +
 src/box/lua/space.cc                          | 64 +++++++------
 src/lua/trigger.c                             | 20 ++--
 src/lua/trigger.h                             |  6 +-
 ..._8027_txn_handle_abort_in_trigger_test.lua | 92 +++++++++++++++++++
 5 files changed, 150 insertions(+), 37 deletions(-)
 create mode 100644 changelogs/unreleased/gh-8027-txn-handle-abort-in-trigger.md
 create mode 100644 test/box-luatest/gh_8027_txn_handle_abort_in_trigger_test.lua

diff --git a/changelogs/unreleased/gh-8027-txn-handle-abort-in-trigger.md b/changelogs/unreleased/gh-8027-txn-handle-abort-in-trigger.md
new file mode 100644
index 0000000000..ace2f42742
--- /dev/null
+++ b/changelogs/unreleased/gh-8027-txn-handle-abort-in-trigger.md
@@ -0,0 +1,5 @@
+## bugfix/core
+
+* Fixed a crash that happened if the transaction was aborted (for example,
+  by fiber yield with MVCC off) while space `on_replace` or `before_replace`
+  was running (gh-8027).
diff --git a/src/box/lua/space.cc b/src/box/lua/space.cc
index 1cdbda409e..5ab5245ad4 100644
--- a/src/box/lua/space.cc
+++ b/src/box/lua/space.cc
@@ -58,13 +58,22 @@ extern "C" {
 #include "vclock/vclock.h"
 
 /**
- * Trigger function for all spaces
+ * Function invoked before execution of a before_replace or on_replace trigger.
+ * It pushes the current transaction statement to Lua stack to be passed to
+ * the trigger callback.
  */
 static int
 lbox_push_txn_stmt(struct lua_State *L, void *event)
 {
-	struct txn_stmt *stmt = txn_current_stmt((struct txn *) event);
-
+	struct txn *txn = (struct txn *)event;
+	/*
+	 * The transaction could be aborted while the previous trigger was
+	 * running (e.g. if the trigger callback yielded).
+	 */
+	if (txn_check_can_continue(txn) != 0)
+		return -1;
+	struct txn_stmt *stmt = txn_current_stmt(txn);
+	assert(stmt != NULL);
 	if (stmt->old_tuple) {
 		luaT_pushtuple(L, stmt->old_tuple);
 	} else {
@@ -82,11 +91,34 @@ lbox_push_txn_stmt(struct lua_State *L, void *event)
 	return 4;
 }
 
+/**
+ * Function invoked upon successful execution of a before_replace trigger.
+ * It resets the current transaction statement to the tuple returned by
+ * the trigger callback.
+ */
 static int
 lbox_pop_txn_stmt(struct lua_State *L, int nret, void *event)
 {
-	struct txn_stmt *stmt = txn_current_stmt((struct txn *) event);
-
+	struct txn *txn = (struct txn *)event;
+	/*
+	 * The transaction could be aborted while the trigger was running
+	 * (e.g. if the trigger callback yielded).
+	 */
+	if (txn_check_can_continue(txn) != 0)
+		return -1;
+	struct txn_stmt *stmt = txn_current_stmt(txn);
+	assert(stmt != NULL);
+	/*
+	 * Tuple returned by a before_replace trigger callback must
+	 * match the space format.
+	 *
+	 * Since upgrade from pre-1.7.5 versions passes tuple with not
+	 * suitable format to before_replace triggers during recovery,
+	 * we need to disable format validation until box is configured.
+	 */
+	if (box_is_configured() && stmt->new_tuple != NULL &&
+	    tuple_validate(stmt->space->format, stmt->new_tuple) != 0)
+		return -1;
 	if (nret < 1) {
 		/* No return value - nothing to do. */
 		return 0;
@@ -109,25 +141,6 @@ lbox_pop_txn_stmt(struct lua_State *L, int nret, void *event)
 	return 0;
 }
 
-/**
- * Wrapper over lbox_pop_txn_stmt that checks tuple's format.
- */
-static int
-lbox_pop_txn_stmt_and_check_format(struct lua_State *L, int nret, void *event)
-{
-	struct txn_stmt *stmt = txn_current_stmt((struct txn *) event);
-	struct tuple *tuple = stmt->new_tuple;
-	/*
-	 * Since upgrade from pre-1.7.5 versions passes tuple with not suitable
-	 * format to before_replace triggers during recovery, we need to disable
-	 * format validation until box is configured.
-	 */
-	if (box_is_configured() && tuple != NULL &&
-	    tuple_validate(stmt->space->format, tuple) != 0)
-		return -1;
-	return lbox_pop_txn_stmt(L, nret, event);
-}
-
 /**
  * Set/Reset/Get space.on_replace trigger
  */
@@ -167,8 +180,7 @@ lbox_space_before_replace(struct lua_State *L)
 	lua_pop(L, 1);
 
 	return lbox_trigger_reset(L, 3, &space->before_replace,
-				  lbox_push_txn_stmt,
-				  lbox_pop_txn_stmt_and_check_format);
+				  lbox_push_txn_stmt, lbox_pop_txn_stmt);
 }
 
 /**
diff --git a/src/lua/trigger.c b/src/lua/trigger.c
index 55c0bae37b..2ecae0af82 100644
--- a/src/lua/trigger.c
+++ b/src/lua/trigger.c
@@ -68,6 +68,7 @@ static int
 lbox_trigger_run(struct trigger *ptr, void *event)
 {
 	struct lbox_trigger *trigger = (struct lbox_trigger *) ptr;
+	int rc = -1;
 	/*
 	 * Create a new coro and reference it. Remove it
 	 * from tarantool_L stack, which is a) scarce
@@ -77,11 +78,11 @@ lbox_trigger_run(struct trigger *ptr, void *event)
 	 * it is on.
 	 */
 	lua_State *L;
-	int coro_ref;
+	int coro_ref = LUA_NOREF;
 	if (fiber()->storage.lua.stack == NULL) {
 		L = luaT_newthread(tarantool_L);
 		if (L == NULL)
-			return -1;
+			goto out;
 		coro_ref = luaL_ref(tarantool_L, LUA_REGISTRYINDEX);
 	} else {
 		L = fiber()->storage.lua.stack;
@@ -92,6 +93,8 @@ lbox_trigger_run(struct trigger *ptr, void *event)
 	int nargs = 0;
 	if (trigger->push_event != NULL) {
 		nargs = trigger->push_event(L, event);
+		if (nargs < 0)
+			goto out;
 	}
 	/*
 	 * There are two cases why we can't access `trigger` after
@@ -105,24 +108,23 @@ lbox_trigger_run(struct trigger *ptr, void *event)
 	 */
 	lbox_pop_event_f pop_event = trigger->pop_event;
 	trigger = NULL;
-	if (luaT_call(L, nargs, LUA_MULTRET)) {
-		luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref);
-		return -1;
-	}
+	if (luaT_call(L, nargs, LUA_MULTRET))
+		goto out;
 	int nret = lua_gettop(L) - top;
 	if (pop_event != NULL &&
 	    pop_event(L, nret, event) != 0) {
 		lua_settop(L, top);
-		luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref);
-		return -1;
+		goto out;
 	}
 	/*
 	 * Clear the stack after pop_event saves all
 	 * the needed return values.
 	 */
 	lua_settop(L, top);
+	rc = 0;
+out:
 	luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref);
-	return 0;
+	return rc;
 }
 
 static struct lbox_trigger *
diff --git a/src/lua/trigger.h b/src/lua/trigger.h
index 1d1dcce352..2e8dcfc3ab 100644
--- a/src/lua/trigger.h
+++ b/src/lua/trigger.h
@@ -38,8 +38,10 @@ extern "C" {
 struct lua_State;
 
 /**
- * The job of lbox_push_event_f is to push trigger arguments
- * to Lua stack.
+ * If not NULL, lbox_push_event_f will be called before execution
+ * of the trigger callback. It's supposed to push trigger arguments
+ * to Lua stack and return the number of pushed values on success.
+ * On error, it should set diag and return a negative number.
  */
 typedef int
 (*lbox_push_event_f)(struct lua_State *L, void *event);
diff --git a/test/box-luatest/gh_8027_txn_handle_abort_in_trigger_test.lua b/test/box-luatest/gh_8027_txn_handle_abort_in_trigger_test.lua
new file mode 100644
index 0000000000..ac2eecb483
--- /dev/null
+++ b/test/box-luatest/gh_8027_txn_handle_abort_in_trigger_test.lua
@@ -0,0 +1,92 @@
+local server = require('luatest.server')
+local t = require('luatest')
+
+local g = t.group()
+
+g.before_all(function(cg)
+    cg.server = server:new({
+        alias = 'default',
+        box_cfg = {memtx_use_mvcc_engine = false},
+    })
+    cg.server:start()
+end)
+
+g.after_all(function(cg)
+    cg.server:stop()
+end)
+
+g.after_each(function(cg)
+    cg.server:exec(function()
+        if box.space.test ~= nil then
+            box.space.test:drop()
+        end
+    end)
+end)
+
+g.test_yield_before_replace = function(cg)
+    cg.server:exec(function()
+        local fiber = require('fiber')
+        local t = require('luatest')
+        local s = box.schema.create_space('test')
+        s:create_index('primary')
+        s:before_replace(function(_, new)
+            if new[1] == 10 then
+                fiber.yield()
+            end
+            return new
+        end)
+        s:before_replace(function(_, new)
+            if new[2] == 10 then
+                fiber.yield()
+            end
+            return new
+        end)
+        local errmsg = 'Transaction has been aborted by a fiber yield'
+        t.assert_error_msg_equals(errmsg, s.insert, s, {10, 1})
+        t.assert_error_msg_equals(errmsg, s.insert, s, {1, 10})
+        box.begin()
+        s:insert({1, 1})
+        t.assert_error_msg_equals(errmsg, s.insert, s, {10, 1})
+        t.assert_error_msg_equals(errmsg, box.commit)
+        t.assert_equals(s:select(), {})
+        box.begin()
+        s:insert({1, 1})
+        t.assert_error_msg_equals(errmsg, s.insert, s, {2, 10})
+        t.assert_error_msg_equals(errmsg, box.commit)
+        t.assert_equals(s:select(), {})
+    end)
+end
+
+g.test_yield_on_replace = function(cg)
+    cg.server:exec(function()
+        local fiber = require('fiber')
+        local t = require('luatest')
+        local s = box.schema.create_space('test')
+        s:create_index('primary')
+        s:on_replace(function(_, new)
+            if new[1] == 10 then
+                fiber.yield()
+            end
+            return new
+        end)
+        s:on_replace(function(_, new)
+            if new[2] == 10 then
+                fiber.yield()
+            end
+            return new
+        end)
+        local errmsg = 'Transaction has been aborted by a fiber yield'
+        t.assert_error_msg_equals(errmsg, s.insert, s, {10, 1})
+        t.assert_error_msg_equals(errmsg, s.insert, s, {1, 10})
+        box.begin()
+        s:insert({1, 1})
+        s:insert({10, 1})
+        t.assert_error_msg_equals(errmsg, box.commit)
+        t.assert_equals(s:select(), {})
+        box.begin()
+        s:insert({1, 1})
+        t.assert_error_msg_equals(errmsg, s.insert, s, {2, 10})
+        t.assert_error_msg_equals(errmsg, box.commit)
+        t.assert_equals(s:select(), {})
+    end)
+end
-- 
GitLab