From 786eb2acdd0ac6bf3cc0079afe5606e55dce2980 Mon Sep 17 00:00:00 2001
From: Nikolay Shirokovskiy <nshirokovskiy@tarantool.org>
Date: Mon, 27 Nov 2023 15:17:19 +0300
Subject: [PATCH] main: don't break graceful shutdown on init script exit

Graceful shutdown is done in a special fiber which is started for
example on SIGTERM. So it can run concurrently with fiber executing
Tarantool init script. On init fiber exit we break event loop to pass
control back to the Tarantool initialization code. But we fail to run
event loop a bit more to finish graceful shutdown.

The test is a bit contrived. A more real-world case is when Tarantool is
terminated during a lingering box.cfg().

Close #9411

NO_DOC=bugfix
---
 ...-graceful-shutdown-break-on-script-exit.md |  3 ++
 src/main.cc                                   | 14 ++++---
 ...ful_shutdown_break_on_script_exit_test.lua | 42 +++++++++++++++++++
 3 files changed, 53 insertions(+), 6 deletions(-)
 create mode 100644 changelogs/unreleased/gh-9411-fix-graceful-shutdown-break-on-script-exit.md
 create mode 100644 test/app-luatest/gh_9411_fix_graceful_shutdown_break_on_script_exit_test.lua

diff --git a/changelogs/unreleased/gh-9411-fix-graceful-shutdown-break-on-script-exit.md b/changelogs/unreleased/gh-9411-fix-graceful-shutdown-break-on-script-exit.md
new file mode 100644
index 0000000000..4995050b2d
--- /dev/null
+++ b/changelogs/unreleased/gh-9411-fix-graceful-shutdown-break-on-script-exit.md
@@ -0,0 +1,3 @@
+## bugfix/core
+
+* Fixed graceful shutdown break on init script exit (gh-9411).
diff --git a/src/main.cc b/src/main.cc
index 49980efa03..018c2ce1e2 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -107,7 +107,8 @@ static double start_time;
 /** A preallocated fiber to run on_shutdown triggers. */
 static struct fiber *on_shutdown_fiber = NULL;
 /** A flag restricting repeated execution of tarantool_exit(). */
-static bool is_shutting_down = false;
+static bool shutdown_started = false;
+static bool shutdown_finished = false;
 static int exit_code = 0;
 
 char tarantool_path[PATH_MAX];
@@ -162,7 +163,7 @@ on_shutdown_f(va_list ap)
 		fiber_sleep(0.0);
 
 	/* Handle spurious wakeups. */
-	while (!is_shutting_down)
+	while (!shutdown_started)
 		fiber_yield();
 
 	if (on_shutdown_run_triggers() != 0) {
@@ -170,6 +171,7 @@ on_shutdown_f(va_list ap)
 		diag_log();
 		diag_clear(diag_get());
 	}
+	shutdown_finished = true;
 	ev_break(loop(), EVBREAK_ALL);
 	return 0;
 }
@@ -178,7 +180,7 @@ void
 tarantool_exit(int code)
 {
 	start_loop = false;
-	if (is_shutting_down) {
+	if (shutdown_started) {
 		/*
 		 * We are already running on_shutdown triggers,
 		 * and will exit as soon as they'll finish.
@@ -186,7 +188,7 @@ tarantool_exit(int code)
 		 */
 		return;
 	}
-	is_shutting_down = true;
+	shutdown_started = true;
 	exit_code = code;
 	box_broadcast_fmt("box.shutdown", "%b", true);
 	fiber_wakeup(on_shutdown_fiber);
@@ -1074,10 +1076,10 @@ main(int argc, char **argv)
 	 * init script, and there was neither os.exit nor SIGTERM, then call
 	 * tarantool_exit and start an event loop to run on_shutdown triggers.
 	 */
-	if (!is_shutting_down) {
+	if (!shutdown_started)
 		tarantool_exit(exit_code);
+	if (!shutdown_finished)
 		ev_run(loop(), 0);
-	}
 	/* freeing resources */
 	free((void *)instance.name);
 	free((void *)instance.config);
diff --git a/test/app-luatest/gh_9411_fix_graceful_shutdown_break_on_script_exit_test.lua b/test/app-luatest/gh_9411_fix_graceful_shutdown_break_on_script_exit_test.lua
new file mode 100644
index 0000000000..fa86badaf4
--- /dev/null
+++ b/test/app-luatest/gh_9411_fix_graceful_shutdown_break_on_script_exit_test.lua
@@ -0,0 +1,42 @@
+local popen = require('popen')
+local t = require('luatest')
+
+local g = t.group()
+
+g.after_each(function()
+    if g.handle ~= nil then
+        g.handle:close()
+    end
+    g.handle = nil
+end)
+
+g.test = function()
+    local script = [[
+        local fiber = require('fiber')
+
+        box.ctl.set_on_shutdown_timeout(1)
+        box.ctl.on_shutdown(function()
+            fiber.sleep(0.2)
+            print('shutdown callback finished')
+        end, nil)
+
+        fiber.create(function()
+            os.exit(0)
+        end)
+
+        fiber.sleep(0.1)
+    ]]
+    local tarantool_bin = arg[-1]
+    local handle, err = popen.new({tarantool_bin, '-e', script},
+                                  {stdout = popen.opts.PIPE,
+                                   stdin = popen.opts.DEVNULL,
+                                   stderr = popen.opts.DEVNULL})
+    assert(handle, err)
+    g.handle = handle
+    local output, err = handle:read({timeout = 3})
+    assert(output, err)
+    t.assert_equals(output, 'shutdown callback finished\n')
+    local status = handle:wait()
+    t.assert_equals(status.state, 'exited')
+    t.assert_equals(status.exit_code, 0)
+end
-- 
GitLab