From cb8e903b275cd949b366dc553cc2bd62d71a5ef0 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@tarantool.org>
Date: Thu, 19 Oct 2023 17:09:10 +0300
Subject: [PATCH] fiber: use alternative signal stack

We install a signal handler that prints the stack trace on SIGSEGV,
SIGBUS, SIGILL, SIGFPE. The signal handler uses the current stack.
This works fine for most issues, but not for stack overflow, because
the latter makes the current stack unusable, leading to a crash in
the signal handler. Let's install an alternative signal stack in each
thread so that we can print the stack trace on stack overflow.

Note that we skip this for ASAN because it installs its own signal
stack. (Installing a custom stack would result in a crash.)

Closes #9222

NO_DOC=bug fix
---
 changelogs/unreleased/gh-9222-sigaltstack.md |  4 ++
 src/lib/core/crash.c                         |  5 ++-
 src/lib/core/fiber.c                         | 45 ++++++++++++++++++++
 test/unit/guard.cc                           | 16 -------
 4 files changed, 53 insertions(+), 17 deletions(-)
 create mode 100644 changelogs/unreleased/gh-9222-sigaltstack.md

diff --git a/changelogs/unreleased/gh-9222-sigaltstack.md b/changelogs/unreleased/gh-9222-sigaltstack.md
new file mode 100644
index 0000000000..f17d280bd9
--- /dev/null
+++ b/changelogs/unreleased/gh-9222-sigaltstack.md
@@ -0,0 +1,4 @@
+## bugfix/core
+
+* Fixed a crash that happened while printing the stack trace on a stack
+  overflow bug (gh-9222).
diff --git a/src/lib/core/crash.c b/src/lib/core/crash.c
index 6e324997c9..55825d3db5 100644
--- a/src/lib/core/crash.c
+++ b/src/lib/core/crash.c
@@ -225,9 +225,12 @@ crash_signal_init(void)
 	 *
 	 * SA_NODEFER allows receiving the same signal
 	 * during handler.
+	 *
+	 * SA_ONSTACK is required to handle stack overflow, which
+	 * makes the current stack unusable.
 	 */
 	struct sigaction sa = {
-		.sa_flags = SA_RESETHAND | SA_NODEFER | SA_SIGINFO,
+		.sa_flags = SA_RESETHAND | SA_NODEFER | SA_SIGINFO | SA_ONSTACK,
 		.sa_sigaction = crash_signal_cb,
 	};
 	sigemptyset(&sa.sa_mask);
diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index f6ff1aaa7b..1437036062 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -223,6 +223,49 @@ fiber_mprotect(void *addr, size_t len, int prot)
 	return -1;
 }
 
+/**
+ * Sets an alternative signal stack.
+ *
+ * We use an alternative signal stack so that we can print the stack trace on
+ * stack overflow, which makes the current stack unusable. This function should
+ * be called in each thread because the SIGSEGV signal is thread-directed.
+ * At thread exit, one should call signal_stack_free() to free the stack set by
+ * this function.
+ *
+ * We skip this for ASAN because it installs its own signal stack.
+ */
+static void
+signal_stack_init(void)
+{
+#ifndef ENABLE_ASAN
+	stack_t stack;
+	stack.ss_flags = 0;
+	/* SIGSTKSZ doesn't seem to be enough for libunwind. */
+	stack.ss_size = 4 * SIGSTKSZ;
+	stack.ss_sp = xmalloc(stack.ss_size);
+	if (sigaltstack(&stack, NULL) != 0) {
+		say_syserror("sigaltstack");
+		free(stack.ss_sp);
+	}
+#endif
+}
+
+/**
+ * Frees the signal stack set with signal_stack_init().
+ */
+static void
+signal_stack_free(void)
+{
+#ifndef ENABLE_ASAN
+	stack_t stack;
+	if (sigaltstack(NULL, &stack) != 0) {
+		say_syserror("sigaltstack");
+	} else {
+		free(stack.ss_sp);
+	}
+#endif
+}
+
 static __thread bool fiber_top_enabled = false;
 
 #ifdef ENABLE_BACKTRACE
@@ -1730,6 +1773,7 @@ cord_create(struct cord *cord, const char *name)
 #ifdef HAVE_MADV_DONTNEED
 	cord->sched.stack_watermark = NULL;
 #endif
+	signal_stack_init();
 }
 
 void
@@ -1759,6 +1803,7 @@ cord_exit(struct cord *cord)
 	assert(cord == cord());
 	(void)cord;
 	trigger_free_in_thread();
+	signal_stack_free();
 }
 
 void
diff --git a/test/unit/guard.cc b/test/unit/guard.cc
index 236d3180b8..14a4afc6a1 100644
--- a/test/unit/guard.cc
+++ b/test/unit/guard.cc
@@ -42,21 +42,6 @@ stack_break_f(char *frame_zero)
 static int
 main_f(va_list ap)
 {
-	stack_t stack;
-	stack.ss_flags = 0;
-	/*
-	 * It is said that SIGSTKSZ is not enough for one of llvm sanitizers
-	 * (probably asan, because this test fails with segmentation fault if
-	 * we use SIGSTKSZ as alternative signal stack size when we use it).
-	 * https://github.com/llvm/llvm-project/blame/699231ab3c7dd8f028d868b103481fa901f3c721/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp#L169
-	 */
-	stack.ss_size = 4 * SIGSTKSZ;
-	stack.ss_sp = xmalloc(stack.ss_size);
-	if (sigaltstack(&stack, NULL) < 0) {
-		free(stack.ss_sp);
-		perror("sigaltstack");
-		exit(EXIT_FAILURE);
-	}
 	struct sigaction sa;
 	sa.sa_handler = sigsegf_handler;
 	sigemptyset(&sa.sa_mask);
@@ -67,7 +52,6 @@ main_f(va_list ap)
 	int res = stack_break_f((char *)__builtin_frame_address(0));
 
 	ev_break(loop(), EVBREAK_ALL);
-	free(stack.ss_sp);
 	return res;
 }
 
-- 
GitLab