From 21726f6935c6e7131c0a8cff40e7eb198ba547ab Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov.dev@gmail.com>
Date: Fri, 28 Dec 2018 23:29:06 +0300
Subject: [PATCH] relay: close xlog cursor in relay thread

An xlog_cursor created and used by a relay via its recovery context is
destroyed by the main thread once the relay thread has exited. This is
incorrect: xlog_cursor uses the cord's slab allocator and therefore
must be destroyed in the same thread that created it; otherwise we
risk a use-after-free bug. So this patch moves the recovery_delete()
invocation to the end of the relay thread routine.

No test is added, because our existing tests already exercise this
case; they usually don't crash only because we are lucky. The next patch
will add some assertions to make the bug 100% reproducible.

Closes #3910
---
 src/box/relay.cc | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/box/relay.cc b/src/box/relay.cc
index 48b1648f92..90fced244c 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -210,6 +210,26 @@ relay_cancel(struct relay *relay)
 	}
 }
 
+/**
+ * Relay thread epilogue: optional errinj delay, then recovery cleanup.
+ */
+static void
+relay_exit(struct relay *relay)
+{
+	struct errinj *inj = errinj(ERRINJ_RELAY_EXIT_DELAY, ERRINJ_DOUBLE);
+	if (inj != NULL && inj->dparam > 0)
+		fiber_sleep(inj->dparam);
+
+	/*
+	 * Destroy the recovery context and clear the pointer.
+	 * This MUST happen in the relay thread: the context
+	 * holds an xlog cursor that uses the cord's slab
+	 * allocator, so it must be closed where it was opened.
+	 */
+	recovery_delete(relay->r);
+	relay->r = NULL;
+}
+
 static void
 relay_stop(struct relay *relay)
 {
@@ -277,6 +297,8 @@ int
 relay_final_join_f(va_list ap)
 {
 	struct relay *relay = va_arg(ap, struct relay *);
+	auto guard = make_scoped_guard([=] { relay_exit(relay); });
+
 	coio_enable();
 	relay_set_cord_name(relay->io.fd);
 
@@ -600,10 +622,7 @@ relay_subscribe_f(va_list ap)
 		    NULL, NULL, cbus_process);
 	cbus_endpoint_destroy(&relay->endpoint, cbus_process);
 
-	struct errinj *inj = errinj(ERRINJ_RELAY_EXIT_DELAY, ERRINJ_DOUBLE);
-	if (inj != NULL && inj->dparam > 0)
-		fiber_sleep(inj->dparam);
-
+	relay_exit(relay);
 	return -1;
 }
 
-- 
GitLab