From 1f416452190a0a0ce507f060ad526b3c903beac3 Mon Sep 17 00:00:00 2001 From: Nikolay Shirokovskiy <nshirokovskiy@tarantool.org> Date: Thu, 11 Jul 2024 13:20:45 +0300 Subject: [PATCH] swim: finish worker fiber on Tarantool shutdown Let's make sure swim worker fiber is finished on Tarantool shutdown as we are going to free fibers stacks. If fiber is not finished it's stack may have references to objects on heap. Thus as fiber stack will be freed we will have FP memory leaks under ASAN. Let's make swim gc do not yield using asynchronuos deletion. This way we will not use worker fiber for swim deletion. We are going to stop this worker fiber before all swim object are collected. Part of #9722 NO_CHANGELOG=internal NO_DOC=internal --- src/lib/swim/swim.c | 28 ++++++++++++++++++++-------- src/lib/swim/swim.h | 7 +++++++ src/lua/swim.c | 15 +++++++++++++++ src/lua/swim.lua | 16 +--------------- test/app-luatest/shutdown_test.lua | 23 +++++++++++++++++++++++ 5 files changed, 66 insertions(+), 23 deletions(-) diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c index 6053abd529..707b52a207 100644 --- a/src/lib/swim/swim.c +++ b/src/lib/swim/swim.c @@ -544,6 +544,8 @@ struct swim { * lines. */ struct swim_task round_step_task; + /* Whether event handler should free swim on exit. */ + bool free_in_handler; }; /** Put the member into a list of ACK waiters. */ @@ -628,7 +630,8 @@ swim_on_member_update(struct swim *swim, struct swim_member *member, swim_member_ref(member); stailq_add_tail_entry(&swim->event_queue, member, in_event_queue); - fiber_wakeup(swim->event_handler); + if (swim->event_handler != NULL) + fiber_wakeup(swim->event_handler); } member->events |= events; } @@ -1888,6 +1891,9 @@ swim_event_handler_f(va_list va) } swim_member_unref(m); } + s->event_handler = NULL; + if (s->free_in_handler) + swim_delete(s); return 0; } @@ -1928,7 +1934,9 @@ swim_new(uint64_t generation) return NULL; } fiber_set_joinable(swim->event_handler, true); + fiber_set_managed_shutdown(swim->event_handler); fiber_start(swim->event_handler, swim); + swim->free_in_handler = false; return swim; } @@ -2203,11 +2211,8 @@ static inline void swim_kill_event_handler(struct swim *swim) { struct fiber *f = swim->event_handler; - /* - * Nullify so as not to keep pointer at a fiber when it is - * reused. - */ - swim->event_handler = NULL; + if (f == NULL) + return; fiber_cancel(f); fiber_join(f); } @@ -2215,8 +2220,7 @@ swim_kill_event_handler(struct swim *swim) void swim_delete(struct swim *swim) { - if (swim->event_handler != NULL) - swim_kill_event_handler(swim); + swim_kill_event_handler(swim); struct ev_loop *l = swim_loop(); swim_scheduler_destroy(&swim->scheduler); swim_ev_timer_stop(l, &swim->round_tick); @@ -2245,6 +2249,14 @@ swim_delete(struct swim *swim) free(swim); } +void +swim_gc(struct swim *swim) +{ + swim->free_in_handler = true; + fiber_set_joinable(swim->event_handler, false); + fiber_cancel(swim->event_handler); +} + /** * Quit message is broadcasted in the same way as round messages, * step by step, with the only difference that quit round steps diff --git a/src/lib/swim/swim.h b/src/lib/swim/swim.h index 4565eb9765..24fc3ea8fe 100644 --- a/src/lib/swim/swim.h +++ b/src/lib/swim/swim.h @@ -124,6 +124,13 @@ int swim_set_codec(struct swim *swim, enum crypto_algo algo, enum crypto_mode mode, const char *key, int key_size); +/** + * Delete asynchronously. Does not yield. Object is invalid after return + * from this function and should not be used. + */ +void +swim_gc(struct swim *swim); + /** * Stop listening and broadcasting messages, cleanup all internal * structures, free memory. The function yields. Actual deletion diff --git a/src/lua/swim.c b/src/lua/swim.c index 7583815f28..f2c182e22e 100644 --- a/src/lua/swim.c +++ b/src/lua/swim.c @@ -153,6 +153,20 @@ lua_swim_quit(struct lua_State *L) return 0; } +/** + * Delete asynchronously. Does not yield. Object is invalid after return + * from this function and should not be used. + */ +static int +lua_swim_gc(struct lua_State *L) +{ + uint32_t ctypeid; + struct swim *s = *(struct swim **)luaL_checkcdata(L, 1, &ctypeid); + assert(ctypeid == ctid_swim_ptr); + swim_gc(s); + return 0; +} + void tarantool_lua_swim_init(struct lua_State *L) { @@ -163,6 +177,7 @@ tarantool_lua_swim_init(struct lua_State *L) static const struct luaL_Reg lua_swim_internal_methods [] = { {"swim_new", lua_swim_new}, {"swim_delete", lua_swim_delete}, + {"swim_gc", lua_swim_gc}, {"swim_quit", lua_swim_quit}, {"swim_on_member_event", lua_swim_on_member_event}, {"swim_on_member_event_normalize_arguments", diff --git a/src/lua/swim.lua b/src/lua/swim.lua index f72734d7b4..4462f75fde 100644 --- a/src/lua/swim.lua +++ b/src/lua/swim.lua @@ -5,7 +5,6 @@ local msgpack = require('msgpack') local crypto = require('crypto') local fiber = require('fiber') local internal = require('swim.lib') -local schedule_task = fiber._internal.schedule_task local cord_ibuf_take = buffer.internal.cord_ibuf_take local cord_ibuf_put = buffer.internal.cord_ibuf_put @@ -979,19 +978,6 @@ swim_cfg_not_configured_mt.__call = swim_cfg_first_call -- removed members erasure - GC drops them automatically. local cache_table_mt = { __mode = 'v' } --- --- SWIM garbage collection function. It can't delete the SWIM --- instance immediately, because it is invoked by Lua GC. Firstly, --- it is not safe to yield in FFI - Jit can't survive a yield. --- Secondly, it is not safe to yield in any GC function, because --- it stops garbage collection. Instead, here GC is delayed, works --- at the end of the event loop, and deletes the instance --- asynchronously. --- -local function swim_gc(ptr) - schedule_task(internal.swim_delete, ptr) -end - -- -- Create a new SWIM instance, and configure if @a cfg is -- provided. @@ -1020,7 +1006,7 @@ local function swim_new(cfg) if ptr == nil then return nil, box.error.last() end - ffi.gc(ptr, swim_gc) + ffi.gc(ptr, internal.swim_gc) local s = setmetatable({ ptr = ptr, cfg = setmetatable({index = {}}, swim_cfg_not_configured_mt), diff --git a/test/app-luatest/shutdown_test.lua b/test/app-luatest/shutdown_test.lua index d582b28264..5e3a4feb33 100644 --- a/test/app-luatest/shutdown_test.lua +++ b/test/app-luatest/shutdown_test.lua @@ -228,3 +228,26 @@ g_netbox.test_netbox_shutdown = function(cg) end, {cg.peer.net_box_uri}) test_no_hang_on_shutdown(cg.server) end + +local g_swim = t.group('swim') + +g_swim.before_each(function(cg) + cg.server = server:new() + cg.server:start() +end) + +g_swim.after_each(function(cg) + if cg.server ~= nil then + cg.server:drop() + end +end) + +-- Test shutdown with swim instance. +g_swim.test_swim_shutdown = function(cg) + cg.server:exec(function() + local swim = require('swim') + local s = swim.new({generation = 0}) + rawset(_G, 'test_swim', s) + end) + test_no_hang_on_shutdown(cg.server) +end -- GitLab