Skip to content
Snippets Groups Projects
Commit 8f4538cb authored by Ilya Verbin's avatar Ilya Verbin Committed by Vladimir Davydov
Browse files

fiber: do not crash on concurrent fiber:join()

If two or more fibers are yielding in fiber_join_timeout(), one of them
will eventually join and recycle the fiber, while the rest will crash
on accessing the recycled fiber's struct. Fix this by doing fiber_find()
again after each waiting attempt in lbox_fiber_join().

Closes #7489
Closes #7531

NO_DOC=bugfix
parent 73e1059d
No related branches found
No related tags found
No related merge requests found
## bugfix/core
* Fixed a possible crash on concurrent `fiber_object:join()` (gh-7489).
...@@ -735,6 +735,7 @@ lbox_fiber_join(struct lua_State *L) ...@@ -735,6 +735,7 @@ lbox_fiber_join(struct lua_State *L)
struct fiber *fiber = lbox_checkfiber(L, 1); struct fiber *fiber = lbox_checkfiber(L, 1);
struct lua_State *child_L = fiber->storage.lua.stack; struct lua_State *child_L = fiber->storage.lua.stack;
struct error *e = NULL; struct error *e = NULL;
uint64_t fid = fiber->fid;
int num_ret = 0; int num_ret = 0;
int coro_ref = 0; int coro_ref = 0;
...@@ -747,7 +748,30 @@ lbox_fiber_join(struct lua_State *L) ...@@ -747,7 +748,30 @@ lbox_fiber_join(struct lua_State *L)
luaL_error(L, "fiber:join(timeout): bad arguments"); luaL_error(L, "fiber:join(timeout): bad arguments");
} }
} }
int rc = fiber_join_timeout(fiber, timeout); double deadline = fiber_clock() + timeout;
while (!fiber_is_dead(fiber)) {
bool exceeded = fiber_wait_on_deadline(fiber, deadline);
/*
* At this point the fiber's struct may be already reused by
* another fiber.
*/
if (exceeded) {
diag_set(TimedOut);
e = diag_last_error(&fiber()->diag);
lua_pushboolean(L, false);
luaT_pusherror(L, e);
return 2;
}
fiber = fiber_find(fid);
if (fiber == NULL) {
luaL_error(L, "the fiber is already joined "
"by concurrent fiber:join()");
}
}
/* The fiber is already dead. */
int rc = fiber_join(fiber);
if (child_L != NULL) { if (child_L != NULL) {
coro_ref = lua_tointeger(child_L, -1); coro_ref = lua_tointeger(child_L, -1);
......
local fiber = require('fiber')
local t = require('luatest')
local g1 = t.group('gh-7489-concurrent-fiber-join-1',
{{ do_cancel = false },
{ do_cancel = true }})
local g2 = t.group('gh-7489-concurrent-fiber-join-2')
local error_message = 'the fiber is already joined by concurrent fiber:join()'
g1.test_concurrent_fiber_join = function(cg)
local block1 = true
-- 1. Create f1, but do not start it
local f1 = fiber.new(function()
-- 4. Wait for unblocking and exit
while block1 do
fiber.sleep(0.001)
end
end)
f1:set_joinable(true)
-- 2. Create f2, but do not start it
local f2 = fiber.new(function()
block1 = false
-- 5. Try to join f1. Yield here until it is unblocked and finished
f1:join()
end)
f2:set_joinable(true)
-- 3. Start f1, then f2
fiber.yield()
-- 6. Optionally cancel f1
if cg.params.do_cancel then
f1:cancel()
end
-- 7. Try to join f1. Yield here until it is unblocked and finished.
-- This join should raise an error, because f1 is already joined by f2
t.assert_error_msg_content_equals(error_message, function() f1:join() end)
end
g2.test_concurrent_fiber_join = function()
local f1 = nil
local f2 = nil
local function test_f1()
-- Do nothing
end
local function test_f2()
while true do
fiber.sleep(0.001)
end
end
local function joiner_f1()
-- 3. Try to join f1. Yield until it is started and immediately finished
local st = f1:join()
-- 5. Create a new fiber, which sleeps until it is cancelled
f2 = fiber.new(test_f2)
f2:set_joinable(true)
return st
end
local function joiner_f2()
-- 4. Try to join f1. Yield until it is started and immediately finished
local st, err = pcall(fiber.join, f1)
-- 6. This join should raise an error, because f1 should be already
-- joined by the first joiner.
-- In case of a bug, it may hang trying to join f2 instead of f1
return not st and err == error_message
end
-- 1. Create two joiners and one joinee fibers, but do not start them
local j1 = fiber.new(joiner_f1)
local j2 = fiber.new(joiner_f2)
f1 = fiber.new(test_f1)
j1:set_joinable(true)
j2:set_joinable(true)
f1:set_joinable(true)
-- 2. Start fibers in the following order: j1, j2, f1
fiber.yield()
-- 7. Verify that the first join succeeded and the second one failed
local _, joiner1_passed = j1:join()
local _, joiner2_passed = j2:join()
t.assert(joiner1_passed)
t.assert(joiner2_passed)
f2:cancel()
f2:join()
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment