Skip to content
Snippets Groups Projects
Commit b8717738 authored by Vladimir Davydov's avatar Vladimir Davydov Committed by Roman Tsisyk
Browse files

Fix race in garbage collection

Engine callbacks that perform garbage collection may sleep, because they
use coio for removing files to avoid blocking the TX thread. If garbage
collection is called concurrently from different fibers (e.g. from relay
fibers), we may attempt to delete the same file multiple times. What is
worse xdir_collect_garbage(), used by engine callbacks to remove files,
isn't safe against concurrent execution - it first unlinks a file via
coio, which involves a yield, and only then removes the corresponding
vclock from the directory index. This opens a race window for another
fiber to read the same clock and yield, in the interim the vclock can be
freed by the first fiber:

  #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
  #1  0x00007f105ceda3fa in __GI_abort () at abort.c:89
  #2  0x000055e4c03f4a3d in sig_fatal_cb (signo=11) at main.cc:184
  #3  <signal handler called>
  #4  0x000055e4c066907a in vclockset_remove (rbtree=0x55e4c1010e58, node=0x55e4c1023d20) at box/vclock.c:215
  #5  0x000055e4c06256af in xdir_collect_garbage (dir=0x55e4c1010e28, signature=342, use_coio=true) at box/xlog.c:620
  #6  0x000055e4c0417dcc in memtx_engine_collect_garbage (engine=0x55e4c1010df0, lsn=342) at box/memtx_engine.c:784
  #7  0x000055e4c0414dbf in engine_collect_garbage (lsn=342) at box/engine.c:155
  #8  0x000055e4c04a36c7 in gc_run () at box/gc.c:192
  #9  0x000055e4c04a38f2 in gc_consumer_advance (consumer=0x55e4c1021360, signature=342) at box/gc.c:262
  #10 0x000055e4c04b4da8 in tx_gc_advance (msg=0x7f1028000aa0) at box/relay.cc:250
  #11 0x000055e4c04eb854 in cmsg_deliver (msg=0x7f1028000aa0) at cbus.c:353
  #12 0x000055e4c04ec871 in fiber_pool_f (ap=0x7f1056800ec0) at fiber_pool.c:64
  #13 0x000055e4c03f4784 in fiber_cxx_invoke(fiber_func, typedef __va_list_tag __va_list_tag *) (f=0x55e4c04ec6d4 <fiber_pool_f>, ap=0x7f1056800ec0) at fiber.h:665
  #14 0x000055e4c04e6816 in fiber_loop (data=0x0) at fiber.c:631
  #15 0x000055e4c0687dab in coro_init () at /home/vlad/src/tarantool/third_party/coro/coro.c:110

Fix this by serializing concurrent execution of garbage collection
callbacks with a latch.
parent 582a85d4
No related branches found
No related tags found
No related merge requests found
......@@ -42,6 +42,7 @@
#include "diag.h"
#include "say.h"
#include "latch.h"
#include "vclock.h"
#include "checkpoint.h"
#include "engine.h" /* engine_collect_garbage() */
......@@ -72,6 +73,11 @@ struct gc_state {
int64_t signature;
/** Registered consumers, linked by gc_consumer::node. */
gc_tree_t consumers;
/**
* Latch serializing concurrent invocations of engine
* garbage collection callbacks.
*/
struct latch latch;
};
static struct gc_state gc;
......@@ -131,6 +137,7 @@ gc_init(void)
{
gc.signature = -1;
gc_tree_new(&gc.consumers);
latch_create(&gc.latch);
}
void
......@@ -145,6 +152,7 @@ gc_free(void)
gc_consumer_delete(consumer);
consumer = next;
}
latch_destroy(&gc.latch);
}
void
......@@ -182,19 +190,27 @@ gc_run(void)
gc.signature = gc_signature;
/*
* Engine callbacks may sleep, because they use coio for
* removing files. Make sure we won't try to remove the
* same file multiple times by serializing concurrent gc
* executions.
*/
latch_lock(&gc.latch);
/*
* Run garbage collection.
*
* The order is important here: we must invoke garbage
* collection for memtx snapshots first and abort if it
* fails - see the comment to memtx_engine::collectGarbage.
* fails - see comment to memtx_engine_collect_garbage().
*/
if (engine_collect_garbage(gc_signature) != 0) {
say_error("box garbage collection failed: %s",
diag_last_error(diag_get())->errmsg);
return;
} else {
wal_collect_garbage(gc_signature);
}
wal_collect_garbage(gc_signature);
latch_unlock(&gc.latch);
}
void
......
......@@ -194,6 +194,37 @@ test_run:cleanup_cluster()
---
- true
...
--
-- Test that concurrent invocation of the garbage collector works fine.
--
s:truncate()
---
...
for i = 1, 10 do s:replace{i} end
---
...
box.snapshot()
---
- ok
...
replica_set.join(test_run, 3)
---
...
replica_set.stop_all(test_run)
---
...
for i = 11, 50 do s:replace{i} if i % 10 == 0 then box.snapshot() end end
---
...
replica_set.start_all(test_run)
---
...
replica_set.wait_all(test_run)
---
...
replica_set.drop_all(test_run)
---
...
-- Cleanup.
s:drop()
---
......
......@@ -101,6 +101,22 @@ box.snapshot()
test_run:cleanup_cluster()
#box.internal.gc.info().checkpoints == 1 or box.internal.gc.info()
--
-- Test that concurrent invocation of the garbage collector works fine.
--
s:truncate()
for i = 1, 10 do s:replace{i} end
box.snapshot()
replica_set.join(test_run, 3)
replica_set.stop_all(test_run)
for i = 11, 50 do s:replace{i} if i % 10 == 0 then box.snapshot() end end
replica_set.start_all(test_run)
replica_set.wait_all(test_run)
replica_set.drop_all(test_run)
-- Cleanup.
s:drop()
box.error.injection.set("ERRINJ_RELAY_REPORT_INTERVAL", 0)
......
......@@ -27,6 +27,18 @@ function unregister(inspector, id)
box.space._cluster:delete{id}
end
function start(inspector, id)
inspector:cmd('start server replica'..tostring(id - 1))
end
function stop(inspector, id)
inspector:cmd('stop server replica'..tostring(id - 1))
end
function wait(inspector, id)
inspector:wait_lsn('replica'..tostring(id - 1), 'default')
end
function delete(inspector, id)
inspector:cmd('stop server replica'..tostring(id - 1))
inspector:cmd('delete server replica'..tostring(id - 1))
......@@ -37,6 +49,18 @@ function drop(inspector, id)
delete(inspector, id)
end
function start_all(inspector)
call_all(function (id) start(inspector, id) end)
end
function stop_all(inspector)
call_all(function (id) stop(inspector, id) end)
end
function wait_all(inspector)
call_all(function (id) wait(inspector, id) end)
end
function drop_all(inspector)
call_all(function (id) drop(inspector, id) end)
end
......@@ -56,6 +80,9 @@ end
return {
join = join;
start_all = start_all;
stop_all = stop_all;
wait_all = wait_all;
drop_all = drop_all;
vclock_diff = vclock_diff;
unregister = unregister;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment