From b871773886e8060f74783dadde0145a9a5d08d39 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov <vdavydov.dev@gmail.com> Date: Mon, 13 Nov 2017 14:42:05 +0300 Subject: [PATCH] Fix race in garbage collection Engine callbacks that perform garbage collection may sleep, because they use coio for removing files to avoid blocking the TX thread. If garbage collection is called concurrently from different fibers (e.g. from relay fibers), we may attempt to delete the same file multiple times. What is worse xdir_collect_garbage(), used by engine callbacks to remove files, isn't safe against concurrent execution - it first unlinks a file via coio, which involves a yield, and only then removes the corresponding vclock from the directory index. This opens a race window for another fiber to read the same clock and yield, in the interim the vclock can be freed by the first fiber: #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51 #1 0x00007f105ceda3fa in __GI_abort () at abort.c:89 #2 0x000055e4c03f4a3d in sig_fatal_cb (signo=11) at main.cc:184 #3 <signal handler called> #4 0x000055e4c066907a in vclockset_remove (rbtree=0x55e4c1010e58, node=0x55e4c1023d20) at box/vclock.c:215 #5 0x000055e4c06256af in xdir_collect_garbage (dir=0x55e4c1010e28, signature=342, use_coio=true) at box/xlog.c:620 #6 0x000055e4c0417dcc in memtx_engine_collect_garbage (engine=0x55e4c1010df0, lsn=342) at box/memtx_engine.c:784 #7 0x000055e4c0414dbf in engine_collect_garbage (lsn=342) at box/engine.c:155 #8 0x000055e4c04a36c7 in gc_run () at box/gc.c:192 #9 0x000055e4c04a38f2 in gc_consumer_advance (consumer=0x55e4c1021360, signature=342) at box/gc.c:262 #10 0x000055e4c04b4da8 in tx_gc_advance (msg=0x7f1028000aa0) at box/relay.cc:250 #11 0x000055e4c04eb854 in cmsg_deliver (msg=0x7f1028000aa0) at cbus.c:353 #12 0x000055e4c04ec871 in fiber_pool_f (ap=0x7f1056800ec0) at fiber_pool.c:64 #13 0x000055e4c03f4784 in fiber_cxx_invoke(fiber_func, typedef __va_list_tag __va_list_tag *) (f=0x55e4c04ec6d4 <fiber_pool_f>, ap=0x7f1056800ec0) at fiber.h:665 #14 0x000055e4c04e6816 in fiber_loop (data=0x0) at fiber.c:631 #15 0x000055e4c0687dab in coro_init () at /home/vlad/src/tarantool/third_party/coro/coro.c:110 Fix this by serializing concurrent execution of garbage collection callbacks with a latch. --- src/box/gc.c | 22 ++++++++++++++++--- test/replication/gc.result | 31 +++++++++++++++++++++++++++ test/replication/gc.test.lua | 16 ++++++++++++++ test/replication/lua/fast_replica.lua | 27 +++++++++++++++++++++++ 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/src/box/gc.c b/src/box/gc.c index 3b66d3d63a..4bfe2318f0 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -42,6 +42,7 @@ #include "diag.h" #include "say.h" +#include "latch.h" #include "vclock.h" #include "checkpoint.h" #include "engine.h" /* engine_collect_garbage() */ @@ -72,6 +73,11 @@ struct gc_state { int64_t signature; /** Registered consumers, linked by gc_consumer::node. */ gc_tree_t consumers; + /** + * Latch serializing concurrent invocations of engine + * garbage collection callbacks. + */ + struct latch latch; }; static struct gc_state gc; @@ -131,6 +137,7 @@ gc_init(void) { gc.signature = -1; gc_tree_new(&gc.consumers); + latch_create(&gc.latch); } void @@ -145,6 +152,7 @@ gc_free(void) gc_consumer_delete(consumer); consumer = next; } + latch_destroy(&gc.latch); } void @@ -182,19 +190,27 @@ gc_run(void) gc.signature = gc_signature; + /* + * Engine callbacks may sleep, because they use coio for + * removing files. Make sure we won't try to remove the + * same file multiple times by serializing concurrent gc + * executions. + */ + latch_lock(&gc.latch); /* * Run garbage collection. * * The order is important here: we must invoke garbage * collection for memtx snapshots first and abort if it - * fails - see the comment to memtx_engine::collectGarbage. + * fails - see comment to memtx_engine_collect_garbage(). */ if (engine_collect_garbage(gc_signature) != 0) { say_error("box garbage collection failed: %s", diag_last_error(diag_get())->errmsg); - return; + } else { + wal_collect_garbage(gc_signature); } - wal_collect_garbage(gc_signature); + latch_unlock(&gc.latch); } void diff --git a/test/replication/gc.result b/test/replication/gc.result index d589a65946..2baa011387 100644 --- a/test/replication/gc.result +++ b/test/replication/gc.result @@ -194,6 +194,37 @@ test_run:cleanup_cluster() --- - true ... +-- +-- Test that concurrent invocation of the garbage collector works fine. +-- +s:truncate() +--- +... +for i = 1, 10 do s:replace{i} end +--- +... +box.snapshot() +--- +- ok +... +replica_set.join(test_run, 3) +--- +... +replica_set.stop_all(test_run) +--- +... +for i = 11, 50 do s:replace{i} if i % 10 == 0 then box.snapshot() end end +--- +... +replica_set.start_all(test_run) +--- +... +replica_set.wait_all(test_run) +--- +... +replica_set.drop_all(test_run) +--- +... -- Cleanup. s:drop() --- diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua index 2a1fbb7f55..55439b7a78 100644 --- a/test/replication/gc.test.lua +++ b/test/replication/gc.test.lua @@ -101,6 +101,22 @@ box.snapshot() test_run:cleanup_cluster() #box.internal.gc.info().checkpoints == 1 or box.internal.gc.info() +-- +-- Test that concurrent invocation of the garbage collector works fine. +-- +s:truncate() +for i = 1, 10 do s:replace{i} end +box.snapshot() + +replica_set.join(test_run, 3) +replica_set.stop_all(test_run) + +for i = 11, 50 do s:replace{i} if i % 10 == 0 then box.snapshot() end end + +replica_set.start_all(test_run) +replica_set.wait_all(test_run) +replica_set.drop_all(test_run) + -- Cleanup. s:drop() box.error.injection.set("ERRINJ_RELAY_REPORT_INTERVAL", 0) diff --git a/test/replication/lua/fast_replica.lua b/test/replication/lua/fast_replica.lua index 3f78fd9457..8c772c41ff 100644 --- a/test/replication/lua/fast_replica.lua +++ b/test/replication/lua/fast_replica.lua @@ -27,6 +27,18 @@ function unregister(inspector, id) box.space._cluster:delete{id} end +function start(inspector, id) + inspector:cmd('start server replica'..tostring(id - 1)) +end + +function stop(inspector, id) + inspector:cmd('stop server replica'..tostring(id - 1)) +end + +function wait(inspector, id) + inspector:wait_lsn('replica'..tostring(id - 1), 'default') +end + function delete(inspector, id) inspector:cmd('stop server replica'..tostring(id - 1)) inspector:cmd('delete server replica'..tostring(id - 1)) @@ -37,6 +49,18 @@ function drop(inspector, id) delete(inspector, id) end +function start_all(inspector) + call_all(function (id) start(inspector, id) end) +end + +function stop_all(inspector) + call_all(function (id) stop(inspector, id) end) +end + +function wait_all(inspector) + call_all(function (id) wait(inspector, id) end) +end + function drop_all(inspector) call_all(function (id) drop(inspector, id) end) end @@ -56,6 +80,9 @@ end return { join = join; + start_all = start_all; + stop_all = stop_all; + wait_all = wait_all; drop_all = drop_all; vclock_diff = vclock_diff; unregister = unregister; -- GitLab