diff --git a/src/box/gc.c b/src/box/gc.c index 3b66d3d63a8b2d25c17e4f43930b8bd0ad89f954..4bfe2318f0cdc7b69caee16f54dcb324b7dcc8cd 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -42,6 +42,7 @@ #include "diag.h" #include "say.h" +#include "latch.h" #include "vclock.h" #include "checkpoint.h" #include "engine.h" /* engine_collect_garbage() */ @@ -72,6 +73,11 @@ struct gc_state { int64_t signature; /** Registered consumers, linked by gc_consumer::node. */ gc_tree_t consumers; + /** + * Latch serializing concurrent invocations of engine + * garbage collection callbacks. + */ + struct latch latch; }; static struct gc_state gc; @@ -131,6 +137,7 @@ gc_init(void) { gc.signature = -1; gc_tree_new(&gc.consumers); + latch_create(&gc.latch); } void @@ -145,6 +152,7 @@ gc_free(void) gc_consumer_delete(consumer); consumer = next; } + latch_destroy(&gc.latch); } void @@ -182,19 +190,27 @@ gc_run(void) gc.signature = gc_signature; + /* + * Engine callbacks may sleep, because they use coio for + * removing files. Make sure we won't try to remove the + * same file multiple times by serializing concurrent gc + * executions. + */ + latch_lock(&gc.latch); /* * Run garbage collection. * * The order is important here: we must invoke garbage * collection for memtx snapshots first and abort if it - * fails - see the comment to memtx_engine::collectGarbage. + * fails - see comment to memtx_engine_collect_garbage(). */ if (engine_collect_garbage(gc_signature) != 0) { say_error("box garbage collection failed: %s", diag_last_error(diag_get())->errmsg); - return; + } else { + wal_collect_garbage(gc_signature); } - wal_collect_garbage(gc_signature); + latch_unlock(&gc.latch); } void diff --git a/test/replication/gc.result b/test/replication/gc.result index d589a65946852dfbfc32ff5c08915c623e965a36..2baa0113875d4b704a1562f49cef9bee86694676 100644 --- a/test/replication/gc.result +++ b/test/replication/gc.result @@ -194,6 +194,37 @@ test_run:cleanup_cluster() --- - true ... +-- +-- Test that concurrent invocation of the garbage collector works fine. +-- +s:truncate() +--- +... +for i = 1, 10 do s:replace{i} end +--- +... +box.snapshot() +--- +- ok +... +replica_set.join(test_run, 3) +--- +... +replica_set.stop_all(test_run) +--- +... +for i = 11, 50 do s:replace{i} if i % 10 == 0 then box.snapshot() end end +--- +... +replica_set.start_all(test_run) +--- +... +replica_set.wait_all(test_run) +--- +... +replica_set.drop_all(test_run) +--- +... -- Cleanup. s:drop() --- diff --git a/test/replication/gc.test.lua b/test/replication/gc.test.lua index 2a1fbb7f5558fbf564c889a4c668df1b6fef3315..55439b7a78cad0fb5f917cbe4cf5f4f9655fcd41 100644 --- a/test/replication/gc.test.lua +++ b/test/replication/gc.test.lua @@ -101,6 +101,22 @@ box.snapshot() test_run:cleanup_cluster() #box.internal.gc.info().checkpoints == 1 or box.internal.gc.info() +-- +-- Test that concurrent invocation of the garbage collector works fine. +-- +s:truncate() +for i = 1, 10 do s:replace{i} end +box.snapshot() + +replica_set.join(test_run, 3) +replica_set.stop_all(test_run) + +for i = 11, 50 do s:replace{i} if i % 10 == 0 then box.snapshot() end end + +replica_set.start_all(test_run) +replica_set.wait_all(test_run) +replica_set.drop_all(test_run) + -- Cleanup. s:drop() box.error.injection.set("ERRINJ_RELAY_REPORT_INTERVAL", 0) diff --git a/test/replication/lua/fast_replica.lua b/test/replication/lua/fast_replica.lua index 3f78fd9457b8adc4611658b7ea0a0d01a8bbfb8f..8c772c41ffc685684caa19f48c18d1beaaae4ef9 100644 --- a/test/replication/lua/fast_replica.lua +++ b/test/replication/lua/fast_replica.lua @@ -27,6 +27,18 @@ function unregister(inspector, id) box.space._cluster:delete{id} end +function start(inspector, id) + inspector:cmd('start server replica'..tostring(id - 1)) +end + +function stop(inspector, id) + inspector:cmd('stop server replica'..tostring(id - 1)) +end + +function wait(inspector, id) + inspector:wait_lsn('replica'..tostring(id - 1), 'default') +end + function delete(inspector, id) inspector:cmd('stop server replica'..tostring(id - 1)) inspector:cmd('delete server replica'..tostring(id - 1)) @@ -37,6 +49,18 @@ function drop(inspector, id) delete(inspector, id) end +function start_all(inspector) + call_all(function (id) start(inspector, id) end) +end + +function stop_all(inspector) + call_all(function (id) stop(inspector, id) end) +end + +function wait_all(inspector) + call_all(function (id) wait(inspector, id) end) +end + function drop_all(inspector) call_all(function (id) drop(inspector, id) end) end @@ -56,6 +80,9 @@ end return { join = join; + start_all = start_all; + stop_all = stop_all; + wait_all = wait_all; drop_all = drop_all; vclock_diff = vclock_diff; unregister = unregister;