diff --git a/src/box/box.cc b/src/box/box.cc index 9767349ac8ea8124ba070e436fddf9a7e244fe03..936d58f545e43d9e29fa402a3e611c690d6b4bc3 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -4941,6 +4941,8 @@ bootstrap_from_master(struct replica *master) try { applier_resume_to_state(applier, APPLIER_READY, TIMEOUT_INFINITY); + } catch (FiberIsCancelled *e) { + throw e; } catch (...) { return false; } @@ -4958,6 +4960,8 @@ bootstrap_from_master(struct replica *master) try { applier_resume_to_state(applier, APPLIER_FETCH_SNAPSHOT, TIMEOUT_INFINITY); + } catch (FiberIsCancelled *e) { + throw e; } catch (...) { return false; } @@ -5926,6 +5930,14 @@ box_storage_shutdown() if (!is_storage_initialized) return; iproto_shutdown(); + /* + * Finish client fibers after iproto_shutdown otherwise new fibers + * can be started through new iproto requests. Also we should + * finish client fibers before other subsystems shutdown so that + * we won't need to handle requests from client fibers after/during + * subsystem shutdown. + */ + fiber_shutdown(); replication_shutdown(); } diff --git a/src/box/lua/console.lua b/src/box/lua/console.lua index 90111ccade2b251bbefbe48f96281be0d2e066cb..06e5a85033867ff3b1bd371b77dd2ceb35f1d14c 100644 --- a/src/box/lua/console.lua +++ b/src/box/lua/console.lua @@ -985,7 +985,15 @@ local function client_handler(client, _peer) state:print(string.format("%-63s\n%-63s\n", "Tarantool ".. version.." (Lua console)", "type 'help' for interactive help")) + local on_shutdown = function() + -- Fiber is going to be cancelled on shutdown. Do not report + -- cancel induced error to the peer. 
+ client:close(); + end + state.fiber = fiber.self() + box.ctl.on_shutdown(on_shutdown) repl(state) + box.ctl.on_shutdown(nil, on_shutdown) session_internal.run_on_disconnect() end diff --git a/src/box/memtx_engine.cc b/src/box/memtx_engine.cc index a0530aae6b438970070ff521f3ef2ee98820c2d9..0ace4a5283674fd982be5c6b0655d7554003259c 100644 --- a/src/box/memtx_engine.cc +++ b/src/box/memtx_engine.cc @@ -1028,12 +1028,19 @@ checkpoint_f(va_list ap) return -1; } - struct mh_i32_t *temp_space_ids = mh_i32_new(); + struct mh_i32_t *temp_space_ids; say_info("saving snapshot `%s'", snap->filename); - ERROR_INJECT_SLEEP(ERRINJ_SNAP_WRITE_DELAY); + ERROR_INJECT_WHILE(ERRINJ_SNAP_WRITE_DELAY, { + fiber_sleep(0.001); + if (fiber_is_cancelled()) { + diag_set(FiberIsCancelled); + goto fail; + } + }); ERROR_INJECT(ERRINJ_SNAP_SKIP_ALL_ROWS, goto done); struct space_read_view *space_rv; + temp_space_ids = mh_i32_new(); read_view_foreach_space(space_rv, &ckpt->rv) { FiberGCChecker gc_check; bool skip = false; diff --git a/src/box/replication.cc b/src/box/replication.cc index 261b5e5cd3035d7613cb266ccfe36215c3e30581..83bc4b6cd4db8472216232e92dc9e8594e7cf346 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -62,18 +62,6 @@ double replication_sync_timeout = 300.0; /* seconds */ bool replication_skip_conflict = false; int replication_threads = 1; -/** - * Fiber executing replicaset_connect. NULL if the function - * is not being executed. - */ -static struct fiber *replication_connect_fiber; - -/** Condition that replicaset_connect finished execution. */ -static struct fiber_cond replication_connect_cond; - -/** If set then replication shutdown is started. 
*/ -static bool replication_is_shutting_down; - bool cfg_replication_anon = true; struct tt_uuid cfg_bootstrap_leader_uuid; struct uri cfg_bootstrap_leader_uri; @@ -231,7 +219,6 @@ replication_init(int num_threads) diag_create(&replicaset.applier.diag); replication_threads = num_threads; - fiber_cond_create(&replication_connect_cond); /* The local instance is always part of the quorum. */ replicaset.healthy_count = 1; @@ -242,12 +229,6 @@ replication_init(int num_threads) void replication_shutdown(void) { - replication_is_shutting_down = true; - if (replication_connect_fiber != NULL) - fiber_cancel(replication_connect_fiber); - while (replication_connect_fiber != NULL) - fiber_cond_wait(&replication_connect_cond); - struct replica *replica; rlist_foreach_entry(replica, &replicaset.anon, in_anon) applier_stop(replica->applier); @@ -263,7 +244,6 @@ replication_free(void) diag_destroy(&replicaset.applier.diag); trigger_destroy(&replicaset.on_ack); trigger_destroy(&replicaset.on_relay_thread_start); - fiber_cond_destroy(&replication_connect_cond); fiber_cond_destroy(&replicaset.applier.cond); latch_destroy(&replicaset.applier.order_latch); applier_free(); @@ -1072,9 +1052,6 @@ void replicaset_connect(const struct uri_set *uris, bool connect_quorum, bool keep_connect) { - if (replication_is_shutting_down) - tnt_raise(ClientError, ER_SHUTDOWN); - if (uris->uri_count == 0) { /* Cleanup the replica set. 
*/ replicaset_update(NULL, 0, false); @@ -1087,12 +1064,6 @@ replicaset_connect(const struct uri_set *uris, tnt_raise(ClientError, ER_CFG, "replication", "too many replicas"); } - assert(replication_connect_fiber == NULL); - replication_connect_fiber = fiber(); - auto connect_fiber_guard = make_scoped_guard([&]{ - replication_connect_fiber = NULL; - fiber_cond_signal(&replication_connect_cond); - }); int count = 0; struct applier *appliers[VCLOCK_MAX] = {}; auto appliers_guard = make_scoped_guard([&]{ @@ -1342,6 +1313,11 @@ replicaset_sync(void) say_info("replica set sync complete"); box_set_orphan(false); } + /* + * If fiber is cancelled raise error here so that orphan status is + * correct. + */ + fiber_testcancel(); } void diff --git a/src/box/vy_quota.c b/src/box/vy_quota.c index 8f86be915c4a9beda18087065931be20ce542d68..ebbbde09603ef75e2e53e1985c92e50484ca7b4f 100644 --- a/src/box/vy_quota.c +++ b/src/box/vy_quota.c @@ -346,6 +346,10 @@ vy_quota_use(struct vy_quota *q, enum vy_quota_consumer_type type, diag_set(ClientError, ER_VY_QUOTA_TIMEOUT); return -1; } + if (fiber_is_cancelled()) { + diag_set(FiberIsCancelled); + return -1; + } double wait_time = ev_monotonic_now(loop()) - wait_start; if (wait_time > q->too_long_threshold) { diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index daddff071cef4b75664b69af8cb092159c7c6a6e..677bf5749135465d27328d12f30d214cc98586bc 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -730,13 +730,20 @@ vy_scheduler_wait_checkpoint(struct vy_scheduler *scheduler) /* A dump error occurred, abort checkpoint. 
*/ struct error *e = diag_last_error(&scheduler->diag); diag_set_error(diag_get(), e); - say_error("vinyl checkpoint failed: %s", e->errmsg); - return -1; + goto error; } fiber_cond_wait(&scheduler->dump_cond); + if (fiber_is_cancelled()) { + diag_set(FiberIsCancelled); + goto error; + } } say_info("vinyl checkpoint completed"); return 0; +error: + say_error("vinyl checkpoint failed: %s", + diag_last_error(diag_get())->errmsg); + return -1; } void @@ -886,6 +893,7 @@ vy_deferred_delete_batch_process_f(struct cmsg *cmsg) struct vy_deferred_delete_batch *batch = container_of(cmsg, struct vy_deferred_delete_batch, cmsg); struct vy_task *task = batch->task; + fiber_set_system(fiber(), true); /* * Wait for memory quota if necessary before starting to * process the batch (we can't yield between statements). diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index 89d81a606dbcb1ae399995ed836543556c5d0c50..12e5412fdde70beb3b5cd32bbeeaf7eaeba6cf8b 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -104,7 +104,6 @@ struct errinj { _(ERRINJ_IPROTO_TX_DELAY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_IPROTO_WRITE_ERROR_DELAY, ERRINJ_BOOL, {.bparam = false})\ _(ERRINJ_LOG_ROTATE, ERRINJ_BOOL, {.bparam = false}) \ - _(ERRINJ_MAIN_MAKE_FILE_ON_RETURN, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_NETBOX_DISABLE_ID, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_NETBOX_FLIP_FEATURE, ERRINJ_INT, {.iparam = -1}) \ diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c index 9ec5d2fa69ceef1dcb24c96c69c165cdb34bb0d5..26bc4459e0a7e5865ec21944d8dae78255066bd8 100644 --- a/src/lib/core/fiber.c +++ b/src/lib/core/fiber.c @@ -1187,6 +1187,13 @@ fiber_loop(MAYBE_UNUSED void *data) assert(f != fiber); fiber_wakeup(f); } + if (!(fiber->flags & FIBER_IS_SYSTEM)) { + assert(cord()->client_fiber_count > 0); + cord()->client_fiber_count--; + if (cord()->shutdown_fiber != NULL && + cord()->client_fiber_count == 0) + 
fiber_wakeup(cord()->shutdown_fiber); + } fiber_on_stop(fiber); /* reset pending wakeups */ rlist_del(&fiber->state); @@ -1590,6 +1597,8 @@ fiber_new_ex(const char *name, const struct fiber_attr *fiber_attr, fiber_gc_checker_init(fiber); cord->next_fid++; assert(cord->next_fid > FIBER_ID_MAX_RESERVED); + if (!(fiber->flags & FIBER_IS_SYSTEM)) + cord()->client_fiber_count++; return fiber; @@ -1849,7 +1858,7 @@ cord_create(struct cord *cord, const char *name) cord->sched.name = NULL; fiber_set_name(&cord->sched, "sched"); cord->fiber = &cord->sched; - cord->sched.flags = FIBER_IS_RUNNING; + cord->sched.flags = FIBER_IS_RUNNING | FIBER_IS_SYSTEM; cord->sched.max_slice = zero_slice; cord->max_slice = default_slice; @@ -1884,6 +1893,8 @@ cord_create(struct cord *cord, const char *name) cord->sched.stack_watermark = NULL; #endif signal_stack_init(); + cord->shutdown_fiber = NULL; + cord->client_fiber_count = 0; } void @@ -2339,3 +2350,38 @@ fiber_lua_state(struct fiber *f) { return f->storage.lua.stack; } + +void +fiber_set_system(struct fiber *f, bool yesno) +{ + if (yesno) { + if (!(f->flags & FIBER_IS_SYSTEM)) { + f->flags |= FIBER_IS_SYSTEM; + assert(cord()->client_fiber_count > 0); + cord()->client_fiber_count--; + if (cord()->shutdown_fiber != NULL && + cord()->client_fiber_count == 0) + fiber_wakeup(cord()->shutdown_fiber); + } + } else { + if (f->flags & FIBER_IS_SYSTEM) { + f->flags &= ~FIBER_IS_SYSTEM; + cord()->client_fiber_count++; + } + } +} + +void +fiber_shutdown(void) +{ + assert(cord()->shutdown_fiber == NULL); + struct fiber *fiber; + rlist_foreach_entry(fiber, &cord()->alive, link) { + if (!(fiber->flags & FIBER_IS_SYSTEM)) + fiber_cancel(fiber); + } + cord()->shutdown_fiber = fiber(); + while (cord()->client_fiber_count != 0) + fiber_yield(); + cord()->shutdown_fiber = NULL; +} diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h index 2835220501f250ee29b76ff0a3e8dd96fce74423..e58553bdd2f9fba9b03c28ba418142180b431b8a 100644 --- 
a/src/lib/core/fiber.h +++ b/src/lib/core/fiber.h @@ -852,6 +852,10 @@ struct cord { struct fiber *main_fiber; /** An event triggered to cancel cord main fiber. */ ev_async cancel_event; + /** Number of alive client (non system) fibers. */ + int client_fiber_count; + /** Fiber calling fiber_shutdown. NULL if there is no such. */ + struct fiber *shutdown_fiber; }; extern __thread struct cord *cord_ptr; @@ -1243,6 +1247,14 @@ fiber_check_gc(void); struct lua_State * fiber_lua_state(struct fiber *f); +/** Change whether fiber is system or not. */ +void +fiber_set_system(struct fiber *f, bool yesno); + +/** Cancel all client (non system) fibers and wait until they finished. */ +void +fiber_shutdown(void); + #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/lib/core/fiber_pool.c b/src/lib/core/fiber_pool.c index cdf067a7f70ca77f16798de089f41a9d6ba81335..bd5f47284a520c5aae891fcb52c78f8c034448e5 100644 --- a/src/lib/core/fiber_pool.c +++ b/src/lib/core/fiber_pool.c @@ -61,7 +61,9 @@ fiber_pool_f(va_list ap) f->caller->flags |= FIBER_IS_READY; assert(f->caller->caller == &cord->sched); } + fiber_set_system(fiber(), false); cmsg_deliver(msg); + fiber_set_system(fiber(), true); fiber_check_gc(); /* * Normally fibers die after their function @@ -131,7 +133,17 @@ fiber_pool_cb(ev_loop *loop, struct ev_watcher *watcher, int events) f = rlist_shift_entry(&pool->idle, struct fiber, state); fiber_call(f); } else if (pool->size < pool->max_size) { - f = fiber_new(cord_name(cord()), fiber_pool_f); + /* + * We don't want fibers to be cancellable by client + * while they are in the pool. However system flag is + * reset during processing message from pool endpoint + * so that fiber is made cancellable back. + * + * If some message processing should not be cancellable + * by client then it can just set system flag during + * it's execution. 
+ */ + f = fiber_new_system(cord_name(cord()), fiber_pool_f); if (f == NULL) { diag_log(); break; diff --git a/src/lua/fiber.c b/src/lua/fiber.c index 8a8f413084897d795155ceeb85a2ec4075966c72..bed60bd1ff1a9a4cdb20f813dd84d34892c669c9 100644 --- a/src/lua/fiber.c +++ b/src/lua/fiber.c @@ -883,6 +883,17 @@ lbox_fiber_stall(struct lua_State *L) return 0; } +/** Make fiber system. Takes the fiber as a single argument. */ +static int +lbox_fiber_set_system(struct lua_State *L) +{ + if (lua_gettop(L) != 1) + luaL_error(L, "fiber.set_system(id): bad arguments"); + struct fiber *fiber = lbox_checkfiber(L, 1); + fiber_set_system(fiber, true); + return 0; +} + /** Helper for fiber slice parsing. */ static struct fiber_slice lbox_fiber_slice_parse(struct lua_State *L, int idx) @@ -1018,6 +1029,7 @@ static const struct luaL_Reg fiberlib[] = { {"extend_slice", lbox_fiber_extend_slice}, /* Internal functions, to hide in fiber.lua. */ {"stall", lbox_fiber_stall}, + {"set_system", lbox_fiber_set_system}, {NULL, NULL} }; diff --git a/src/lua/fiber.lua b/src/lua/fiber.lua index faaae31cfd550865eadd9667dfbd6124d0d2ea08..48eda368add5746260531311c840834a8faf50cd 100644 --- a/src/lua/fiber.lua +++ b/src/lua/fiber.lua @@ -73,7 +73,9 @@ fiber.clock = fiber_clock fiber.clock64 = fiber_clock64 local stall = fiber.stall +local fiber_set_system = fiber.set_system fiber.stall = nil +fiber.set_system = nil local worker_next_task = nil local worker_last_task @@ -101,15 +103,21 @@ local function worker_f() end end +local worker_name = 'tasks_worker_fiber' + local function worker_safe_f() pcall(worker_f) -- Worker_f never returns. If the execution is here, this -- fiber is probably canceled and now is not able to sleep. -- Create a new one. 
worker_fiber = fiber.new(worker_safe_f) + fiber_set_system(worker_fiber) + worker_fiber:name(worker_name) end worker_fiber = fiber.new(worker_safe_f) +fiber_set_system(worker_fiber) +worker_fiber:name(worker_name) local function worker_schedule_task(f, arg) local task = {f = f, arg = arg} @@ -125,6 +133,7 @@ end -- Start from '_' to hide it from auto completion. fiber._internal = fiber._internal or {} fiber._internal.schedule_task = worker_schedule_task +fiber._internal.set_system = fiber_set_system setmetatable(fiber, {__serialize = function(self) local res = table.copy(self) diff --git a/src/lua/init.lua b/src/lua/init.lua index 352e7a8c3778809ee0f175dde7f45587549cc29e..e61772a6958aa61716eda44975cc68571ea25181 100644 --- a/src/lua/init.lua +++ b/src/lua/init.lua @@ -157,6 +157,7 @@ local function exit(code) -- os.exit() never yields. After on_shutdown -- fiber completes, we will never wake up again. local TIMEOUT_INFINITY = 500 * 365 * 86400 + fiber._internal.set_system(fiber.self()) while true do fiber.sleep(TIMEOUT_INFINITY) end end rawset(os, "exit", exit) diff --git a/src/main.cc b/src/main.cc index 42f3a68a2f7787b224e6902347089ef4a204c0c3..2bdf6759327da678345b1aea9ff6c2138d79563f 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1109,14 +1109,5 @@ main(int argc, char **argv) free((void *)instance.name); free((void *)instance.config); tarantool_free(); - ERROR_INJECT(ERRINJ_MAIN_MAKE_FILE_ON_RETURN, do { - int fd = open("tt_exit_file.txt.inprogress", - O_WRONLY | O_CREAT | O_TRUNC, -1); - if (fd < 0) - break; - dprintf(fd, "ExitCode: %d\n", exit_code); - close(fd); - rename("tt_exit_file.txt.inprogress", "tt_exit_file.txt"); - } while (false)); return exit_code; } diff --git a/test/box-luatest/gh_7743_term_initial_cfg_snap_test.lua b/test/box-luatest/gh_7743_term_initial_cfg_snap_test.lua index daebfcd69991cb549f2238c910666a2cee3de1cb..c3e87490b01dacb9bdb2a68ab84e6ecafa400272 100644 --- a/test/box-luatest/gh_7743_term_initial_cfg_snap_test.lua +++ 
b/test/box-luatest/gh_7743_term_initial_cfg_snap_test.lua @@ -20,7 +20,6 @@ g.test_sigterm_during_initial_snapshot = function() -- uses usleep() which is a pthread cancellation point. TARANTOOL_RUN_BEFORE_BOX_CFG = [[ box.ctl.set_on_shutdown_timeout(1000) - box.error.injection.set('ERRINJ_MAIN_MAKE_FILE_ON_RETURN', true) box.error.injection.set('ERRINJ_SNAP_WRITE_DELAY', true) ]] } @@ -30,18 +29,7 @@ g.test_sigterm_during_initial_snapshot = function() t.helpers.retrying({}, function() assert(g.server:grep_log('saving snapshot', nil, {filename = logname})) end) - g.server.process:kill('TERM') - local path = fio.pathjoin(g.server.workdir, 'tt_exit_file.txt') - local exit_text - t.helpers.retrying({}, function() - local f = fio.open(path, 'O_RDONLY') - if f == nil then - error('could not open') - end - exit_text = f:read() - f:close() - end) - g.server.process = nil g.server:stop() - t.assert_str_contains(exit_text, 'ExitCode: 0\n') + local panic_msg = "failed to create a checkpoint" + t.assert(g.server:grep_log(panic_msg, nil, {filename = logname})) end diff --git a/test/box-luatest/gh_8530_alter_space_snapshot_test.lua b/test/box-luatest/gh_8530_alter_space_snapshot_test.lua index cddc401f03ce78942de8cdefd84844df86790e23..1e9eb0004d1ad4074587831724374f96d3836b79 100644 --- a/test/box-luatest/gh_8530_alter_space_snapshot_test.lua +++ b/test/box-luatest/gh_8530_alter_space_snapshot_test.lua @@ -49,6 +49,9 @@ g.test_build_index = function(cg) box.snapshot() t.assert_equals(f:status(), 'suspended') end) + -- Use KILL because server will hang on shutdown due to injection. + -- We don't need graceful shutdown for the test anyway. + cg.server.process:kill('KILL') cg.server:restart() cg.server:exec(function() local s = box.space.test @@ -69,6 +72,9 @@ g.test_change_format = function(cg) box.snapshot() t.assert_equals(f:status(), 'suspended') end) + -- Use KILL because server will hang on shutdown due to injection. + -- We don't need graceful shutdown for the test anyway. 
+ cg.server.process:kill('KILL') cg.server:restart() cg.server:exec(function() local s = box.space.test diff --git a/test/box/errinj.result b/test/box/errinj.result index 8c4c8150f1a504cd28a35d5e29eb6b87b8ca3e82..979031600d132e96581efd97bf0ac20ab454b965 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -77,7 +77,6 @@ evals - ERRINJ_IPROTO_TX_DELAY: false - ERRINJ_IPROTO_WRITE_ERROR_DELAY: false - ERRINJ_LOG_ROTATE: false - - ERRINJ_MAIN_MAKE_FILE_ON_RETURN: false - ERRINJ_MEMTX_DELAY_GC: false - ERRINJ_NETBOX_DISABLE_ID: false - ERRINJ_NETBOX_FLIP_FEATURE: -1 diff --git a/test/replication-luatest/shutdown_test.lua b/test/replication-luatest/shutdown_test.lua index 0b1a0f4b78d0fc291f95dd194d33ca64de1d3363..b4901803ff97cd17a54faa91b972364e681d79cf 100644 --- a/test/replication-luatest/shutdown_test.lua +++ b/test/replication-luatest/shutdown_test.lua @@ -17,20 +17,6 @@ g.after_each(function(cg) end end) -local test_no_crash_on_shutdown = function(server) - server.process:kill() - local path = fio.pathjoin(server.workdir, 'tt_exit_file.txt') - t.helpers.retrying({}, function() - t.assert(fio.path.exists(path)) - end) - local fh, err = fio.open(path, 'O_RDONLY') - assert(fh, err) - local str, err = fh:read() - assert(str, err) - fh:close() - t.assert_str_contains(str, 'ExitCode: 0\n') -end - g.test_shutdown_on_rebootstrap = function(cg) t.tarantool.skip_if_not_debug() -- It is critical for test that we can connect to uri but cannot auth. @@ -40,13 +26,7 @@ g.test_shutdown_on_rebootstrap = function(cg) replication = 'no:way@' .. cg.master.net_box_uri, replication_timeout = 100, } - local env = { - -- There will be no connection to replica in test. - TARANTOOL_RUN_BEFORE_BOX_CFG = [[ - box.error.injection.set('ERRINJ_MAIN_MAKE_FILE_ON_RETURN', true) - ]], - } - cg.replica = server:new({box_cfg = cfg, env = env}) + cg.replica = server:new({box_cfg = cfg}) -- Can't not wait because replica will not be bootstrapped. 
cg.replica:start({wait_until_ready = false}) local retry_msg = string.format('will retry every %.2f second', @@ -56,5 +36,7 @@ g.test_shutdown_on_rebootstrap = function(cg) t.helpers.retrying({}, function() t.assert(cg.replica:grep_log(retry_msg, nil, {filename = log})) end) - test_no_crash_on_shutdown(cg.replica) + cg.replica:stop() + local panic_msg = "can't initialize storage: fiber is cancelled" + t.assert(cg.replica:grep_log(panic_msg, nil, {filename = log})) end diff --git a/test/replication-py/cluster.test.py b/test/replication-py/cluster.test.py index 514c874a39b913fcc617adc1cb07da88caa7663f..8249a79876ababab3cae4068edfe95302cfbbb34 100644 --- a/test/replication-py/cluster.test.py +++ b/test/replication-py/cluster.test.py @@ -230,6 +230,7 @@ failed.rpl_master = master failed.name = "failed" failed.deploy(True, wait=False) +failed.crash_expected = True line = "ER_READONLY" if failed.logfile_pos.seek_wait(line): print("'{}' exists in server log".format(line)) diff --git a/test/replication/anon.result b/test/replication/anon.result index 68e629f61b101fbc3a3de2c31c7ad70f68c422ad..997e5f0280b1e274fa903e8fd27d1ab9488add40 100644 --- a/test/replication/anon.result +++ b/test/replication/anon.result @@ -407,7 +407,7 @@ test_run:cmd([[create server replica with rpl_master=replica_anon1,\ | --- | - true | ... -test_run:cmd('start server replica with wait_load=False, wait=False') +test_run:cmd('start server replica with wait_load=False, wait=False, crash_expected=True') | --- | - true | ... diff --git a/test/replication/anon.test.lua b/test/replication/anon.test.lua index 97b2e7d67f9bb381def0777667132f19cee403f0..a2fc8b47df1a244e51811145f533469c5d715681 100644 --- a/test/replication/anon.test.lua +++ b/test/replication/anon.test.lua @@ -146,7 +146,7 @@ test_run:cmd('delete server replica_anon2') -- Check that joining to an anonymous replica is prohibited. 
test_run:cmd([[create server replica with rpl_master=replica_anon1,\ script="replication/replica.lua"]]) -test_run:cmd('start server replica with wait_load=False, wait=False') +test_run:cmd('start server replica with wait_load=False, wait=False, crash_expected=True') test_run:wait_log('replica', 'ER_UNSUPPORTED: Anonymous replica does not support registration of non%-anonymous nodes.', nil, 10) test_run:cmd('stop server replica') test_run:cmd('delete server replica') diff --git a/test/replication/force_recovery.result b/test/replication/force_recovery.result index e142e829ab5c454d26e1b2ebfcbbb6ea771809a2..c278a218a37a02a010580aebc8af156ba8731929 100644 --- a/test/replication/force_recovery.result +++ b/test/replication/force_recovery.result @@ -63,7 +63,7 @@ fio.unlink(xlog) box.cfg{force_recovery = true} --- ... -test_run:cmd("start server test with wait=False") +test_run:cmd("start server test with wait=False, crash_expected=True") --- - true ... diff --git a/test/replication/force_recovery.test.lua b/test/replication/force_recovery.test.lua index bd3b439d2a960ad960586e5e154de3ab1e7b7874..e6f7ae716088e9c4ba611b3ba4c8d183f829d961 100644 --- a/test/replication/force_recovery.test.lua +++ b/test/replication/force_recovery.test.lua @@ -27,7 +27,7 @@ fio.unlink(xlog) -- Check that even though box.cfg.force_recovery is set, -- replication will still fail due to LSN gap. 
box.cfg{force_recovery = true} -test_run:cmd("start server test with wait=False") +test_run:cmd("start server test with wait=False, crash_expected=True") test_run:cmd("switch test") test_run:wait_upstream(1, {message_re = 'Missing %.xlog file', status = 'loading'}) box.space.test:select() diff --git a/test/replication/gh-3637-misc-error-on-replica-auth-fail.result b/test/replication/gh-3637-misc-error-on-replica-auth-fail.result index 98880d8e4088486e95f6bebfe79a0c0556e4ad57..9008f88c11825d3c2673c2facde0ba5ce39c23e1 100644 --- a/test/replication/gh-3637-misc-error-on-replica-auth-fail.result +++ b/test/replication/gh-3637-misc-error-on-replica-auth-fail.result @@ -49,6 +49,19 @@ vclock[0] = nil _ = test_run:wait_vclock('replica_auth', vclock) --- ... +-- Wait server init script finish or server will panic on stop. +test_run:switch('replica_auth') +--- +- true +... +test_run:wait_cond(function() return _G.startup_finished == true end) +--- +- true +... +test_run:switch('default') +--- +- true +... test_run:cmd("stop server replica_auth") --- - true diff --git a/test/replication/gh-3637-misc-error-on-replica-auth-fail.test.lua b/test/replication/gh-3637-misc-error-on-replica-auth-fail.test.lua index c51a2f628977a30b1dc0d8ae1f13ce6a474c728b..6028796d74d9a303aaa6dce89cd7596d4e4c1a37 100644 --- a/test/replication/gh-3637-misc-error-on-replica-auth-fail.test.lua +++ b/test/replication/gh-3637-misc-error-on-replica-auth-fail.test.lua @@ -24,6 +24,11 @@ vclock = test_run:get_vclock('default') vclock[0] = nil _ = test_run:wait_vclock('replica_auth', vclock) +-- Wait server init script finish or server will panic on stop. 
+test_run:switch('replica_auth') +test_run:wait_cond(function() return _G.startup_finished == true end) + +test_run:switch('default') test_run:cmd("stop server replica_auth") test_run:cmd("cleanup server replica_auth") test_run:cmd("delete server replica_auth") diff --git a/test/replication/gh-4739-vclock-assert.result b/test/replication/gh-4739-vclock-assert.result index 83896c4e16e96c29cfbee3751bdc1c0686022352..21247e42b8a23ed876e06a9398337cc8bdafeadf 100644 --- a/test/replication/gh-4739-vclock-assert.result +++ b/test/replication/gh-4739-vclock-assert.result @@ -56,7 +56,7 @@ end, 10) -- Restart the remote instance. This will make the first instance -- resubscribe without entering orphan mode. -test_run:cmd('restart server rebootstrap2 with wait=False') +test_run:cmd('restart server rebootstrap2 with wait=False, crash_expected=True') | --- | - true | ... diff --git a/test/replication/gh-4739-vclock-assert.test.lua b/test/replication/gh-4739-vclock-assert.test.lua index 5755ad75285a7498a886566977cfd9c4537847ad..781b7bc041158b8309ffc3c4c8a0bb20fa0331c0 100644 --- a/test/replication/gh-4739-vclock-assert.test.lua +++ b/test/replication/gh-4739-vclock-assert.test.lua @@ -24,7 +24,7 @@ end, 10) -- Restart the remote instance. This will make the first instance -- resubscribe without entering orphan mode. 
-test_run:cmd('restart server rebootstrap2 with wait=False') +test_run:cmd('restart server rebootstrap2 with wait=False, crash_expected=True') test_run:cmd('switch rebootstrap1') -- Wait until resubscribe is sent test_run:wait_cond(function()\ diff --git a/test/replication/gh-5613-bootstrap-prefer-booted.result b/test/replication/gh-5613-bootstrap-prefer-booted.result index d31b66c191ff3e3a84a35f7f8b787b8f7225da63..077b2992c3897badfe470fac3fc7116c606bce32 100644 --- a/test/replication/gh-5613-bootstrap-prefer-booted.result +++ b/test/replication/gh-5613-bootstrap-prefer-booted.result @@ -43,7 +43,7 @@ test_run:cmd('create server replica2 with script="replication/gh-5613-replica2.l | --- | - true | ... -test_run:cmd('start server replica2 with wait=False') +test_run:cmd('start server replica2 with wait=False, crash_expected=True') | --- | - true | ... diff --git a/test/replication/gh-5613-bootstrap-prefer-booted.test.lua b/test/replication/gh-5613-bootstrap-prefer-booted.test.lua index 6d4fcd14261cbd22a9ac790a814773e2f727f18e..9300d4e3d47ab0d1779385918f72fbc677151312 100644 --- a/test/replication/gh-5613-bootstrap-prefer-booted.test.lua +++ b/test/replication/gh-5613-bootstrap-prefer-booted.test.lua @@ -17,7 +17,7 @@ box.cfg{read_only = true} test_run:switch('default') test_run:cmd('create server replica2 with script="replication/gh-5613-replica2.lua"') -test_run:cmd('start server replica2 with wait=False') +test_run:cmd('start server replica2 with wait=False, crash_expected=True') opts = {filename = 'gh-5613-replica2.log'} assert(test_run:wait_log(nil, 'ER_READONLY', nil, nil, opts) ~= nil) diff --git a/test/replication/gh-5806-xlog-cleanup.result b/test/replication/gh-5806-xlog-cleanup.result index aa709f8c8b64f00ae9ae6b63952fe98ac5dd3df2..21d6d18b6f9b5d3aa0f9045edee9692103c5d692 100644 --- a/test/replication/gh-5806-xlog-cleanup.result +++ b/test/replication/gh-5806-xlog-cleanup.result @@ -153,7 +153,7 @@ assert(not box.info.gc().is_paused) -- -- Start replica 
and wait for error. -test_run:cmd('start server replica with wait=False, wait_load=False') +test_run:cmd('start server replica with wait=False, wait_load=False, crash_expected=True') | --- | - true | ... diff --git a/test/replication/gh-5806-xlog-cleanup.test.lua b/test/replication/gh-5806-xlog-cleanup.test.lua index 3c4abe5ee4eadea6b0ae409475ab8b8e028910fa..310ab6b641ab8d860492b2f548f25ebf3ce410ac 100644 --- a/test/replication/gh-5806-xlog-cleanup.test.lua +++ b/test/replication/gh-5806-xlog-cleanup.test.lua @@ -78,7 +78,7 @@ assert(not box.info.gc().is_paused) -- -- Start replica and wait for error. -test_run:cmd('start server replica with wait=False, wait_load=False') +test_run:cmd('start server replica with wait=False, wait_load=False, crash_expected=True') -- -- Wait error to appear, 60 seconds should be more than enough, diff --git a/test/replication/prune.result b/test/replication/prune.result index e25e9684e2982aac9a82110804c3dee888afb4fa..b2040cc198d0fc76c690011fac4b511d977c5012 100644 --- a/test/replication/prune.result +++ b/test/replication/prune.result @@ -137,7 +137,7 @@ test_run:cmd('stop server replica1') --- - true ... -test_run:cmd('start server replica1 with args="true", wait=False') +test_run:cmd('start server replica1 with args="true", wait=False, crash_expected=True') --- - true ... 
diff --git a/test/replication/prune.test.lua b/test/replication/prune.test.lua index 68300b270c345ce5484b900b7553cfe8c64d53b4..fd24b707739df8d97bb5297057ba4d9f1a245200 100644 --- a/test/replication/prune.test.lua +++ b/test/replication/prune.test.lua @@ -66,7 +66,7 @@ test_run:cmd('eval replica1 "box.info.replication[1].upstream.message"') -- restart replica and check that replica isn't able to join to cluster test_run:cmd('stop server replica1') -test_run:cmd('start server replica1 with args="true", wait=False') +test_run:cmd('start server replica1 with args="true", wait=False, crash_expected=True') test_run:cmd('switch replica1') test_run:wait_upstream(1, {message_re = "Can't subscribe non%-anonymous replica"}) test_run:cmd('switch default') diff --git a/test/replication/replica_auth.lua b/test/replication/replica_auth.lua index 61d046fc47f0dea090ca4aa7c3c20727a201ad1e..72898c6186517e2449632496432884e9204cea00 100644 --- a/test/replication/replica_auth.lua +++ b/test/replication/replica_auth.lua @@ -4,9 +4,12 @@ local USER_PASS = arg[1] local TIMEOUT = arg[2] and tonumber(arg[2]) or 0.1 require('console').listen(os.getenv('ADMIN')) +_G.startup_finished = false box.cfg({ listen = os.getenv("LISTEN"), replication = USER_PASS .. "@" .. os.getenv("MASTER"), replication_timeout = TIMEOUT, }) + +_G.startup_finished = true diff --git a/test/replication/replica_rejoin.result b/test/replication/replica_rejoin.result index e489c150a6edefc3a6f216d8cff1137a7a7ba46a..0cccc7f0b459af5b7c53324f004b21a0817043fd 100644 --- a/test/replication/replica_rejoin.result +++ b/test/replication/replica_rejoin.result @@ -238,7 +238,7 @@ test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*. box.cfg{checkpoint_count = checkpoint_count} --- ... -test_run:cmd("start server replica with wait=False") +test_run:cmd("start server replica with wait=False, crash_expected=True") --- - true ... 
diff --git a/test/replication/replica_rejoin.test.lua b/test/replication/replica_rejoin.test.lua index 2563177cf55dbae1df5325e066112e61c14f3990..f9d1b45f6316cd7ebae2308f6a71ba9ca2bf5b86 100644 --- a/test/replication/replica_rejoin.test.lua +++ b/test/replication/replica_rejoin.test.lua @@ -90,7 +90,7 @@ for i = 1, 3 do box.space.test:insert{i * 100} end fio = require('fio') test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*.xlog')) == 1 end) or fio.pathjoin(box.cfg.wal_dir, '*.xlog') box.cfg{checkpoint_count = checkpoint_count} -test_run:cmd("start server replica with wait=False") +test_run:cmd("start server replica with wait=False, crash_expected=True") test_run:cmd("switch replica") test_run:wait_upstream(1, {message_re = 'Missing %.xlog file', status = 'loading'}) box.space.test:select() diff --git a/test/unit/fiber.cc b/test/unit/fiber.cc index 2ccd5a8ca75fa682c3b0a417a764db08dc4a56cb..7caab3a8e2737afae8edc862ce210c7bed8e9261 100644 --- a/test/unit/fiber.cc +++ b/test/unit/fiber.cc @@ -581,6 +581,145 @@ fiber_test_leak_modes() say_logger_free(); } +static void +fiber_test_client_fiber_count(void) +{ + header(); + + int count = cord()->client_fiber_count; + + struct fiber *fiber1 = fiber_new("fiber1", wait_cancel_f); + fail_unless(fiber1 != NULL); + fail_unless(++count == cord()->client_fiber_count); + + struct fiber *fiber2 = fiber_new("fiber2", wait_cancel_f); + fail_unless(fiber2 != NULL); + fail_unless(++count == cord()->client_fiber_count); + + struct fiber *fiber3 = fiber_new_system("fiber3", wait_cancel_f); + fail_unless(fiber3 != NULL); + fail_unless(count == cord()->client_fiber_count); + + struct fiber *fiber4 = fiber_new_system("fiber4", wait_cancel_f); + fail_unless(fiber4 != NULL); + fail_unless(count == cord()->client_fiber_count); + + fiber_set_joinable(fiber1, true); + fiber_cancel(fiber1); + fiber_join(fiber1); + fail_unless(--count == cord()->client_fiber_count); + + fiber_set_joinable(fiber4, true); + 
fiber_cancel(fiber4); + fiber_join(fiber4); + fail_unless(count == cord()->client_fiber_count); + + fiber_set_joinable(fiber2, true); + fiber_cancel(fiber2); + fiber_join(fiber2); + fail_unless(--count == cord()->client_fiber_count); + + fiber_set_joinable(fiber3, true); + fiber_cancel(fiber3); + fiber_join(fiber3); + fail_unless(count == cord()->client_fiber_count); + + footer(); +} + +static void +fiber_test_set_system(void) +{ + header(); + + struct fiber *fiber1 = fiber_new("fiber1", wait_cancel_f); + fail_unless(fiber1 != NULL); + int count = cord()->client_fiber_count; + + fiber_set_system(fiber1, true); + fail_unless(--count == cord()->client_fiber_count); + fail_unless((fiber1->flags & FIBER_IS_SYSTEM) != 0); + + fiber_set_system(fiber1, true); + fail_unless(count == cord()->client_fiber_count); + fail_unless((fiber1->flags & FIBER_IS_SYSTEM) != 0); + + fiber_set_system(fiber1, false); + fail_unless(++count == cord()->client_fiber_count); + fail_unless((fiber1->flags & FIBER_IS_SYSTEM) == 0); + + fiber_set_system(fiber1, false); + fail_unless(count == cord()->client_fiber_count); + fail_unless((fiber1->flags & FIBER_IS_SYSTEM) == 0); + + struct fiber *fiber2 = fiber_new_system("fiber2", wait_cancel_f); + fail_unless(fiber2 != NULL); + count = cord()->client_fiber_count; + + fiber_set_system(fiber2, false); + fail_unless(++count == cord()->client_fiber_count); + fail_unless((fiber2->flags & FIBER_IS_SYSTEM) == 0); + + fiber_set_system(fiber2, false); + fail_unless(count == cord()->client_fiber_count); + fail_unless((fiber2->flags & FIBER_IS_SYSTEM) == 0); + + fiber_set_system(fiber2, true); + fail_unless(--count == cord()->client_fiber_count); + fail_unless((fiber2->flags & FIBER_IS_SYSTEM) != 0); + + fiber_set_system(fiber2, true); + fail_unless(count == cord()->client_fiber_count); + fail_unless((fiber2->flags & FIBER_IS_SYSTEM) != 0); + + fiber_set_joinable(fiber1, true); + fiber_cancel(fiber1); + fiber_join(fiber1); + fiber_set_joinable(fiber2, true); + 
fiber_cancel(fiber2);
+	fiber_join(fiber2);
+
+	footer();
+}
+
+static int
+hang_on_cancel_f(va_list ap)
+{
+	while (!fiber_is_cancelled())
+		fiber_yield();
+	fiber_set_system(fiber(), true);
+	while (true)
+		fiber_yield();
+	return 0;
+}
+
+static void
+fiber_test_shutdown(void)
+{
+	header();
+
+	struct fiber *fiber1 = fiber_new("fiber1", wait_cancel_f);
+	fail_unless(fiber1 != NULL);
+	fiber_set_joinable(fiber1, true);
+	struct fiber *fiber2 = fiber_new_system("fiber2", wait_cancel_f);
+	fail_unless(fiber2 != NULL);
+	struct fiber *fiber3 = fiber_new("fiber3", hang_on_cancel_f);
+	fail_unless(fiber3 != NULL);
+
+	fiber_shutdown();
+	fail_unless((fiber1->flags & FIBER_IS_DEAD) != 0);
+	fail_unless((fiber2->flags & FIBER_IS_DEAD) == 0);
+	fail_unless((fiber3->flags & FIBER_IS_DEAD) == 0);
+
+	fiber_join(fiber1);
+
+	fiber_set_joinable(fiber2, true);
+	fiber_cancel(fiber2);
+	fiber_join(fiber2);
+
+	footer();
+}
+
 static int
 main_f(va_list ap)
 {
@@ -597,6 +736,9 @@ main_f(va_list ap)
 	cord_cancel_and_join_test();
 	fiber_test_defaults();
 	fiber_test_leak_modes();
+	fiber_test_client_fiber_count();
+	fiber_test_set_system();
+	fiber_test_shutdown();
 	ev_break(loop(), EVBREAK_ALL);
 	return 0;
 }
@@ -611,7 +753,7 @@ int main()
 	memory_init();
 	fiber_init(fiber_cxx_invoke);
 	fiber_attr_create(&default_attr);
-	struct fiber *main = fiber_new_xc("main", main_f);
+	struct fiber *main = fiber_new_system_xc("main", main_f);
 	fiber_wakeup(main);
 	ev_run(loop(), 0);
 	fiber_free();
diff --git a/test/unit/fiber.result b/test/unit/fiber.result
index 0066138356d2c518a6e5e49e7ca0e5ad394e5853..2b5469cfba87e66554f0431f348d2c6a3edbd870 100644
--- a/test/unit/fiber.result
+++ b/test/unit/fiber.result
@@ -40,3 +40,9 @@ OutOfMemory: Failed to allocate 42 bytes in allocator for exception
  *** fiber_test_leak: done ***
  *** fiber_test_leak ***
  *** fiber_test_leak: done ***
+ *** fiber_test_client_fiber_count ***
+ *** fiber_test_client_fiber_count: done ***
+ *** fiber_test_set_system ***
+ *** fiber_test_set_system: done ***
+ *** fiber_test_shutdown ***
+ *** fiber_test_shutdown: done ***
diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result
index 18d10b077bd0d3b513ea39fb42750345c978fd0f..e30c34ab11c1dc774183890e3b0cc9f2cecbf9d5 100644
--- a/test/vinyl/errinj.result
+++ b/test/vinyl/errinj.result
@@ -1145,8 +1145,8 @@ s:drop()
 ---
 ...
 --
--- Check that tarantool stops immediately even if a vinyl worker
--- thread is blocked (see gh-3225).
+-- Check that tarantool stops immediately if large snapshot write
+-- is in progress.
 --
 s = box.schema.space.create('test', {engine = 'vinyl'})
 ---
@@ -1154,33 +1154,17 @@ s = box.schema.space.create('test', {engine = 'vinyl'})
 _ = s:create_index('pk')
 ---
 ...
-s:replace{1, 1}
----
-- [1, 1]
-...
-box.snapshot()
----
-- ok
-...
-errinj.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 9000)
----
-- ok
-...
-_ = fiber.create(function() s:get(1) end)
+for i = 1, 10000 do s:replace({i}) end
 ---
 ...
-s:replace{1, 2}
----
-- [1, 2]
-...
-errinj.set('ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT', 9000)
+errinj.set('ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT', 0.01)
 ---
 - ok
 ...
 _ = fiber.create(function() box.snapshot() end)
 ---
 ...
-test_run:cmd("restart server default")
+test_run:cmd("restart server default") -- don't stuck
 box.space.test:drop()
 ---
 ...
diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua
index d698b4408481dc58f57958fe37845a711099321a..0a7beac682738df50e71ea6d3f057ca132e3d9f5 100644
--- a/test/vinyl/errinj.test.lua
+++ b/test/vinyl/errinj.test.lua
@@ -414,23 +414,16 @@ box.schema.user.revoke('guest', 'replication')
 s:drop()
 
 --
--- Check that tarantool stops immediately even if a vinyl worker
--- thread is blocked (see gh-3225).
+-- Check that tarantool stops immediately if large snapshot write
+-- is in progress.
-- s = box.schema.space.create('test', {engine = 'vinyl'}) _ = s:create_index('pk') -s:replace{1, 1} -box.snapshot() - -errinj.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 9000) -_ = fiber.create(function() s:get(1) end) - -s:replace{1, 2} - -errinj.set('ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT', 9000) +for i = 1, 10000 do s:replace({i}) end +errinj.set('ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT', 0.01) _ = fiber.create(function() box.snapshot() end) -test_run:cmd("restart server default") +test_run:cmd("restart server default") -- don't stuck box.space.test:drop() -- diff --git a/test/vinyl/errinj_vylog.result b/test/vinyl/errinj_vylog.result index b9ae9332e9b615ad1a58ae2d94c6acdd2dccc208..6ac76b2c816ce3836f95b1a377beba34fea1afa0 100644 --- a/test/vinyl/errinj_vylog.result +++ b/test/vinyl/errinj_vylog.result @@ -399,7 +399,9 @@ fiber.sleep(0.01) --- ... -- Should ignore the incomplete index on recovery. -test_run:cmd('restart server default') +-- Use KILL because server will hang on shutdown due to injection. +-- We don't need graceful shutdown for the test anyway. +test_run:cmd('restart server default with signal=KILL') s = box.space.test --- ... diff --git a/test/vinyl/errinj_vylog.test.lua b/test/vinyl/errinj_vylog.test.lua index 4401f301502c82bb996a862d6e2881d719763deb..54a69c65997bd3ddd5636bf3da2f4f613eec820e 100644 --- a/test/vinyl/errinj_vylog.test.lua +++ b/test/vinyl/errinj_vylog.test.lua @@ -198,7 +198,9 @@ _ = fiber.create(function() s:create_index('sk', {parts = {2, 'unsigned'}}) end) fiber.sleep(0.01) -- Should ignore the incomplete index on recovery. -test_run:cmd('restart server default') +-- Use KILL because server will hang on shutdown due to injection. +-- We don't need graceful shutdown for the test anyway. 
+test_run:cmd('restart server default with signal=KILL') s = box.space.test s.index[1] == nil diff --git a/test/xlog/panic_on_wal_error.result b/test/xlog/panic_on_wal_error.result index c4494ac87a846fccf58ea0d3c4e16b98b2ac4272..0806a96ed20af446b615549de1d3cbe6167c635d 100644 --- a/test/xlog/panic_on_wal_error.result +++ b/test/xlog/panic_on_wal_error.result @@ -121,7 +121,7 @@ box.cfg.force_recovery -- try to start the replica, ha-ha -- (replication should fail, some rows are missing) -- -test_run:cmd("start server replica with wait=False") +test_run:cmd("start server replica with wait=False, crash_expected=True") --- - true ... diff --git a/test/xlog/panic_on_wal_error.test.lua b/test/xlog/panic_on_wal_error.test.lua index eea6aad300ea3b69106ebee15e0e4ef77e528fcc..77bcde7877dec217b9f63af8aeafe596c98cc103 100644 --- a/test/xlog/panic_on_wal_error.test.lua +++ b/test/xlog/panic_on_wal_error.test.lua @@ -57,7 +57,7 @@ box.cfg.force_recovery -- try to start the replica, ha-ha -- (replication should fail, some rows are missing) -- -test_run:cmd("start server replica with wait=False") +test_run:cmd("start server replica with wait=False, crash_expected=True") test_run:cmd("switch replica") -- Need to wait for box.info.replication[1] defined, otherwise test-run fails to -- wait for the upstream status sometimes.