diff --git a/core/tarantool.c b/core/tarantool.c index 497cb698812592efa4970e15afc6c12132c40c39..c679568286e7eb94839f034115b402b904bdb173 100644 --- a/core/tarantool.c +++ b/core/tarantool.c @@ -172,7 +172,7 @@ tarantool_uptime(void) #ifdef STORAGE int -snapshot(void *ev __unused__, int events __unused__) +snapshot(void *ev, int events __unused__) { pid_t p = fork(); if (p < 0) { @@ -180,10 +180,23 @@ snapshot(void *ev __unused__, int events __unused__) return -1; } if (p > 0) { + /* + * If called from a signal handler, we can't + * access any fiber state, and no one is expecting + * to get an execution status. Just return 0 to + * indicate a successful fork. + */ + if (ev != NULL) + return 0; + /* + * This is 'save snapshot' call received from the + * administrative console. Check for the child + * exit status and report it back. This is done to + * make 'save snapshot' synchronous, and propagate + * any possible error up to the user. + */ wait_for_child(p); - assert(p == fiber->cw.rpid); - return WEXITSTATUS(fiber->cw.rstatus); } @@ -223,6 +236,10 @@ sig_int(int signal) _exit(EXIT_SUCCESS); } +/** + * Adjust the process signal mask and add handlers for signals. + */ + static void signal_init(void) { @@ -519,6 +536,8 @@ main(int argc, char **argv) signal_init(); mod_init(); #elif defined(STORAGE) + ev_default_loop(EVFLAG_AUTO); + ev_signal *ev_sig; ev_sig = palloc(eter_pool, sizeof(ev_signal)); ev_signal_init(ev_sig, (void *)snapshot, SIGUSR1); @@ -526,7 +545,6 @@ main(int argc, char **argv) initialize(cfg.slab_alloc_arena, cfg.slab_alloc_minimal, cfg.slab_alloc_factor); signal_init(); - ev_default_loop(0); mod_init(); admin_init(); diff --git a/test/box/snapshot.result b/test/box/snapshot.result index edb9e178186c970e3ea58d6d3574da8afeeafea0..1942f49d144df86208d35e6e943f7e46d3cf0f20 100644 --- a/test/box/snapshot.result +++ b/test/box/snapshot.result @@ -1533,3 +1533,15 @@ delete from t0 where k0 = 1 Delete OK, 1 row affected delete from t0 where k0 = 2 Delete OK, 1 row affected +# +# A test case for http://bugs.launchpad.net/bugs/727174 +# "tarantool_silverbox crashes when saving snapshot on SIGUSR1" +# + +# Increment the lsn number, to make sure there is no such snapshot yet +# +insert into t0 values (1, 'Test tuple') +Insert OK, 1 row affected +Snapshot exists. +delete from t0 where k0=1 +Delete OK, 1 row affected diff --git a/test/box/snapshot.test b/test/box/snapshot.test index 764699f2a3b2d3bd3e981359992a3359cbda05e0..b4ecf0c4d064f01b4627bb28fe36ee77fc7d8abc 100644 --- a/test/box/snapshot.test +++ b/test/box/snapshot.test @@ -1,5 +1,8 @@ # encoding: tarantool # +import yaml +from signal import SIGUSR1 + print """ # Verify that the server starts from a pre-recorded snapshot. # This way we check that the server can read old snapshots (v11) @@ -45,4 +48,39 @@ exec admin "save snapshot" os.chmod(vardir, 0755) exec sql "delete from t0 where k0 = 1" exec sql "delete from t0 where k0 = 2" + +print """# +# A test case for http://bugs.launchpad.net/bugs/727174 +# "tarantool_silverbox crashes when saving snapshot on SIGUSR1" +#""" + +print """ +# Increment the lsn number, to make sure there is no such snapshot yet +#""" + +exec sql "insert into t0 values (1, 'Test tuple')" + +info = yaml.load(admin.execute("show info\n"))["info"] + +pid = info["pid"] +snapshot = str(info["lsn"]).zfill(20) + ".snap" +snapshot = os.path.join(vardir, snapshot) + +iteration = 0 + +MAX_ITERATIONS = 100 + +while not os.access(snapshot, os.F_OK) and iteration < MAX_ITERATIONS: + if iteration % 10 == 0: + os.kill(pid, SIGUSR1) + time.sleep(0.1) + iteration = iteration + 1 + +if iteration == 0 or iteration >= MAX_ITERATIONS: + print "Snapshot is missing." +else: + print "Snapshot exists." + +exec sql "delete from t0 where k0=1" + # vim: syntax=python spell