From 3c2e3a59d85e25b495d0b95366e67a6f19ee00da Mon Sep 17 00:00:00 2001
From: Konstantin Osipov <kostja.osipov@gmail.com>
Date: Mon, 21 Feb 2011 17:07:44 +0300
Subject: [PATCH] Fixes and tests for Bug##712447, Bug##695689, Bug#686411

Bug#712447 "Valgrind reports use of not initialized memory
after 'reload configuration'

Bug#695689 "'save snapshot' does not report error when it occurs"

Bug#686411 "Possible data loss when renaming a snapshot"
---
 core/CMakeLists.txt                    |   2 +-
 core/admin.c                           |  80 +++++---
 core/admin.rl                          |  15 +-
 core/diagnostics.c                     |  90 +++++++++
 core/diagnostics.h                     |  58 ++++++
 core/fiber.c                           |  43 ++--
 core/log_io.c                          | 263 ++++++++++++++++++-------
 core/tarantool.c                       |  36 ++--
 include/fiber.h                        |   5 +-
 include/log_io.h                       |   4 +
 include/say.h                          |  17 +-
 include/tarantool.h                    |   2 +-
 test/box/bad_record.xlog               | Bin 0 -> 22 bytes
 test/box/empty.xlog                    | Bin
 test/box/just_header.xlog              | Bin 0 -> 11 bytes
 test/box/reconfigure.result            |  47 +++++
 test/box/reconfigure.test              |  26 +++
 test/box/show.result                   |  10 +-
 test/box/show.test                     |   3 +
 test/box/snapshot.result               |  28 +++
 test/box/snapshot.test                 |  32 ++-
 test/box/tarantool.cfg                 |   2 +
 test/box/tarantool_bad1.cfg            |   2 +
 test/box/tarantool_bad2.cfg            |   2 +
 test/box/tarantool_bad3.cfg            |   2 +
 test/box/tarantool_bad4.cfg            |   2 +
 test/box/tarantool_bad5.cfg            |   2 +
 test/box/tarantool_good.cfg            |   2 +
 test/box/unfinished.xlog               | Bin 0 -> 86 bytes
 test/box/xlog.result                   |  34 ++++
 test/box/xlog.test                     | 113 +++++++++++
 test/lib/tarantool_silverbox_server.py |   8 +-
 32 files changed, 779 insertions(+), 151 deletions(-)
 create mode 100644 core/diagnostics.c
 create mode 100644 core/diagnostics.h
 create mode 100644 test/box/bad_record.xlog
 create mode 100644 test/box/empty.xlog
 create mode 100644 test/box/just_header.xlog
 create mode 100644 test/box/unfinished.xlog
 create mode 100644 test/box/xlog.result
 create mode 100644 test/box/xlog.test

diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 55d2c0ee84..7d0ef6126c 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -50,7 +50,7 @@ set (recompiled_core_sources
      ${CMAKE_SOURCE_DIR}/core/admin.c
      ${CMAKE_SOURCE_DIR}/core/fiber.c PARENT_SCOPE)
 
-set (common_sources tbuf.c palloc.c util.c
+set (common_sources tbuf.c palloc.c util.c diagnostics.c
     salloc.c pickle.c coro.c stat.c log_io.c
     log_io_remote.c iproto.c)
 
diff --git a/core/admin.c b/core/admin.c
index 000cd85764..ab1f5afc6d 100644
--- a/core/admin.c
+++ b/core/admin.c
@@ -186,15 +186,15 @@ case 6:
 	}
 	goto st0;
 tr12:
-#line 180 "core/admin.rl"
+#line 193 "core/admin.rl"
 	{slab_validate(); ok(out);}
 	goto st108;
 tr19:
-#line 170 "core/admin.rl"
+#line 183 "core/admin.rl"
 	{return 0;}
 	goto st108;
 tr28:
-#line 166 "core/admin.rl"
+#line 179 "core/admin.rl"
 	{strend = p;}
 #line 137 "core/admin.rl"
 	{
@@ -221,12 +221,23 @@ case 6:
 		}
 	goto st108;
 tr66:
-#line 177 "core/admin.rl"
+#line 190 "core/admin.rl"
 	{coredump(60); ok(out);}
 	goto st108;
 tr75:
-#line 178 "core/admin.rl"
-	{snapshot(NULL, 0); ok(out);}
+#line 150 "core/admin.rl"
+	{
+			int ret = snapshot(NULL, 0);
+
+			if (ret == 0)
+				ok(out);
+			else {
+				tbuf_printf(err, " can't save snapshot, errno %d (%s)",
+					    ret, strerror(ret));
+
+				fail(out, err);
+			}
+		}
 	goto st108;
 tr92:
 #line 113 "core/admin.rl"
@@ -249,41 +260,41 @@ case 6:
 		}
 	goto st108;
 tr106:
-#line 172 "core/admin.rl"
+#line 185 "core/admin.rl"
 	{start(out); fiber_info(out); end(out);}
 	goto st108;
 tr112:
-#line 171 "core/admin.rl"
+#line 184 "core/admin.rl"
 	{start(out); mod_info(out); end(out);}
 	goto st108;
 tr117:
-#line 175 "core/admin.rl"
+#line 188 "core/admin.rl"
 	{start(out); palloc_stat(out); end(out);}
 	goto st108;
 tr125:
-#line 174 "core/admin.rl"
+#line 187 "core/admin.rl"
 	{start(out); slab_stat(out); end(out);}
 	goto st108;
 tr129:
-#line 176 "core/admin.rl"
+#line 189 "core/admin.rl"
 	{start(out); stat_print(out);end(out);}
 	goto st108;
 st108:
 	if ( ++p == pe )
 		goto _test_eof108;
 case 108:
-#line 276 "core/admin.c"
+#line 287 "core/admin.c"
 	goto st0;
 tr13:
-#line 180 "core/admin.rl"
+#line 193 "core/admin.rl"
 	{slab_validate(); ok(out);}
 	goto st7;
 tr20:
-#line 170 "core/admin.rl"
+#line 183 "core/admin.rl"
 	{return 0;}
 	goto st7;
 tr29:
-#line 166 "core/admin.rl"
+#line 179 "core/admin.rl"
 	{strend = p;}
 #line 137 "core/admin.rl"
 	{
@@ -310,12 +321,23 @@ case 108:
 		}
 	goto st7;
 tr67:
-#line 177 "core/admin.rl"
+#line 190 "core/admin.rl"
 	{coredump(60); ok(out);}
 	goto st7;
 tr76:
-#line 178 "core/admin.rl"
-	{snapshot(NULL, 0); ok(out);}
+#line 150 "core/admin.rl"
+	{
+			int ret = snapshot(NULL, 0);
+
+			if (ret == 0)
+				ok(out);
+			else {
+				tbuf_printf(err, " can't save snapshot, errno %d (%s)",
+					    ret, strerror(ret));
+
+				fail(out, err);
+			}
+		}
 	goto st7;
 tr93:
 #line 113 "core/admin.rl"
@@ -338,30 +360,30 @@ case 108:
 		}
 	goto st7;
 tr107:
-#line 172 "core/admin.rl"
+#line 185 "core/admin.rl"
 	{start(out); fiber_info(out); end(out);}
 	goto st7;
 tr113:
-#line 171 "core/admin.rl"
+#line 184 "core/admin.rl"
 	{start(out); mod_info(out); end(out);}
 	goto st7;
 tr118:
-#line 175 "core/admin.rl"
+#line 188 "core/admin.rl"
 	{start(out); palloc_stat(out); end(out);}
 	goto st7;
 tr126:
-#line 174 "core/admin.rl"
+#line 187 "core/admin.rl"
 	{start(out); slab_stat(out); end(out);}
 	goto st7;
 tr130:
-#line 176 "core/admin.rl"
+#line 189 "core/admin.rl"
 	{start(out); stat_print(out);end(out);}
 	goto st7;
 st7:
 	if ( ++p == pe )
 		goto _test_eof7;
 case 7:
-#line 365 "core/admin.c"
+#line 387 "core/admin.c"
 	if ( (*p) == 10 )
 		goto st108;
 	goto st0;
@@ -442,28 +464,28 @@ case 15:
 	}
 	goto tr25;
 tr25:
-#line 166 "core/admin.rl"
+#line 179 "core/admin.rl"
 	{strstart = p;}
 	goto st16;
 st16:
 	if ( ++p == pe )
 		goto _test_eof16;
 case 16:
-#line 453 "core/admin.c"
+#line 475 "core/admin.c"
 	switch( (*p) ) {
 		case 10: goto tr28;
 		case 13: goto tr29;
 	}
 	goto st16;
 tr26:
-#line 166 "core/admin.rl"
+#line 179 "core/admin.rl"
 	{strstart = p;}
 	goto st17;
 st17:
 	if ( ++p == pe )
 		goto _test_eof17;
 case 17:
-#line 467 "core/admin.c"
+#line 489 "core/admin.c"
 	switch( (*p) ) {
 		case 10: goto tr28;
 		case 13: goto tr29;
@@ -1427,7 +1449,7 @@ case 107:
 	_out: {}
 	}
 
-#line 186 "core/admin.rl"
+#line 199 "core/admin.rl"
 
 
 	fiber->rbuf->len -= (void *)pe - (void *)fiber->rbuf->data;
diff --git a/core/admin.rl b/core/admin.rl
index 896f203272..1be63602a4 100644
--- a/core/admin.rl
+++ b/core/admin.rl
@@ -147,6 +147,19 @@ admin_dispatch(void)
 				ok(out);
 		}
 
+		action save_snapshot {
+			int ret = snapshot(NULL, 0);
+
+			if (ret == 0)
+				ok(out);
+			else {
+				tbuf_printf(err, " can't save snapshot, errno %d (%s)",
+					    ret, strerror(ret));
+
+				fail(out, err);
+			}
+		}
+
 		eol = "\n" | "\r\n";
 		show = "sh"("o"("w")?)?;
 		info = "in"("f"("o")?)?;
@@ -175,7 +188,7 @@ admin_dispatch(void)
 			    show " "+ palloc		%{start(out); palloc_stat(out); end(out);}	|
 			    show " "+ stat		%{start(out); stat_print(out);end(out);}	|
 			    save " "+ coredump		%{coredump(60); ok(out);}			|
-			    save " "+ snapshot		%{snapshot(NULL, 0); ok(out);}			|
+			    save " "+ snapshot		%save_snapshot					|
 			    exec " "+ string		%mod_exec					|
 			    check " "+ slab		%{slab_validate(); ok(out);}			|
 			    reload " "+ configuration	%reload_configuration);
diff --git a/core/diagnostics.c b/core/diagnostics.c
new file mode 100644
index 0000000000..237c02d113
--- /dev/null
+++ b/core/diagnostics.c
@@ -0,0 +1,90 @@
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met: 1. Redistributions of source code must
+ * retain the above copyright notice, this list of conditions and
+ * the following disclaimer.  2. Redistributions in binary form
+ * must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "diagnostics.h"
+#include "fiber.h"
+#include <errno.h>
+#include <string.h>
+
+static struct Error oom_error = { ENOMEM, "Out of memory" };
+
+/**
+ * Allocate error on heap. Errors are expected to be rare and
+ * small, thus we don't care much about allocation speed, and
+ * memory fragmentation should be negligible.
+ */
+
+static struct Error *error_create(int code, const char *msg_arg)
+{
+	/* Just something large enough. */
+	const int MAX_MSGLEN = 200;
+
+	if (msg_arg == NULL)
+		msg_arg = "";
+
+	size_t msglen = strlen(msg_arg);
+	char *msg;
+
+	if (msglen > MAX_MSGLEN)
+		msglen = MAX_MSGLEN;
+
+	struct Error *error = malloc(sizeof(struct Error) + msglen + 1);
+
+	if (error == NULL)
+		return &oom_error;
+
+	msg = (char *)(error + 1);
+	strncpy(msg, msg_arg, msglen);
+	msg[msglen] = '\0';
+
+	error->code = code;
+	error->msg = msg;
+
+	return error;
+}
+
+
+static void error_destroy(struct Error *error)
+{
+	if (error != &oom_error)
+		free(error);
+}
+
+
+void diag_set_error(int code, const char *message)
+{
+	if (fiber->diagnostics)
+		diag_clear();
+	fiber->diagnostics = error_create(code, message);
+}
+
+
+struct Error *diag_get_last_error()
+{
+	return fiber->diagnostics;
+}
+
+
+void diag_clear()
+{
+	error_destroy(fiber->diagnostics);
+}
diff --git a/core/diagnostics.h b/core/diagnostics.h
new file mode 100644
index 0000000000..c136c8ecdf
--- /dev/null
+++ b/core/diagnostics.h
@@ -0,0 +1,58 @@
+#ifndef TARANTOOL_CORE_DIAGNOSTICS_H_INCLUDED
+#define TARANTOOL_CORE_DIAGNOSTICS_H_INCLUDED
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met: 1. Redistributions of source code must
+ * retain the above copyright notice, this list of conditions and
+ * the following disclaimer.  2. Redistributions in binary form
+ * must reproduce the above copyright notice, this list of
+ * conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is used globally in the program to pass around information
+ * about execution errors. Each fiber has its own error context,
+ * setting an error in one doesn't affect another.
+ */
+
+struct Error
+{
+	/** Most often contains system errno. */
+	int code;
+	/** Text description of the error. Can be NULL. */
+	const char *msg;
+};
+
+/**
+ * Set the last error in the current execution context (fiber).
+ * If another error was already set, it's overwritten.
+ *
+ * @param code  Error code.
+ * @todo: think how to distinguish errno and tarantool codes here.
+ * @param message  Optional text message. Can be NULL.
+ */
+void diag_set_error(int code, const char *msg);
+
+/** Return the last error. Return NULL if no error.
+ */
+struct Error *diag_get_last_error();
+
+/** Clear the last error, if any.
+ */
+void diag_clear();
+
+#endif /* TARANTOOL_CORE_DIAGNOSTICS_H_INCLUDED */
diff --git a/core/fiber.c b/core/fiber.c
index 173bc9f8cd..3d184addfa 100644
--- a/core/fiber.c
+++ b/core/fiber.c
@@ -53,6 +53,7 @@
 #include <util.h>
 #include <stat.h>
 #include <pickle.h>
+#include "diagnostics.h"
 
 static struct fiber sched;
 struct fiber *fiber = &sched;
@@ -154,6 +155,18 @@ fiber_sleep(ev_tstamp delay)
 	yield();
 }
 
+
+/** Wait for a forked child to complete. */
+
+void
+wait_for_child(pid_t pid)
+{
+	ev_child_set(&fiber->cw, pid, 0);
+	ev_child_start(&fiber->cw);
+	yield();
+	ev_child_stop(&fiber->cw);
+}
+
 void
 wait_for(int events)
 {
@@ -305,17 +318,22 @@ fiber_gc(void)
 	prelease(ex_pool);
 }
 
-void
-fiber_zombificate(struct fiber *f)
+
+/** Destroy the currently active fiber and prepare it for reuse.
+ */
+
+static void
+fiber_zombificate()
 {
-	f->name = NULL;
-	f->f = NULL;
-	f->data = NULL;
-	unregister_fid(f);
-	f->fid = 0;
-	fiber_alloc(f);
-
-	SLIST_INSERT_HEAD(&zombie_fibers, f, zombie_link);
+	diag_clear();
+	fiber->name = NULL;
+	fiber->f = NULL;
+	fiber->data = NULL;
+	unregister_fid(fiber);
+	fiber->fid = 0;
+	fiber_alloc(fiber);
+
+	SLIST_INSERT_HEAD(&zombie_fibers, fiber, zombie_link);
 }
 
 static void
@@ -330,7 +348,7 @@ fiber_loop(void *data __unused__)
 		}
 
 		fiber_close();
-		fiber_zombificate(fiber);
+		fiber_zombificate();
 		yield();	/* give control back to scheduler */
 	}
 }
@@ -367,7 +385,8 @@ fiber_create(const char *restrict name, int fd, int inbox_size, void (*f) (void
 		fiber_alloc(fiber);
 		ev_init(&fiber->io, (void *)ev_schedule);
 		ev_init(&fiber->timer, (void *)ev_schedule);
-		fiber->io.data = fiber->timer.data = fiber;
+		ev_init(&fiber->cw, (void *)ev_schedule);
+		fiber->io.data = fiber->timer.data = fiber->cw.data = fiber;
 
 		SLIST_INSERT_HEAD(&fibers, fiber, link);
 	}
diff --git a/core/log_io.c b/core/log_io.c
index c84c45e0d3..ea572971a8 100644
--- a/core/log_io.c
+++ b/core/log_io.c
@@ -46,6 +46,7 @@
 #include <util.h>
 #include <pickle.h>
 #include <tbuf.h>
+#include "diagnostics.h"
 
 const u16 snap_tag = -1;
 const u16 wal_tag = -2;
@@ -58,6 +59,7 @@ const u32 marker_v11 = 0xba0babed;
 const u32 eof_marker_v11 = 0x10adab1e;
 const char *snap_suffix = ".snap";
 const char *xlog_suffix = ".xlog";
+const char *inprogress_suffix = ".inprogress";
 const char *v04 = "0.04\n";
 const char *v03 = "0.03\n";
 const char *v11 = "0.11\n";
@@ -179,7 +181,7 @@ snap_classes(row_reader snap_row_reader, const char *dirname)
 	c[1]->filetype = c[0]->filetype;
 	c[1]->suffix = c[0]->suffix;
 
-	c[0]->dirname = c[1]->dirname = dirname;
+	c[0]->dirname = c[1]->dirname = dirname ? strdup(dirname) : NULL;
 	return c;
 }
 
@@ -198,7 +200,7 @@ xlog_classes(const char *dirname)
 	xlog04_class(c[0]);
 	v11_class(c[1]);
 
-	c[0]->dirname = c[1]->dirname = dirname;
+	c[0]->dirname = c[1]->dirname = dirname ? strdup(dirname) : NULL;
 	return c;
 }
 
@@ -365,14 +367,30 @@ scan_dir(struct log_io_class *class, i64 **ret_lsn)
 
 	errno = 0;
 	while ((dent = readdir(dh)) != NULL) {
-		size_t len = strlen(dent->d_name) + 1;
-		if (len < suffix_len + 1)
+		char *suffix = strrchr(dent->d_name, '.');
+
+		if (suffix == NULL)
 			continue;
-		if (strcmp(dent->d_name + len - 1 - suffix_len, class->suffix))
+
+		char *sub_suffix = memrchr(dent->d_name, '.', suffix - dent->d_name);
+
+		/*
+		 * A valid suffix is either .xlog or
+		 * .xlog.inprogress, given class->suffix ==
+		 * 'xlog'.
+		 */
+		bool valid_suffix;
+		valid_suffix = (strcmp(suffix, class->suffix) == 0 ||
+				(sub_suffix != NULL &&
+				 strcmp(suffix, inprogress_suffix) == 0 &&
+				 strncmp(sub_suffix, class->suffix, suffix_len) == 0));
+
+		if (!valid_suffix)
 			continue;
 
 		lsn[i] = strtoll(dent->d_name, &parse_suffix, 10);
-		if (strcmp(parse_suffix, class->suffix) != 0) {	/* d_name doesn't parse entirely, ignore it */
+		if (strncmp(parse_suffix, class->suffix, suffix_len) != 0) {
+			/* d_name doesn't parse entirely, ignore it */
 			say_warn("can't parse `%s', skipping", dent->d_name);
 			continue;
 		}
@@ -548,12 +566,59 @@ row_reader_v11(FILE *f, struct palloc_pool *pool)
 	return m;
 }
 
+static int
+inprogress_log_rename(char *filename)
+{
+	char *new_filename;
+	char *suffix = strrchr(filename, '.');
+
+	assert(suffix);
+	assert(strcmp(suffix, inprogress_suffix) == 0);
+
+	/* Create a new filename without '.inprogress' suffix. */
+	new_filename = strndupa(filename, suffix - filename);
+
+	if (rename(filename, new_filename) != 0) {
+		say_syserror("can't rename %s to %s", filename, new_filename);
+
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+inprogress_log_unlink(char *filename)
+{
+#ifndef NDEBUG
+	char *suffix = strrchr(filename, '.');
+	assert(suffix);
+	assert(strcmp(suffix, inprogress_suffix) == 0);
+#endif
+	if (unlink(filename) != 0) {
+		if (errno == ENONET)
+			return 0;
+
+		say_syserror("can't unlink %s", filename);
+
+		return -1;
+	}
+
+	return 0;
+}
+
 int
 close_log(struct log_io **lptr)
 {
 	struct log_io *l = *lptr;
 	int r;
 
+	if (l->rows == 1 && l->mode == LOG_WRITE) {
+		/* Rename WAL before finalize. */
+		if (inprogress_log_rename(l->filename) != 0)
+			panic("can't rename 'inprogress' WAL");
+	}
+
 	if (l->class->eof_marker_size > 0 && l->mode == LOG_WRITE) {
 		if (fwrite(&l->class->eof_marker, l->class->eof_marker_size, 1, l->f) != 1)
 			say_error("can't write eof_marker");
@@ -628,13 +693,12 @@ format_filename(char *filename, struct log_io_class *class, i64 lsn, int suffix)
 			 class->dirname, lsn, class->suffix);
 		break;
 	case -1:
-		snprintf(filename, PATH_MAX, "%s/%020" PRIi64 "%s.inprogress",
-			 class->dirname, lsn, class->suffix);
+		snprintf(filename, PATH_MAX, "%s/%020" PRIi64 "%s%s",
+			 class->dirname, lsn, class->suffix, inprogress_suffix);
 		break;
 	default:
-		snprintf(filename, PATH_MAX, "%s/%020" PRIi64 "%s.%i",
-			 class->dirname, lsn, class->suffix, suffix);
-		break;
+		/* not reached */
+		assert(0);
 	}
 	return filename;
 }
@@ -646,14 +710,15 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l
 	char filetype[32], version[32], buf[256];
 	struct log_io *l = NULL;
 	char *r;
-	char *error = "unknown error";
 
-	l = malloc(sizeof(*l));
-	if (l == NULL)
+	l = calloc(1, sizeof(*l));
+	if (l == NULL) {
+		diag_set_error(errno, strerror(errno));
 		goto error;
-	memset(l, 0, sizeof(*l));
+	}
 	l->mode = LOG_READ;
 	l->stat.data = recover;
+	l->is_inprogress = suffix == -1 ? true : false;
 
 	/* when filename is not null it is forced open for debug reading */
 	if (filename == NULL) {
@@ -668,24 +733,24 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l
 
 	l->f = fopen(l->filename, "r");
 	if (l->f == NULL) {
-		error = strerror(errno);
+		diag_set_error(errno, strerror(errno));
 		goto error;
 	}
 
 	r = fgets(filetype, sizeof(filetype), l->f);
 	if (r == NULL) {
-		error = "header reading failed";
+		diag_set_error(1, "header reading failed");
 		goto error;
 	}
 
 	r = fgets(version, sizeof(version), l->f);
 	if (r == NULL) {
-		error = "header reading failed";
+		diag_set_error(1, "header reading failed");
 		goto error;
 	}
 
 	if (strcmp((*class)->filetype, filetype) != 0) {
-		error = "unknown filetype";
+		diag_set_error(1, "unknown filetype");
 		goto error;
 	}
 
@@ -696,7 +761,7 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l
 	}
 
 	if (*class == NULL) {
-		error = "unknown version";
+		diag_set_error(1, "unknown version");
 		goto error;
 	}
 	l->class = *class;
@@ -705,7 +770,7 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l
 		for (;;) {
 			r = fgets(buf, sizeof(buf), l->f);
 			if (r == NULL) {
-				error = "header reading failed";
+				diag_set_error(1, "header reading failed");
 				goto error;
 			}
 			if (strcmp(r, "\n") == 0 || strcmp(r, "\r\n") == 0)
@@ -714,14 +779,15 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l
 	} else {
 		r = fgets(buf, sizeof(buf), l->f);	/* skip line with time */
 		if (r == NULL) {
-			error = "header reading failed";
+			diag_set_error(1, "header reading failed");
 			goto error;
 		}
 	}
 
 	return l;
       error:
-	say_error("open_for_read: failed to open `%s': %s", l->filename, error);
+	say_error("open_for_read: failed to open `%s': %s", l->filename,
+		  diag_get_last_error()->msg);
 	if (l != NULL) {
 		if (l->f != NULL)
 			fclose(l->f);
@@ -735,12 +801,14 @@ open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 l
 {
 	struct log_io *l = NULL;
 	int fd;
-	char *error = "unknown error";
+	char *dot;
+	bool exists;
 
-	l = malloc(sizeof(*l));
-	if (l == NULL)
+	l = calloc(1, sizeof(*l));
+	if (l == NULL) {
+		diag_set_error(errno, strerror(errno));
 		goto error;
-	memset(l, 0, sizeof(*l));
+	}
 	l->mode = LOG_WRITE;
 	l->class = class;
 	l->stat.data = recover;
@@ -750,15 +818,34 @@ open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 l
 	format_filename(l->filename, class, lsn, suffix);
 	say_debug("find_log for writing `%s'", l->filename);
 
+	if (suffix == -1) {
+		/*
+		 * Check whether a file with this name already exists.
+		 * We don't overwrite existing files.
+		 */
+		dot = strrchr(l->filename, '.');
+		*dot = '\0';
+		exists = access(l->filename, F_OK) == 0;
+		*dot = '.';
+		if (exists) {
+			diag_set_error(EEXIST, "exists");
+			goto error;
+		}
+	}
+
+	/*
+	 * Open the <lsn>.<suffix>.inprogress file. If it
+	 * exists, open will fail.
+	 */
 	fd = open(l->filename, O_WRONLY | O_CREAT | O_EXCL | O_APPEND, 0664);
 	if (fd < 0) {
-		error = strerror(errno);
+		diag_set_error(errno, strerror(errno));
 		goto error;
 	}
 
 	l->f = fdopen(fd, "a");
 	if (l->f == NULL) {
-		error = strerror(errno);
+		diag_set_error(errno, strerror(errno));
 		goto error;
 	}
 
@@ -766,7 +853,8 @@ open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 l
 	write_header(l);
 	return l;
       error:
-	say_error("find_log: failed to open `%s': %s", l->filename, error);
+	say_error("find_log: failed to open `%s': %s", l->filename,
+		  diag_get_last_error()->msg);
 	if (l != NULL) {
 		if (l->f != NULL)
 			fclose(l->f);
@@ -938,8 +1026,6 @@ recover_remaining_wals(struct recovery_state *r)
 {
 	int result = 0;
 	struct log_io *next_wal;
-	char *name;
-	int suffix = 0;
 	i64 current_lsn, wal_greatest_lsn;
 	size_t rows_before;
 
@@ -965,13 +1051,30 @@ recover_remaining_wals(struct recovery_state *r)
 		}
 
 		current_lsn = r->confirmed_lsn + 1;	/* TODO: find better way looking for next xlog */
-		next_wal = open_for_read(r, r->wal_class, current_lsn, suffix, NULL);
+		next_wal = open_for_read(r, r->wal_class, current_lsn, 0, NULL);
+
+		/*
+		 * When doing final recovery, and dealing with the
+		 * last file, try opening .<suffix>.inprogress.
+		 */
+		if (next_wal == NULL && r->finalize && current_lsn == wal_greatest_lsn) {
+			next_wal = open_for_read(r, r->wal_class, current_lsn, -1, NULL);
+			if (next_wal == NULL) {
+				char *filename =
+					format_filename(NULL, *r->wal_class, current_lsn, -1);
+
+				say_warn("unlink broken %s wal", filename);
+				if (inprogress_log_unlink(filename) != 0)
+					panic("can't unlink 'inprogres' wal");
+			}
+		}
+
 		if (next_wal == NULL) {
-			if (suffix++ < 10)
-				continue;
 			result = 0;
 			break;
 		}
+
+
 		assert(r->current_wal == NULL);
 		r->current_wal = next_wal;
 		say_info("recover from `%s'", r->current_wal->filename);
@@ -987,26 +1090,10 @@ recover_remaining_wals(struct recovery_state *r)
 		if (r->current_wal->rows > 0 && r->current_wal->rows != rows_before)
 			r->current_wal->retry = 0;
 
-		/*
-		 * rows == 0 could possible indicate an filename confilct
-		 * retry filename with same lsn but with bigger suffix
-		 */
+		/* rows == 0 could possible indicate to an empty WAL */
 		if (r->current_wal->rows == 0) {
-			say_error("read zero records from %s, RETRY", r->current_wal->filename);
-			if (suffix++ < 10)
-				continue;
-
-			say_error("too many filename conflicters");
-			result = -1;
+			say_error("read zero records from %s", r->current_wal->filename);
 			break;
-		} else {
-			name = format_filename(NULL, r->wal_prefered_class,
-					       current_lsn, suffix + 1);
-			if (access(name, F_OK) == 0) {
-				say_error("found conflicter `%s' after successful reading", name);
-				result = -1;
-				break;
-			}
 		}
 
 		if (result == LOG_EOF) {
@@ -1014,15 +1101,14 @@ recover_remaining_wals(struct recovery_state *r)
 				 r->confirmed_lsn);
 			close_log(&r->current_wal);
 		}
-		suffix = 0;
 	}
 
 	/*
-	 * it's not a fatal error then last wal is empty
-	 * but if we lost some logs it is fatal error
+	 * It's not a fatal error when last WAL is empty, but if
+	 * we lost some logs it is a fatal error.
 	 */
 	if (wal_greatest_lsn > r->confirmed_lsn + 1) {
-		say_error("not all wals have been successfuly read");
+		say_error("not all WALs have been successfully read");
 		result = -1;
 	}
 
@@ -1140,6 +1226,8 @@ recover_finalize(struct recovery_state *r)
 {
 	int result;
 
+	r->finalize = true;
+
 	if (ev_is_active(&r->wal_timer))
 		ev_timer_stop(&r->wal_timer);
 
@@ -1150,10 +1238,29 @@ recover_finalize(struct recovery_state *r)
 
 	result = recover_remaining_wals(r);
 	if (result < 0)
-		panic("unable to scucessfully finalize recovery");
+		panic("unable to successfully finalize recovery");
 
 	if (r->current_wal != NULL && result != LOG_EOF) {
 		say_warn("wal `%s' wasn't correctly closed", r->current_wal->filename);
+
+		if (!r->current_wal->is_inprogress) {
+			if (r->current_wal->rows == 0)
+			        /* Regular WAL (not inprogress) must contain at least one row */
+				panic("zero rows was successfully read from last WAL `%s'",
+				      r->current_wal->filename);
+		} else if (r->current_wal->rows == 0) {
+			/* Unlink empty inprogress WAL */
+			say_warn("unlink broken %s wal", r->current_wal->filename);
+			if (inprogress_log_unlink(r->current_wal->filename) != 0)
+				panic("can't unlink 'inprogress' wal");
+		} else if (r->current_wal->rows == 1) {
+			/* Rename inprogress wal with one row */
+			say_warn("rename unfinished %s wal", r->current_wal->filename);
+			if (inprogress_log_rename(r->current_wal->filename) != 0)
+				panic("can't rename 'inprogress' wal");
+		} else
+			panic("too many rows in inprogress WAL `%s'", r->current_wal->filename);
+
 		close_log(&r->current_wal);
 	}
 }
@@ -1169,11 +1276,9 @@ write_to_disk(void *_state, struct tbuf *t)
 {
 	static struct log_io *wal = NULL, *wal_to_close = NULL;
 	static ev_tstamp last_flush = 0;
-	static size_t rows = 0;
 	struct tbuf *reply, *header;
 	struct recovery_state *r = _state;
 	u32 result = 0;
-	int suffix = 0;
 
 	/* we're not running inside ev_loop, so update ev_now manually */
 	ev_now_update();
@@ -1187,11 +1292,17 @@ write_to_disk(void *_state, struct tbuf *t)
 
 	reply = tbuf_alloc(t->pool);
 
-	/* if there is filename conflict, try filename with lager suffix */
-	while (wal == NULL && suffix < 10) {
-		wal = open_for_write(r, r->wal_prefered_class, wal_write_request(t)->lsn, suffix);
-		suffix++;
+	if (wal == NULL)
+		/* Open WAL with '.inprogress' suffix. */
+		wal = open_for_write(r, r->wal_prefered_class, wal_write_request(t)->lsn, -1);
+	else if (wal->rows == 1) {
+		/* rename wal after first successfull write to name without inprogress suffix*/
+		if (inprogress_log_rename(wal->filename) != 0) {
+			say_error("can't rename inprogress wal");
+			goto fail;
+		}
 	}
+
 	if (wal_to_close != NULL) {
 		if (close_log(&wal_to_close) != 0)
 			goto fail;
@@ -1242,13 +1353,11 @@ write_to_disk(void *_state, struct tbuf *t)
 		last_flush = ev_now();
 	}
 
-	rows++;
-	if (wal->class->rows_per_file <= rows ||
+	wal->rows++;
+	if (wal->class->rows_per_file <= wal->rows ||
 	    (wal_write_request(t)->lsn + 1) % wal->class->rows_per_file == 0) {
 		wal_to_close = wal;
 		wal = NULL;
-		rows = 0;
-		suffix = 0;
 	}
 
 	tbuf_append(reply, &result, sizeof(result));
@@ -1296,6 +1405,9 @@ recover_init(const char *snap_dirname, const char *wal_dirname,
 {
 	struct recovery_state *r = p0alloc(eter_pool, sizeof(*r));
 
+	if (rows_per_file <= 1)
+		panic("inacceptable value of 'rows_per_file'");
+
 	r->wal_timer.data = r;
 	r->row_handler = row_handler;
 	r->data = data;
@@ -1416,13 +1528,19 @@ snapshot_save(struct recovery_state *r, void (*f) (struct log_io_iter *))
 
 	snap = open_for_write(r, r->snap_prefered_class, r->confirmed_lsn, -1);
 	if (snap == NULL)
-		panic("can't open snap for writing");
+		panic_status(diag_get_last_error()->code,
+			     "can't open snap for writing");
 
 	iter_open(snap, &i, write_rows);
 
 	if (r->snap_io_rate_limit > 0)
 		i.io_rate_limit = r->snap_io_rate_limit;
 
+	/*
+	 * While saving a snapshot, snapshot name is set to
+	 * <lsn>.snap.inprogress. When done, the snapshot is
+	 * renamed to <lsn>.snap.
+	 */
 	strncpy(final_filename, snap->filename, PATH_MAX);
 	dot = strrchr(final_filename, '.');
 	*dot = 0;
@@ -1433,8 +1551,11 @@ snapshot_save(struct recovery_state *r, void (*f) (struct log_io_iter *))
 	if (fsync(fileno(snap->f)) < 0)
 		panic("fsync");
 
-	if (rename(snap->filename, final_filename) != 0)
-		panic("rename");
+	if (link(snap->filename, final_filename) == -1)
+		panic_status(errno, "can't create hard link to snapshot");
+
+	if (unlink(snap->filename) == -1)
+		say_syserror("can't unlink 'inprogress' snapshot");
 
 	close_log(&snap);
 
diff --git a/core/tarantool.c b/core/tarantool.c
index da268a4aa6..6964498f28 100644
--- a/core/tarantool.c
+++ b/core/tarantool.c
@@ -112,8 +112,9 @@ reload_cfg(struct tbuf *out)
 		return -1;
 	}
 	ret = load_cfg(&new_cfg1, 1);
-	tbuf_append(out, cfg_out->data, cfg_out->len);
 	if (ret == -1) {
+		tbuf_append(out, cfg_out->data, cfg_out->len);
+
 		destroy_tarantool_cfg(&new_cfg1);
 
 		return -1;
@@ -125,8 +126,9 @@ reload_cfg(struct tbuf *out)
 		return -1;
 	}
 	ret = load_cfg(&new_cfg2, 0);
-	tbuf_append(out, cfg_out->data, cfg_out->len);
 	if (ret == -1) {
+		tbuf_append(out, cfg_out->data, cfg_out->len);
+
 		destroy_tarantool_cfg(&new_cfg1);
 
 		return -1;
@@ -138,6 +140,7 @@ reload_cfg(struct tbuf *out)
 		destroy_tarantool_cfg(&new_cfg2);
 
 		out_warning(0, "Could not accept read only '%s' option", diff);
+		tbuf_append(out, cfg_out->data, cfg_out->len);
 
 		return -1;
 	}
@@ -167,16 +170,21 @@ tarantool_uptime(void)
 }
 
 #ifdef STORAGE
-void
+int
 snapshot(void *ev __unused__, int events __unused__)
 {
 	pid_t p = fork();
 	if (p < 0) {
 		say_syserror("fork");
-		return;
+		return -1;
+	}
+	if (p > 0) {
+		wait_for_child(p);
+
+		assert(p == fiber->cw.rpid);
+
+		return WEXITSTATUS(fiber->cw.rstatus);
 	}
-	if (p > 0)
-		return;
 
 	fiber->name = "dumper";
 	set_proc_title("dumper (%" PRIu32 ")", getppid());
@@ -186,16 +194,10 @@ snapshot(void *ev __unused__, int events __unused__)
 	__gcov_flush();
 #endif
 	_exit(EXIT_SUCCESS);
-}
-#endif
 
-static void
-sig_child(int signal __unused__)
-{
-	int child_status;
-	/* TODO: watch child status & destroy corresponding fibers */
-	while (waitpid(-1, &child_status, WNOHANG) > 0) ;
+	return 0;
 }
+#endif
 
 static void
 sig_int(int signal)
@@ -231,12 +233,6 @@ signal_init(void)
 	if (sigaction(SIGPIPE, &sa, 0) == -1)
 		goto error;
 
-	sa.sa_handler = sig_child;
-	sa.sa_flags = SA_RESTART;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(SIGCHLD, &sa, NULL) == -1)
-		goto error;
-
 	sa.sa_handler = sig_int;
 	sa.sa_flags = 0;
 	sigemptyset(&sa.sa_mask);
diff --git a/include/fiber.h b/include/fiber.h
index 35601ba5fa..9113d06a83 100644
--- a/include/fiber.h
+++ b/include/fiber.h
@@ -65,6 +65,7 @@ struct fiber {
 	int fd;
 
 	ev_timer timer;
+	ev_child cw;
 
 	struct tbuf *iov;
 	size_t iov_cnt;
@@ -83,6 +84,8 @@ struct fiber {
 	void *f_data;
 
 	void *data;
+	/** Information about the last error. */
+	void *diagnostics;
 
 	u64 cookie;
 	bool has_peer;
@@ -109,8 +112,8 @@ extern struct fiber *fiber;
 
 void fiber_init(void);
 struct fiber *fiber_create(const char *name, int fd, int inbox_size, void (*f) (void *), void *);
-void fiber_zombificate(struct fiber *f);
 void wait_for(int events);
+void wait_for_child(pid_t pid);
 void unwait(int events);
 void yield(void);
 void raise_(int);
diff --git a/include/log_io.h b/include/log_io.h
index 25e7246f16..64fe367f42 100644
--- a/include/log_io.h
+++ b/include/log_io.h
@@ -73,6 +73,8 @@ struct log_io {
 	size_t rows;
 	size_t retry;
 	char filename[PATH_MAX + 1];
+
+	bool is_inprogress;
 };
 
 struct recovery_state {
@@ -92,6 +94,8 @@ struct recovery_state {
 	int snap_io_rate_limit;
 	u64 cookie;
 
+	bool finalize;
+
 	/* pointer to user supplied custom data */
 	void *data;
 };
diff --git a/include/say.h b/include/say.h
index a0cce23530..b89183f2f8 100644
--- a/include/say.h
+++ b/include/say.h
@@ -54,13 +54,14 @@ void _say(int level, const char *filename, int line, const char *error,
 
 #define say(level, ...) ({ _say(level, __FILE__, __LINE__, __VA_ARGS__); })
 
-#define panic(...)		({ say(S_FATAL, NULL, __VA_ARGS__); exit(EXIT_FAILURE); })
-#define panic_syserror(...)	({ say(S_FATAL, strerror(errno), __VA_ARGS__); exit(EXIT_FAILURE); })
-#define say_syserror(...)	say(S_ERROR, strerror(errno), __VA_ARGS__)
-#define say_error(...)		say(S_ERROR, NULL, __VA_ARGS__)
-#define say_crit(...)		say(S_CRIT, NULL, __VA_ARGS__)
-#define say_warn(...)		say(S_WARN, NULL, __VA_ARGS__)
-#define say_info(...)		say(S_INFO, NULL, __VA_ARGS__)
-#define say_debug(...)		say(S_DEBUG, NULL, __VA_ARGS__)
+#define panic_status(status, ...)	({ say(S_FATAL, NULL, __VA_ARGS__); exit(status); })
+#define panic(...)			panic_status(EXIT_FAILURE, __VA_ARGS__)
+#define panic_syserror(...)		({ say(S_FATAL, strerror(errno), __VA_ARGS__); exit(EXIT_FAILURE); })
+#define say_syserror(...)		say(S_ERROR, strerror(errno), __VA_ARGS__)
+#define say_error(...)			say(S_ERROR, NULL, __VA_ARGS__)
+#define say_crit(...)			say(S_CRIT, NULL, __VA_ARGS__)
+#define say_warn(...)			say(S_WARN, NULL, __VA_ARGS__)
+#define say_info(...)			say(S_INFO, NULL, __VA_ARGS__)
+#define say_debug(...)			say(S_DEBUG, NULL, __VA_ARGS__)
 
 #endif
diff --git a/include/tarantool.h b/include/tarantool.h
index 47f1664182..78b80b8c1f 100644
--- a/include/tarantool.h
+++ b/include/tarantool.h
@@ -48,7 +48,7 @@ extern const char *cfg_filename;
 extern bool init_storage, booting;
 extern char *binary_filename;
 i32 reload_cfg(struct tbuf *out);
-void snapshot(void *ev __unused__, int events __unused__);
+int snapshot(void *ev __unused__, int events __unused__);
 const char *tarantool_version(void);
 void tarantool_info(struct tbuf *out);
 double tarantool_uptime(void);
diff --git a/test/box/bad_record.xlog b/test/box/bad_record.xlog
new file mode 100644
index 0000000000000000000000000000000000000000..28e8cd35f26399e406ef600e314af78bdafa04d9
GIT binary patch
literal 22
dcma#>@ptDk&@(jT;z~+PQ7B4H&M!*g0sua71?d0)

literal 0
HcmV?d00001

diff --git a/test/box/empty.xlog b/test/box/empty.xlog
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/test/box/just_header.xlog b/test/box/just_header.xlog
new file mode 100644
index 0000000000000000000000000000000000000000..3472cc4eb0dbaff89e0ff37b3637d2ff6580af1a
GIT binary patch
literal 11
Scma#>@ptDk&@(jT;sO8<ivk${

literal 0
HcmV?d00001

diff --git a/test/box/reconfigure.result b/test/box/reconfigure.result
index 7faaa71b78..3ddcf40019 100644
--- a/test/box/reconfigure.result
+++ b/test/box/reconfigure.result
@@ -3,6 +3,7 @@ reload configuration
 fail:
  - Could not accept read only 'slab_alloc_arena' option
  - Could not accept read only 'pid_file' option
+ - Could not accept read only 'logger' option
  - Could not accept read only 'primary_port' option
  - Could not accept read only 'secondary_port' option
  - Could not accept read only 'admin_port' option
@@ -50,3 +51,49 @@ reload configuration
 fail:
  - can't open config `tarantool.cfg'
 ...
+reload configuration
+---
+ok
+...
+#
+# A test case for http://bugs.launchpad.net/bugs/712447:
+# Valgrind reports use of not initialized memory after 'reload
+# configuration'
+#
+insert into t0 values (1, 'tuple')
+Insert OK, 1 row affected
+save snapshot
+---
+ok
+...
+reload configuration
+---
+fail:
+ - can't open config `tarantool.cfg'
+...
+insert into t0 values (2, 'tuple 2')
+Insert OK, 1 row affected
+save snapshot
+---
+ok
+...
+reload configuration
+---
+ok
+...
+insert into t0 values (3, 'tuple 3')
+Insert OK, 1 row affected
+save snapshot
+---
+ok
+...
+reload configuration
+---
+ok
+...
+delete from t0 where k0 = 1
+Delete OK, 1 row affected
+delete from t0 where k0 = 2
+Delete OK, 1 row affected
+delete from t0 where k0 = 3
+Delete OK, 1 row affected
diff --git a/test/box/reconfigure.test b/test/box/reconfigure.test
index 6a292b1428..afa62fec6f 100644
--- a/test/box/reconfigure.test
+++ b/test/box/reconfigure.test
@@ -44,6 +44,32 @@ exec admin "reload configuration"
 os.unlink(cfg)
 exec admin "reload configuration"
 
+# cleanup
 # restore default
 os.rename(oldcfg, cfg)
+exec admin "reload configuration"
+
+print """#
+# A test case for http://bugs.launchpad.net/bugs/712447:
+# Valgrind reports use of not initialized memory after 'reload
+# configuration'
+#"""
+exec sql "insert into t0 values (1, 'tuple')"
+exec admin "save snapshot"
+os.rename(cfg, oldcfg)
+exec admin "reload configuration"
+exec sql "insert into t0 values (2, 'tuple 2')"
+exec admin "save snapshot"
+os.symlink(abspath("box/tarantool_good.cfg"), cfg)
+exec admin "reload configuration"
+exec sql "insert into t0 values (3, 'tuple 3')"
+exec admin "save snapshot"
+# Cleanup
+os.unlink(cfg)
+os.rename(oldcfg, cfg)
+exec admin "reload configuration"
+exec sql "delete from t0 where k0 = 1"
+exec sql "delete from t0 where k0 = 2"
+exec sql "delete from t0 where k0 = 3"
+
 # vim: syntax=python
diff --git a/test/box/show.result b/test/box/show.result
index bc4af4173e..d00dfc6d0c 100644
--- a/test/box/show.result
+++ b/test/box/show.result
@@ -35,7 +35,7 @@ configuration:
   slab_alloc_factor: "2"
   work_dir: (null)
   pid_file: "box.pid"
-  logger: (null)
+  logger: "tee --append tarantool.log"
   logger_nonblock: "1"
   io_collect_interval: "0"
   backlog: "1024"
@@ -82,10 +82,14 @@ save coredump
 ---
 ok
 ...
+insert into t0 values (1, 'tuple')
+Insert OK, 1 row affected
 save snapshot
 ---
 ok
 ...
+delete from t0 where k0 = 1
+Delete OK, 1 row affected
 exec module command
 ---
 unimplemented
@@ -94,10 +98,10 @@ show info
 ---
 info:
   version: "1.3.minor-<rev>-<commit>
-  uptime: 0
+  uptime: <uptime>
   pid: <pid>
   wal_writer_pid: <pid>
-  lsn: 1
+  lsn: 3
   recovery_lag: 0.000
   recovery_last_update: 0.000
   status: primary
diff --git a/test/box/show.test b/test/box/show.test
index 2c2dbd10e5..2e4de22121 100644
--- a/test/box/show.test
+++ b/test/box/show.test
@@ -9,10 +9,13 @@ exec admin "help"
 exec admin "show configuration"
 exec admin "show stat"
 exec admin "save coredump"
+exec sql "insert into t0 values (1, 'tuple')"
 exec admin "save snapshot"
+exec sql "delete from t0 where k0 = 1"
 exec admin "exec module command"
 sys.stdout.push_filter("(\d\.\d)\.\d-\d+-\S+", "\\1.minor-<rev>-<commit>")
 sys.stdout.push_filter("pid: \d+", "pid: <pid>")
+sys.stdout.push_filter("uptime: \d+", "uptime: <uptime>")
 exec admin "show info"
 sys.stdout.clear_all_filters()
 sys.stdout.push_filter(".*", "")
diff --git a/test/box/snapshot.result b/test/box/snapshot.result
index 2167ee5024..edb9e17818 100644
--- a/test/box/snapshot.result
+++ b/test/box/snapshot.result
@@ -1505,3 +1505,31 @@ Found 1 tuple:
 select * from t0 where k0=500
 No match
 # Restore the default server...
+#
+# A test case for: http://bugs.launchpad.net/bugs/686411
+# Check that 'save snapshot' does not overwrite a snapshot
+# file that already exists. Verify also that any other
+# error that happens when saving snapshot is propagated
+# to the caller.
+
+insert into t0 values (1, 'first tuple')
+Insert OK, 1 row affected
+save snapshot
+---
+ok
+...
+save snapshot
+---
+fail: can't save snapshot, errno 17 (File exists)
+...
+insert into t0 values (2, 'second tuple')
+Insert OK, 1 row affected
+# Make 'var' directory read-only.
+save snapshot
+---
+fail: can't save snapshot, errno 13 (Permission denied)
+...
+delete from t0 where k0 = 1
+Delete OK, 1 row affected
+delete from t0 where k0 = 2
+Delete OK, 1 row affected
diff --git a/test/box/snapshot.test b/test/box/snapshot.test
index 3eef198ee2..764699f2a3 100644
--- a/test/box/snapshot.test
+++ b/test/box/snapshot.test
@@ -1,4 +1,5 @@
 # encoding: tarantool
+#
 print """
 # Verify that the server starts from a pre-recorded snapshot.
 # This way we check that the server can read old snapshots (v11)
@@ -15,4 +16,33 @@ server.stop(True)
 os.unlink(snapshot)
 server.start(True)
 
-# vim: syntax=python
+print """#
+# A test case for: http://bugs.launchpad.net/bugs/686411
+# Check that 'save snapshot' does not overwrite a snapshot
+# file that already exists. Verify also that any other
+# error that happens when saving snapshot is propagated
+# to the caller.
+"""
+exec sql "insert into t0 values (1, 'first tuple')"
+exec admin "save snapshot"
+
+# In absence of data modifications, two consecutive
+# 'save snapshot' statements will try to write
+# into the same file, since file name is based
+# on LSN.
+#  Don't allow to overwrite snapshots.
+exec admin "save snapshot"
+#
+# Increment LSN
+exec sql "insert into t0 values (2, 'second tuple')"
+#
+# Check for other errors, e.g. "Permission denied".
+print "# Make 'var' directory read-only."
+os.chmod(vardir, 0555)
+exec admin "save snapshot"
+
+# cleanup
+os.chmod(vardir, 0755)
+exec sql "delete from t0 where k0 = 1"
+exec sql "delete from t0 where k0 = 2"
+# vim: syntax=python spell
diff --git a/test/box/tarantool.cfg b/test/box/tarantool.cfg
index c09c23a187..d7cd0fe80c 100644
--- a/test/box/tarantool.cfg
+++ b/test/box/tarantool.cfg
@@ -2,6 +2,8 @@ slab_alloc_arena = 0.1
 
 pid_file = "box.pid"
 
+logger="tee --append tarantool.log"
+
 primary_port = 33013
 secondary_port = 33014
 admin_port = 33015
diff --git a/test/box/tarantool_bad1.cfg b/test/box/tarantool_bad1.cfg
index af4eb414a0..fb0536b31c 100644
--- a/test/box/tarantool_bad1.cfg
+++ b/test/box/tarantool_bad1.cfg
@@ -2,6 +2,8 @@ slab_alloc_arena = 0.2
 
 pid_file = "/var/run/box.pid"
 
+logger="cat tarantool.log"
+
 primary_port = 33023
 secondary_port = 33024
 admin_port = 33025
diff --git a/test/box/tarantool_bad2.cfg b/test/box/tarantool_bad2.cfg
index fa5f170d89..0d03bfde3b 100644
--- a/test/box/tarantool_bad2.cfg
+++ b/test/box/tarantool_bad2.cfg
@@ -2,6 +2,8 @@ slab_alloc_arena = 0.1
 
 pid_file = "box.pid"
 
+logger="tee --append tarantool.log"
+
 #primary_port = 33013
 secondary_port = 33014
 admin_port = 33015
diff --git a/test/box/tarantool_bad3.cfg b/test/box/tarantool_bad3.cfg
index edac4b2011..8c1d14c3c6 100644
--- a/test/box/tarantool_bad3.cfg
+++ b/test/box/tarantool_bad3.cfg
@@ -2,6 +2,8 @@ slab_alloc_arena = 0.1
 
 pid_file = "box.pid"
 
+logger="tee --append tarantool.log"
+
 primary_port = 33013
 secondary_port = 33014
 admin_port = 33015
diff --git a/test/box/tarantool_bad4.cfg b/test/box/tarantool_bad4.cfg
index 99bdb38f15..67b9c893b0 100644
--- a/test/box/tarantool_bad4.cfg
+++ b/test/box/tarantool_bad4.cfg
@@ -2,6 +2,8 @@ slab_alloc_arena = 0.1
 
 pid_file = "box.pid"
 
+logger="tee --append tarantool.log"
+
 primary_port = 33013
 secondary_port = 33014
 admin_port = 33015
diff --git a/test/box/tarantool_bad5.cfg b/test/box/tarantool_bad5.cfg
index 1ba6dfa8bd..1390839277 100644
--- a/test/box/tarantool_bad5.cfg
+++ b/test/box/tarantool_bad5.cfg
@@ -2,6 +2,8 @@ slab_alloc_arena = 0.1
 
 pid_file = "box.pid"
 
+logger="tee --append tarantool.log"
+
 primary_port = 33013
 secondary_port = 33014
 admin_port = 33015
diff --git a/test/box/tarantool_good.cfg b/test/box/tarantool_good.cfg
index c09c23a187..d7cd0fe80c 100644
--- a/test/box/tarantool_good.cfg
+++ b/test/box/tarantool_good.cfg
@@ -2,6 +2,8 @@ slab_alloc_arena = 0.1
 
 pid_file = "box.pid"
 
+logger="tee --append tarantool.log"
+
 primary_port = 33013
 secondary_port = 33014
 admin_port = 33015
diff --git a/test/box/unfinished.xlog b/test/box/unfinished.xlog
new file mode 100644
index 0000000000000000000000000000000000000000..c905fec9229b37c384366bc267c471184e5d7238
GIT binary patch
literal 86
zcma#>@ptDk&@(jT;(EK9dzTz<Rvs$@1SqyY-p><$*-;C~bu99f{`a4WVY)*-0|O&3
cR0RVA6OhHi0wj3S@=J?KG89Tm3vyDq09?Eh<p2Nx

literal 0
HcmV?d00001

diff --git a/test/box/xlog.result b/test/box/xlog.result
new file mode 100644
index 0000000000..ba345f63be
--- /dev/null
+++ b/test/box/xlog.result
@@ -0,0 +1,34 @@
+
+# Inprogress xlog must be renamed before second insert.
+
+insert into t0 values (1, 'first tuple')
+Insert OK, 1 row affected
+00000000000000000002.xlog.inprogress exists
+insert into t0 values (2, 'second tuple')
+Insert OK, 1 row affected
+00000000000000000002.xlog.inprogress has been successfully renamed
+
+# Inprogress xlog must be renamed during regular termination.
+
+insert into t0 values (3, 'third tuple')
+Insert OK, 1 row affected
+00000000000000000004.xlog.inprogress exists
+Stopping the server...
+00000000000000000004.xlog.inprogress has been successfully renamed
+
+# An inprogress xlog fle with one record must be renamed during recovery.
+
+00000000000000000005.xlog.inprogress hash been successfully renamed
+
+# Empty (zero size) inprogress xlog must be deleted during recovery.
+
+00000000000000000006.xlog.inprogress has been successfully deleted
+
+# Empty (header only, no records) inprogress xlog must be deleted
+# during recovery.
+
+00000000000000000006.xlog.inprogress has been successfully deleted
+
+# Inprogress xlog with bad record must be deleted during recovery.
+
+00000000000000000006.xlog.inprogress has been successfully deleted
diff --git a/test/box/xlog.test b/test/box/xlog.test
new file mode 100644
index 0000000000..5054043588
--- /dev/null
+++ b/test/box/xlog.test
@@ -0,0 +1,113 @@
+# encoding: tarantool
+#
+from os.path import abspath
+
+# cleanup vardir
+server.stop(True)
+server.install(True)
+
+print """
+# Inprogress xlog must be renamed before second insert.
+"""
+wal_inprogress = os.path.join(vardir, "00000000000000000002.xlog.inprogress")
+wal = os.path.join(vardir, "00000000000000000002.xlog")
+
+server.start(True)
+exec sql "insert into t0 values (1, 'first tuple')"
+if os.access(wal_inprogress, os.F_OK):
+  print "00000000000000000002.xlog.inprogress exists"
+
+exec sql "insert into t0 values (2, 'second tuple')"
+
+if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK):
+  print "00000000000000000002.xlog.inprogress has been successfully renamed"
+server.stop(True)
+
+print """
+# Inprogress xlog must be renamed during regular termination.
+"""
+server.start(True)
+
+wal_inprogress = os.path.join(vardir, "00000000000000000004.xlog.inprogress")
+wal = os.path.join(vardir, "00000000000000000004.xlog")
+
+exec sql "insert into t0 values (3, 'third tuple')"
+
+if os.access(wal_inprogress, os.F_OK):
+  print "00000000000000000004.xlog.inprogress exists"
+
+server.stop(False)
+
+if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK):
+  print "00000000000000000004.xlog.inprogress has been successfully renamed"
+
+print """
+# An inprogress xlog fle with one record must be renamed during recovery.
+"""
+
+wal_inprogress = os.path.join(vardir, "00000000000000000005.xlog.inprogress")
+wal = os.path.join(vardir, "00000000000000000005.xlog")
+
+os.symlink(abspath("box/unfinished.xlog"), wal_inprogress)
+
+server.start(True)
+
+if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK):
+  print "00000000000000000005.xlog.inprogress hash been successfully renamed"
+server.stop(True)
+
+print """
+# Empty (zero size) inprogress xlog must be deleted during recovery.
+"""
+
+wal_inprogress = os.path.join(vardir, "00000000000000000006.xlog.inprogress")
+wal = os.path.join(vardir, "00000000000000000006.xlog")
+
+os.symlink(abspath("box/empty.xlog"), wal_inprogress)
+server.start(True)
+
+if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
+   print "00000000000000000006.xlog.inprogress has been successfully deleted"
+server.stop(True)
+
+print """
+# Empty (header only, no records) inprogress xlog must be deleted
+# during recovery.
+"""
+
+# If the previous test has failed, there is a dangling link
+# and symlink fails.
+try:
+  os.symlink(abspath("box/just_header.xlog"), wal_inprogress)
+except OSError as e:
+  print e
+
+server.start(True)
+
+if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
+   print "00000000000000000006.xlog.inprogress has been successfully deleted"
+server.stop(True)
+
+print """
+# Inprogress xlog with bad record must be deleted during recovery.
+"""
+
+# If the previous test has failed, there is a dangling link
+# and symlink fails.
+try:
+  os.symlink(abspath("box/bad_record.xlog"), wal_inprogress)
+except OSError as e:
+  print e
+
+server.start(True)
+
+if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
+   print "00000000000000000006.xlog.inprogress has been successfully deleted"
+server.stop(True)
+
+# cleanup
+server.stop(True)
+server.install(True)
+server.start(True)
+
+# vim: syntax=python
diff --git a/test/lib/tarantool_silverbox_server.py b/test/lib/tarantool_silverbox_server.py
index 9008c05acc..996ced8f3f 100644
--- a/test/lib/tarantool_silverbox_server.py
+++ b/test/lib/tarantool_silverbox_server.py
@@ -116,9 +116,11 @@ class TarantoolSilverboxServer:
                           stdout = subprocess.PIPE,
                           stderr = subprocess.PIPE)
 
-    version = subprocess.Popen([self.abspath_to_exe, "--version"],
-                               cwd = self.args.vardir,
-                               stdout = subprocess.PIPE).stdout.read().rstrip()
+    p = subprocess.Popen([self.abspath_to_exe, "--version"],
+                         cwd = self.args.vardir,
+                         stdout = subprocess.PIPE)
+    version = p.stdout.read().rstrip()
+    p.wait()
 
     if not silent:
       print "Starting {0} {1}.".format(os.path.basename(self.abspath_to_exe),
-- 
GitLab