diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20812e926c2f7d3dd1bab8b3fb833aa38acd4edc..59cf71196f5ba7b302e1230f60c41daf39c7b1df 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -77,7 +77,7 @@ if (HAVE_SENDFILE)
     endif()
 endif()
 check_function_exists(open_memstream HAVE_OPEN_MEMSTREAM)
-
+check_function_exists(fmemopen HAVE_FMEMOPEN)
 check_function_exists(funopen HAVE_FUNOPEN)
 
 #
@@ -305,6 +305,13 @@ include(BuildLibYAML)
 libyaml_build()
 add_dependencies(build_bundled_libs yaml)
 
+#
+# LibUUID
+#
+
+set(LIBUUID_FIND_REQUIRED ON)
+find_package(LibUUID)
+
 #
 # Third-Party misc
 #
diff --git a/cmake/FindLibUUID.cmake b/cmake/FindLibUUID.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..0db8ae7e5e155eb0b371a02909566c449b2ff255
--- /dev/null
+++ b/cmake/FindLibUUID.cmake
@@ -0,0 +1,52 @@
+# Locate the UUID development files.
+#
+# Two flavours are probed: a separate libuuid exporting uuid_is_null()
+# (HAVE_LIBUUID_LINUX) and uuid_is_nil() provided by libc itself
+# (HAVE_LIBUUID_BSD).
+#
+# Result variables:
+#   LIBUUID_FOUND        - ON when both the header and the functions exist
+#   LIBUUID_INCLUDE_DIRS - directory which contains uuid.h
+#   LIBUUID_LIBRARIES    - libraries to link against (left unset when the
+#                          functions live in libc)
+#
+# Both the upper-case LIBUUID_FIND_REQUIRED/LIBUUID_FIND_QUIETLY variables
+# and the LibUUID_FIND_* ones set by find_package(LibUUID REQUIRED|QUIET)
+# are honoured.
+
+include(CheckLibraryExists)
+
+if (NOT LIBUUID_FOUND)
+    find_path(LIBUUID_INCLUDE_DIR
+        NAMES uuid.h
+        PATH_SUFFIXES uuid
+    )
+    if (LIBUUID_INCLUDE_DIR)
+        if (NOT LIBUUID_FIND_QUIETLY AND NOT LibUUID_FIND_QUIETLY)
+            message(STATUS "Found libuuid includes: ${LIBUUID_INCLUDE_DIR}")
+        endif()
+        set(LIBUUID_INCLUDE_DIRS ${LIBUUID_INCLUDE_DIR})
+        check_library_exists(uuid uuid_is_null "" HAVE_LIBUUID_LINUX)
+        if (HAVE_LIBUUID_LINUX)
+            # Set the result before reporting it, otherwise the status
+            # line prints an empty library name.
+            set(LIBUUID_LIBRARIES uuid)
+            set(LIBUUID_FOUND ON)
+            if (NOT LIBUUID_FIND_QUIETLY AND NOT LibUUID_FIND_QUIETLY)
+                message(STATUS "Found libuuid library: ${LIBUUID_LIBRARIES}")
+            endif()
+        else()
+            check_library_exists(c uuid_is_nil "" HAVE_LIBUUID_BSD)
+            if (HAVE_LIBUUID_BSD)
+                set(LIBUUID_FOUND ON)
+            elseif (LIBUUID_FIND_REQUIRED OR LibUUID_FIND_REQUIRED)
+                message(FATAL_ERROR "Could not find uuid libraries")
+            endif()
+        endif()
+    elseif (LIBUUID_FIND_REQUIRED OR LibUUID_FIND_REQUIRED)
+        message(FATAL_ERROR "Could not find uuid development files")
+    endif()
+endif()
+
+# Only the find_path() result is a cache entry; hide it from the basic view.
+mark_as_advanced(LIBUUID_INCLUDE_DIR)
diff --git a/debian/control b/debian/control
index c54bb43cb8fb2355f1167b3300ef5544458bd119..a75e10a13487acba64280c921aa5710728623e52 100644
--- a/debian/control
+++ b/debian/control
@@ -7,7 +7,8 @@ Build-Depends: cdbs, debhelper (>= 8),
  libncurses5-dev,
  libiberty-dev | binutils-dev,
  libmysqlclient-dev,
- libpq-dev
+ libpq-dev,
+ uuid-dev
 Section: database
 Standards-Version: 3.9.5
 Homepage: http://tarantool.org/
diff --git a/extra/schema_erase.lua b/extra/schema_erase.lua
index 6ee9acfee3d7e9cb7e70b39e1a94d3e6277ff820..b37853908d319efd742b3fe7072bc1fcda2080cf 100644
--- a/extra/schema_erase.lua
+++ b/extra/schema_erase.lua
@@ -4,6 +4,7 @@ _index = box.space[box.schema.INDEX_ID]
 _user = box.space[box.schema.USER_ID]
 _func = box.space[box.schema.FUNC_ID]
 _priv = box.space[box.schema.PRIV_ID]
+_cluster = box.space[box.schema.CLUSTER_ID]
 -- destroy everything - save snapshot produces an empty snapshot now
 _schema:run_triggers(false)
 _schema:truncate()
@@ -17,3 +18,5 @@ _func:run_triggers(false)
 _func:truncate()
 _priv:run_triggers(false)
 _priv:truncate()
+_cluster:run_triggers(false)
+_cluster:truncate()
diff --git a/extra/schema_fill.lua b/extra/schema_fill.lua
index 975caba8ce9f031ef7482e112631f84c067d44ba..b32f5f1ccfa298f9424047f933987b6720681cec 100644
--- a/extra/schema_fill.lua
+++ b/extra/schema_fill.lua
@@ -7,6 +7,7 @@ _index = box.space[box.schema.INDEX_ID]
 _func = box.space[box.schema.FUNC_ID]
 _user = box.space[box.schema.USER_ID]
 _priv = box.space[box.schema.PRIV_ID]
+_cluster = box.space[box.schema.CLUSTER_ID]
 -- define schema version
 _schema:insert{'version', 1, 6}
 -- define system spaces
@@ -16,6 +17,7 @@ _space:insert{_index.n, ADMIN, '_index', 'memtx', 0}
 _space:insert{_func.n, ADMIN, '_func', 'memtx', 0}
 _space:insert{_user.n, ADMIN, '_user', 'memtx', 0}
 _space:insert{_priv.n, ADMIN, '_priv', 'memtx', 0}
+_space:insert{_cluster.n, ADMIN, '_cluster', 'memtx', 0}
 -- define indexes
 _index:insert{_schema.n, 0, 'primary', 'tree', 1, 1, 0, 'str'}
 
@@ -46,6 +48,11 @@ _index:insert{_func.n, 2, 'name', 'tree', 1, 1, 2, 'str'}
 _index:insert{_priv.n, 0, 'primary', 'tree', 1, 3, 1, 'num', 2, 'str', 3, 'num'}
 _index:insert{_priv.n, 1, 'owner', 'tree', 0, 1, 1, 'num'}
 
+-- primary key: node id
+_index:insert{_cluster.n, 0, 'primary', 'tree', 1, 1, 0, 'num'}
+-- node uuid key: node uuid
+_index:insert{_cluster.n, 1, 'uuid', 'tree', 1, 1, 1, 'str'}
+
 -- 
 -- Pre-create user and grants
 _user:insert{GUEST, '', 'guest'}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 410580ca1c0c9cfc164d38cda1e0c8688616b0ae..fe3e7f5d53324ff828c9b9607ac3e8c13536b2a3 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -13,11 +13,6 @@ include_directories(${LIBCORO_INCLUDE_DIR})
 include_directories(${LIBGOPT_INCLUDE_DIR})
 include_directories(${READLINE_INCLUDE_DIR})
 
-# Require pthread globally if compiling with GCC
-if (CMAKE_COMPILER_IS_GNUCC)
-    add_compile_flags("C;CXX" "-pthread")
-endif()
-
 # Compile src/lua/*.lua files into src/lua/*.lua.c sources
 file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/src/lua)
 set(lua_sources)
@@ -75,6 +70,7 @@ set (common_sources
      cfg.cc
      cpu_feature.c
      fiob.c
+     tt_uuid.c
      ffisyms.cc
      lua/init.cc
      lua/fiber.cc
@@ -100,6 +96,7 @@ endif()
 
 set_source_files_compile_flags(${common_sources})
 add_library(core STATIC ${common_sources})
+target_link_libraries(core pthread)
 
 set (common_libraries core small salad misc bitset msgpuck)
 
@@ -142,6 +139,7 @@ if (ENABLE_BACKTRACE AND HAVE_BFD)
     endif()
 endif()
 
+set (common_libraries ${common_libraries} ${LIBUUID_LIBRARIES})
 set (common_libraries ${common_libraries} PARENT_SCOPE)
 
 add_subdirectory(lib)
diff --git a/src/admin.cc b/src/admin.cc
index 130e0538218070e3c159c094cfee499ab799cb7b..7def35ec83272c31cf9eab4c313795ef3f7df595 100644
--- a/src/admin.cc
+++ b/src/admin.cc
@@ -97,7 +97,7 @@ admin_handler(va_list ap)
 	for (;;) {
 		if (admin_dispatch(&coio, iobuf, L) < 0)
 			return;
-		iobuf_gc(iobuf);
+		iobuf_reset(iobuf);
 		fiber_gc();
 	}
 }
diff --git a/src/bootstrap.snap b/src/bootstrap.snap
index 6ee1bc4150098ff3370e4627f37e8d35b969682b..3b4a138c38523678c62295af347db6f62900a7f7 100644
Binary files a/src/bootstrap.snap and b/src/bootstrap.snap differ
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 0bf3dcc0f313fd23db9034f214f380edff0b20bb..996e4fb7f13d38bf60097cd8302c55a218c25bb3 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -36,6 +36,7 @@ add_library(box
     box.cc
     access.cc
     authentication.cc
+    cluster.cc
     ${lua_sources}
     lua/call.cc
     lua/tuple.cc
diff --git a/src/box/alter.cc b/src/box/alter.cc
index aec77035a1e5d7d81ecda2b3d535f7646ddc24fb..8af36e69279eb7a7a36582dd099ceb887621af0d 100644
--- a/src/box/alter.cc
+++ b/src/box/alter.cc
@@ -38,6 +38,7 @@
 #include <new> /* for placement new */
 #include <stdio.h> /* snprintf() */
 #include <ctype.h>
+#include "cluster.h" /* for cluster_set_uuid() */
 
 /** _space columns */
 #define ID               0
@@ -1282,7 +1283,7 @@ func_cache_remove_func(struct trigger * /* trigger */, void *event)
 static struct trigger drop_func_trigger =
 	{ rlist_nil, func_cache_remove_func, NULL, NULL };
 
-/** Remove a function from function cache */
+/** Replace a function in the function cache */
 static void
 func_cache_replace_func(struct trigger * /* trigger */, void *event)
 {
@@ -1495,6 +1496,105 @@ on_replace_dd_priv(struct trigger * /* trigger */, void *event)
 
 /* }}} access control */
 
+/* {{{ cluster configuration */
+
+/**
+ * Parse a tuple field which is expected to contain a string
+ * representation of UUID, and return a 16-byte representation.
+ */
+tt_uuid
+tuple_field_uuid(struct tuple *tuple, int fieldno)
+{
+	const char *value = tuple_field_cstr(tuple, fieldno);
+	tt_uuid uuid;
+	if (tt_uuid_from_string(value, &uuid) != 0)
+		tnt_raise(ClientError, ER_INVALID_UUID, value);
+	return uuid;
+}
+
+/**
+ * This trigger is normally invoked only upon initial recovery.
+ *
+ * Before a cluster is assigned a cluster id it's read only.
+ */
+static void
+on_replace_dd_schema(struct trigger * /* trigger */, void *event)
+{
+	struct txn *txn = (struct txn *) event;
+	struct tuple *old_tuple = txn->old_tuple;
+	struct tuple *new_tuple = txn->new_tuple;
+	const char *key = tuple_field_cstr(new_tuple ?
+					   new_tuple : old_tuple, 0);
+	if (strcmp(key, "cluster") == 0) {
+		if (old_tuple != NULL || new_tuple == NULL)
+			tnt_raise(ClientError, ER_CLUSTER_ID_IS_RO);
+		tt_uuid uu = tuple_field_uuid(new_tuple, 1);
+		cluster_set_id(&uu);
+	}
+}
+
+/**
+ * A record with id of the new node has been synced to the
+ * write ahead log. Update the cluster configuration with
+ * a new node.
+ */
+static void
+on_commit_dd_cluster(struct trigger *trigger, void *event)
+{
+	(void) trigger;
+	struct txn *txn = (struct txn *) event;
+	uint32_t node_id = tuple_field_u32(txn->new_tuple, 0);
+	tt_uuid node_uuid = tuple_field_uuid(txn->new_tuple, 1);
+
+	cluster_add_node(&node_uuid, node_id);
+}
+
+static struct trigger commit_cluster_trigger =
+	{ rlist_nil, on_commit_dd_cluster, NULL, NULL };
+
+/**
+ * A trigger invoked on replace in the space _cluster,
+ * which contains cluster configuration.
+ *
+ * This space is modified by JOIN command in IPROTO
+ * protocol.
+ *
+ * The trigger updates the cluster configuration cache
+ * with uuid of the newly joined node.
+ *
+ * During recovery, it acts the same way, loading identifiers
+ * of all nodes into the node cache. Node globally unique
+ * identifiers are used to keep track of cluster configuration,
+ * so that a node that previously joined the cluster can
+ * follow updates, and a node that belongs to a different
+ * cluster can not by mistake join/follow another cluster
+ * without first being reset (emptied).
+ */
+static void
+on_replace_dd_cluster(struct trigger *trigger, void *event)
+{
+	(void) trigger;
+	struct txn *txn = (struct txn *) event;
+	struct tuple *old_tuple = txn->old_tuple;
+	struct tuple *new_tuple = txn->new_tuple;
+	if (old_tuple != NULL || new_tuple == NULL)
+		tnt_raise(ClientError, ER_NODE_ID_IS_RO);
+
+	/* Check fields */
+	uint32_t node_id = tuple_field_u32(new_tuple, 0);
+	if (cnode_id_is_reserved(node_id))
+		tnt_raise(ClientError, ER_NODE_ID_IS_RESERVED,
+			  (unsigned) node_id);
+	tt_uuid node_uuid = tuple_field_uuid(new_tuple, 1);
+	if (tt_uuid_is_nil(&node_uuid))
+		tnt_raise(ClientError, ER_INVALID_UUID,
+			  tt_uuid_str(&node_uuid));
+
+	trigger_set(&txn->on_commit, &commit_cluster_trigger);
+}
+
+/* }}} cluster configuration */
+
 struct trigger alter_space_on_replace_space = {
 	rlist_nil, on_replace_dd_space, NULL, NULL
 };
@@ -1503,6 +1603,10 @@ struct trigger alter_space_on_replace_index = {
 	rlist_nil, on_replace_dd_index, NULL, NULL
 };
 
+struct trigger on_replace_schema = {
+	rlist_nil, on_replace_dd_schema, NULL, NULL
+};
+
 struct trigger on_replace_user = {
 	rlist_nil, on_replace_dd_user, NULL, NULL
 };
@@ -1515,4 +1619,8 @@ struct trigger on_replace_priv = {
 	rlist_nil, on_replace_dd_priv, NULL, NULL
 };
 
+struct trigger on_replace_cluster = {
+	rlist_nil, on_replace_dd_cluster, NULL, NULL
+};
+
 /* vim: set foldmethod=marker */
diff --git a/src/box/alter.h b/src/box/alter.h
index a563c3771e1fe06e7ba2f19c05d6093b298d7860..d66e810df2c32ffa81127c8c1a07158c5fa25897 100644
--- a/src/box/alter.h
+++ b/src/box/alter.h
@@ -32,8 +32,10 @@
 
 extern struct trigger alter_space_on_replace_space;
 extern struct trigger alter_space_on_replace_index;
+extern struct trigger on_replace_schema;
 extern struct trigger on_replace_user;
 extern struct trigger on_replace_func;
 extern struct trigger on_replace_priv;
+extern struct trigger on_replace_cluster;
 
 #endif /* INCLUDES_TARANTOOL_BOX_ALTER_H */
diff --git a/src/box/box.cc b/src/box/box.cc
index ae93421c29fe0eb0e76484e2101c0c6904fea6e8..5b9c895d36d03969d545efa0eed80646a3fb03ad 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -56,7 +56,6 @@
 #include "cfg.h"
 #include "iobuf.h"
 
-static void process_replica(struct port *port, struct request *request);
 static void process_ro(struct port *port, struct request *request);
 static void process_rw(struct port *port, struct request *request);
 box_process_func box_process = process_ro;
@@ -88,7 +87,6 @@ static void
 process_rw(struct port *port, struct request *request)
 {
 	struct txn *txn = txn_begin();
-
 	try {
 		stat_collect(stat_base, request->code, 1);
 		request->execute(request, txn, port);
@@ -102,16 +100,6 @@ process_rw(struct port *port, struct request *request)
 	}
 }
 
-static void
-process_replica(struct port *port, struct request *request)
-{
-	if (!iproto_request_is_select(request->code)) {
-		tnt_raise(ClientError, ER_NONMASTER,
-			  cfg_gets("replication_source"));
-	}
-	return process_rw(port, request);
-}
-
 static void
 process_ro(struct port *port, struct request *request)
 {
@@ -120,37 +108,25 @@ process_ro(struct port *port, struct request *request)
 	return process_rw(port, request);
 }
 
-static int
-recover_row(void *param __attribute__((unused)),
-	    struct iproto_packet *packet)
+static void
+recover_row(void *param __attribute__((unused)), struct iproto_packet *packet)
 {
-	try {
-		assert(packet->bodycnt == 1); /* always 1 for read */
-		struct request request;
-		request_create(&request, packet->code);
-		request_decode(&request, (const char *) packet->body[0].iov_base,
-				packet->body[0].iov_len);
-		request.packet = packet;
-		process_rw(&null_port, &request);
-	} catch (Exception *e) {
-		e->log();
-		return -1;
-	}
-
-	return 0;
+	assert(packet->bodycnt == 1); /* always 1 for read */
+	struct request request;
+	request_create(&request, packet->code);
+	request_decode(&request, (const char *) packet->body[0].iov_base,
+		packet->body[0].iov_len);
+	request.packet = packet;
+	process_rw(&null_port, &request);
 }
 
 static void
 box_enter_master_or_replica_mode(const char *replication_source)
 {
+	box_process = process_rw;
 	if (replication_source != NULL) {
-		box_process = process_replica;
-
-		recovery_wait_lsn(recovery_state, recovery_state->lsn);
 		recovery_follow_remote(recovery_state, replication_source);
-
 	} else {
-		box_process = process_rw;
 		title("primary", NULL);
 		say_info("I am primary");
 	}
@@ -300,6 +276,63 @@ box_leave_local_standby_mode(void *data __attribute__((unused)))
 	box_enter_master_or_replica_mode(cfg_gets("replication_source"));
 }
 
+/**
+ * @brief Called when recovery/replication wants to add a new node
+ * to the cluster. Inserts a {new node id, uuid} tuple into _cluster;
+ * cluster_add_node() is then called from the commit trigger on
+ * the _cluster space and actually adds the node to the cluster.
+ * @param node_uuid UUID of the node which joins the cluster
+ */
+static void
+box_on_cluster_join(const tt_uuid *node_uuid)
+{
+	struct space *space = space_cache_find(SC_CLUSTER_ID);
+	class Index *index = index_find(space, 0);
+	struct iterator *it = index->position();
+	index->initIterator(it, ITER_LE, NULL, 0);
+	struct tuple *tuple = it->next(it);
+	uint32_t node_id = tuple ? tuple_field_u32(tuple, 0) + 1 : 1;
+
+	struct request req;
+	request_create(&req, IPROTO_INSERT);
+	req.space_id = SC_CLUSTER_ID;
+	char buf[128];
+	char *data = buf;
+	data = mp_encode_array(data, 2);
+	data = mp_encode_uint(data, node_id);
+	data = mp_encode_str(data, tt_uuid_str(node_uuid), UUID_STR_LEN);
+	assert(data <= buf + sizeof(buf));
+	req.tuple = buf;
+	req.tuple_end = data;
+	process_rw(&null_port, &req);
+}
+
+static void
+box_set_cluster_uuid(struct recovery_state *r)
+{
+	/* Save Cluster-UUID to _schema space */
+	tt_uuid cluster_uuid;
+	tt_uuid_create(&cluster_uuid);
+
+	const char *key = "cluster";
+	struct request req;
+	request_create(&req, IPROTO_INSERT);
+	req.space_id = SC_SCHEMA_ID;
+	char buf[128];
+	char *data = buf;
+	data = mp_encode_array(data, 2);
+	data = mp_encode_str(data, key, strlen(key));
+	data = mp_encode_str(data, tt_uuid_str(&cluster_uuid), UUID_STR_LEN);
+	assert(data <= buf + sizeof(buf));
+	req.tuple = buf;
+	req.tuple_end = data;
+
+	process_rw(&null_port, &req);
+
+	/* Cluster-UUID must have been set by the _schema trigger */
+	assert(tt_uuid_cmp(&r->cluster_uuid, &cluster_uuid) == 0);
+}
+
 void
 box_free(void)
 {
@@ -342,7 +375,7 @@ box_init()
 
 	/* recovery initialization */
 	recovery_init(cfg_gets("snap_dir"), cfg_gets("wal_dir"),
-		      recover_row, NULL, box_snapshot_cb,
+		      recover_row, NULL, box_snapshot_cb, box_on_cluster_join,
 		      cfg_geti("rows_per_wal"));
 	recovery_update_io_rate_limit(recovery_state,
 				      cfg_getd("snap_io_rate_limit"));
@@ -353,9 +386,28 @@ box_init()
 	stat_base = stat_register(iproto_request_type_strs,
 				  IPROTO_DML_REQUEST_MAX);
 
-	recover_snap(recovery_state, cfg_gets("replication_source"));
+	const char *replication_source = cfg_gets("replication_source");
+	if (recovery_has_data(recovery_state)) {
+		/* Process existing snapshot */
+		recover_snap(recovery_state);
+		recovery_fix_lsn(recovery_state, false);
+	} else if (replication_source != NULL) {
+		/* Initialize replica */
+		replica_bootstrap(recovery_state, replication_source);
+		recovery_fix_lsn(recovery_state, false);
+		snapshot_save(recovery_state);
+	} else {
+		/* Initialize cluster */
+		cluster_bootstrap(recovery_state);
+		box_set_cluster_uuid(recovery_state);
+		recovery_fix_lsn(recovery_state, true);
+		snapshot_save(recovery_state);
+	}
+
+	if (tt_uuid_is_nil(&recovery_state->cluster_uuid))
+		tnt_raise(ClientError, ER_INVALID_CLUSTER);
+
 	space_end_recover_snapshot();
-	recover_existing_wals(recovery_state);
 	space_end_recover();
 
 	stat_cleanup(stat_base, IPROTO_DML_REQUEST_MAX);
@@ -480,14 +532,6 @@ box_snapshot(void)
 	return 0;
 }
 
-void
-box_init_storage(const char *dirname)
-{
-	struct log_dir dir = snap_dir;
-	dir.dirname = (char *) dirname;
-	init_storage_on_master(&dir);
-}
-
 void
 box_info(struct tbuf *out)
 {
diff --git a/src/box/cluster.cc b/src/box/cluster.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b4be84de8eb289c203c6e80e1090971b21df037b
--- /dev/null
+++ b/src/box/cluster.cc
@@ -0,0 +1,75 @@
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "cluster.h"
+#include "recovery.h"
+#include "exception.h"
+
+void
+cluster_set_id(const tt_uuid *uu)
+{
+	/* Set cluster UUID. */
+	assert(tt_uuid_is_nil(&recovery_state->cluster_uuid));
+	recovery_state->cluster_uuid = *uu;
+}
+
+void
+cluster_add_node(const tt_uuid *node_uuid, cnode_id_t node_id)
+{
+	struct recovery_state *r = recovery_state;
+
+	assert(!tt_uuid_is_nil(node_uuid));
+	assert(!cnode_id_is_reserved(node_id));
+
+	/* Add node */
+	struct node *node = (struct node *) calloc(1, sizeof(*node));
+	if (node == NULL) {
+		tnt_raise(ClientError, ER_MEMORY_ISSUE, sizeof(*node),
+			  "recovery", "r->cluster");
+	}
+	node->id = node_id;
+	node->uuid = *node_uuid;
+	uint32_t k = mh_cluster_put(recovery_state->cluster,
+		(const struct node **) &node, NULL, NULL);
+	if (k == mh_end(recovery_state->cluster)) {
+		free(node);
+		tnt_raise(ClientError, ER_MEMORY_ISSUE, sizeof(*node),
+			  "recovery", "r->cluster");
+	}
+
+	say_debug("confirm node: {uuid = %s, id = %u}",
+		  tt_uuid_str(node_uuid), node_id);
+
+	/* Confirm Local node */
+	if (tt_uuid_cmp(&r->node_uuid, node_uuid) == 0) {
+		/* Confirm Local Node */
+		say_info("synchronized with cluster");
+		assert(r->local_node == NULL || r->local_node->id == 0);
+		r->local_node = node;
+	}
+}
diff --git a/src/box/cluster.h b/src/box/cluster.h
new file mode 100644
index 0000000000000000000000000000000000000000..40d631410455e1dc066cc9886c839ceaef90fc27
--- /dev/null
+++ b/src/box/cluster.h
@@ -0,0 +1,107 @@
+#ifndef INCLUDES_BOX_CLUSTER_H
+#define INCLUDES_BOX_CLUSTER_H
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "tt_uuid.h"
+#include <stdint.h>
+/**
+ * @module cluster - global state of multi-master
+ * replicated database.
+ *
+ * Right now the cluster can only consist of instances
+ * connected with asynchronous master-master replication.
+ *
+ * Each cluster has a globally unique identifier. Each
+ * node in the cluster is identified as well. A node
+ * which is part of one cluster can not join another
+ * cluster.
+ *
+ * The cluster identifier is stored in the system space
+ * _schema, node identifiers in the system space _cluster,
+ * on all nodes. The node identifier is also stored in each
+ * snapshot header, this is how the node knows which node id
+ * in the cluster belongs to it.
+ *
+ * Cluster and node identifiers are globally unique
+ * (UUID, universally unique identifiers). In addition
+ * to a long UUID, which is stored in _cluster system
+ * space for each node, a short integer id is used for
+ * pervasive node identification in a replication stream,
+ * a snapshot, or internal data structures.
+ * The mapping between 16-byte node globally unique id and
+ * 4 byte cluster local id is stored in _cluster table. When
+ * a node joins the cluster, it sends its globally unique
+ * identifier to one of the masters, and gets its cluster
+ * local identifier as part of the reply to the JOIN request
+ * (in fact, it gets it as a REPLACE request in _cluster
+ * system space along with the rest of the replication
+ * stream).
+ *
+ * Cluster state on each node is represented by a table
+ * like below:
+ *
+ *   ----------------------------------
+ *  | node id          | confirmed lsn |
+ *   ----------------------------------
+ *  | 1                |  1258         | <-- changes of the local node
+ *   ----------------------------------
+ *  | 2                |  1292         | <-- changes received from
+ *   ----------------------------------       a remote node
+ */
+
+/** Cluster-local node identifier. */
+typedef uint32_t cnode_id_t;
+
+static inline bool
+cnode_id_is_reserved(cnode_id_t id)
+{
+	return id == 0;
+}
+
+/**
+ * Bootstrap a new cluster consisting of this node by
+ * assigning it a new globally unique cluster id. Used
+ * during bootstrapping in an empty data directory when no
+ * existing cluster for joining has been provided in the
+ * database configuration.
+ */
+void
+cluster_set_id(const tt_uuid *uu);
+
+/**
+ * Register the universally unique identifier of a remote node and
+ * a matching cluster-local identifier in the cluster registry.
+ * Called when a remote master joins the cluster.
+ *
+ * The node is added to the cluster lsn table with LSN 0.
+ */
+void
+cluster_add_node(const tt_uuid *node_uu, cnode_id_t id);
+
+#endif
diff --git a/src/box/schema.cc b/src/box/schema.cc
index e93d744b4a28d764fbbe9916eccc54db378b7a63..ac32fa5c7897559106ec56b9d3735691e6084d8d 100644
--- a/src/box/schema.cc
+++ b/src/box/schema.cc
@@ -255,7 +255,7 @@ schema_init()
 					      true /* unique */,
 					      1 /* part count */);
 	key_def_set_part(key_def, 0 /* part no */, 0 /* field no */, STRING);
-	(void) sc_space_new(&def, key_def, NULL);
+	(void) sc_space_new(&def, key_def, &on_replace_schema);
 
 	/* _space - home for all spaces. */
 	key_def->space_id = def.id = SC_SPACE_ID;
@@ -280,6 +280,13 @@ schema_init()
 	key_def->space_id = def.id = SC_PRIV_ID;
 	snprintf(def.name, sizeof(def.name), "_priv");
 	(void) sc_space_new(&def, key_def, &on_replace_priv);
+	/*
+	 * _cluster - association server uuid <-> server id
+	 * The real index is defined in the snapshot.
+	 */
+	key_def->space_id = def.id = SC_CLUSTER_ID;
+	snprintf(def.name, sizeof(def.name), "_cluster");
+	(void) sc_space_new(&def, key_def, &on_replace_cluster);
 	key_def_delete(key_def);
 
 	/* _index - definition of indexes in all spaces */
diff --git a/src/box/schema.h b/src/box/schema.h
index 097cbc7417d6a073017a3679efcbd2e93d3f2db0..eadc0f9c0016ebe756231e93d6c6f0aaa5c8f5d6 100644
--- a/src/box/schema.h
+++ b/src/box/schema.h
@@ -45,6 +45,8 @@ enum schema_id {
 	SC_USER_ID = 304,
 	/** Space id of _priv. */
 	SC_PRIV_ID = 312,
+	/** Space id of _cluster. */
+	SC_CLUSTER_ID = 320,
 	/** End of the reserved range of system spaces. */
 	SC_SYSTEM_ID_MAX = 511
 };
diff --git a/src/box/txn.cc b/src/box/txn.cc
index 62831885d7e9593511f99738afe0375ff8da6887..91f43ab93be822a4ef31b9462d74132ce69fa58a 100644
--- a/src/box/txn.cc
+++ b/src/box/txn.cc
@@ -30,7 +30,8 @@
 #include "tuple.h"
 #include "space.h"
 #include <tarantool.h>
-#include <recovery.h>
+#include "cluster.h"
+#include "recovery.h"
 #include <fiber.h>
 #include "request.h" /* for request_name */
 
@@ -39,18 +40,17 @@ double too_long_threshold;
 void
 txn_add_redo(struct txn *txn, struct request *request)
 {
-	if (recovery_state->wal_mode == WAL_NONE)
+	txn->packet = request->packet;
+	if (recovery_state->wal_mode == WAL_NONE || request->packet != NULL)
 		return;
-	if (request->packet == NULL) {
-		/* Generate binary body for Lua requests */
-		struct iproto_packet *packet = (struct iproto_packet *)
-			region_alloc0(&fiber()->gc, sizeof(*packet));
-		packet->code = request->code;
-		packet->bodycnt = request_encode(request, packet->body);
-		txn->packet = packet;
-	} else {
-		txn->packet = request->packet;
-	}
+
+	/* Generate binary body for Lua requests */
+	struct iproto_packet *packet = (struct iproto_packet *)
+		region_alloc0(&fiber()->gc, sizeof(*packet));
+	assert(packet->node_id == 0); /* local request */
+	packet->code = request->code;
+	packet->bodycnt = request_encode(request, packet->body);
+	txn->packet = packet;
 }
 
 void
@@ -93,27 +93,19 @@ txn_commit(struct txn *txn)
 {
 	if ((txn->old_tuple || txn->new_tuple) &&
 	    !space_is_temporary(txn->space)) {
-		struct iproto_packet *packet = txn->packet;
-		int64_t lsn = next_lsn(recovery_state);
-
 		int res = 0;
-		if (recovery_state->wal_mode != WAL_NONE) {
-			/* txn_commit() must be done after txn_add_redo() */
-			assert(txn->packet != NULL);
-			packet->lsn = lsn;
-			ev_tstamp start = ev_now(loop()), stop;
-			res = wal_write(recovery_state, packet);
-			stop = ev_now(loop());
+		/* txn_commit() must be done after txn_add_redo() */
+		assert(recovery_state->wal_mode == WAL_NONE || txn->packet != NULL);
+		ev_tstamp start = ev_now(loop()), stop;
+		res = wal_write(recovery_state, txn->packet);
+		stop = ev_now(loop());
 
-			if (stop - start > too_long_threshold) {
-				say_warn("too long %s: %.3f sec",
-					 iproto_request_name(packet->code),
-					 stop - start);
-			}
+		if (stop - start > too_long_threshold && txn->packet != NULL) {
+			say_warn("too long %s: %.3f sec",
+				iproto_request_name(txn->packet->code),
+					stop - start);
 		}
 
-		confirm_lsn(recovery_state, lsn, res == 0);
-
 		if (res)
 			tnt_raise(LoggedError, ER_WAL_IO);
 	}
diff --git a/src/errcode.h b/src/errcode.h
index 586bb944a02de0872bd07424a31ab22e37fd0809..51dcb096d5a3c7d1e5338ac61384ed8e57b6c94e 100644
--- a/src/errcode.h
+++ b/src/errcode.h
@@ -110,7 +110,13 @@ enum { TNT_ERRMSG_MAX = 512 };
 	/* 58 */_(ER_RELOAD_CFG,		2, "Can't set option '%s' dynamically") \
 	/* 59 */_(ER_CFG,			2, "Incorrect option value: %s") \
 	/* 60 */_(ER_SOPHIA,			2, "%s") \
-
+	/* 61 */_(ER_LOCAL_NODE_IS_NOT_ACTIVE,	2, "Local node is not active") \
+	/* 62 */_(ER_UNKNOWN_NODE,		2, "Unknown node %u") \
+	/* 63 */_(ER_INVALID_CLUSTER,		2, "Invalid cluster id") \
+	/* 64 */_(ER_INVALID_UUID,		2, "Invalid UUID: %s") \
+	/* 65 */_(ER_CLUSTER_ID_IS_RO,		2, "Can't reset cluster id: it is already assigned") \
+	/* 66 */_(ER_NODE_ID_IS_RO,		2, "Can't reset node id") \
+	/* 67 */_(ER_NODE_ID_IS_RESERVED,	2, "Can't initialize node id with a reserved value %u") \
 
 /*
  * !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/iobuf.cc b/src/iobuf.cc
index f47e33dc5ea854a24b409c77ad655f02b3d36a6f..d4837d673a03e3124e69cc5a6df657ff2d354a94 100644
--- a/src/iobuf.cc
+++ b/src/iobuf.cc
@@ -320,7 +320,7 @@ iobuf_flush(struct iobuf *iobuf, struct ev_io *coio)
 	ssize_t total = coio_writev(coio, iobuf->out.iov,
 				    obuf_iovcnt(&iobuf->out),
 				    obuf_size(&iobuf->out));
-	iobuf_gc(iobuf);
+	iobuf_reset(iobuf);
 	/*
 	 * If there is some residue in the input buffer, move it
 	 * but only in case if we don't have iobuf_readahead
@@ -335,7 +335,7 @@ iobuf_flush(struct iobuf *iobuf, struct ev_io *coio)
 }
 
 void
-iobuf_gc(struct iobuf *iobuf)
+iobuf_reset(struct iobuf *iobuf)
 {
 	/*
 	 * If we happen to have fully processed the input,
diff --git a/src/iobuf.h b/src/iobuf.h
index 0bd43b3cd2218378f59eec463b9decb2194b2473..25d97babc89a8c2e689955175c4da0692d79d03e 100644
--- a/src/iobuf.h
+++ b/src/iobuf.h
@@ -226,7 +226,7 @@ iobuf_flush(struct iobuf *iobuf, struct ev_io *coio);
  * Is automatically called by iobuf_flush().
  */
 void
-iobuf_gc(struct iobuf *iobuf);
+iobuf_reset(struct iobuf *iobuf);
 
 /** Return true if there is no input and no output. */
 static inline bool
diff --git a/src/iproto.cc b/src/iproto.cc
index 83fa53b011ae36a015722706102106f80b72e5e2..7f1be78104102f8b34f5c5ff1c96b45cab8296da 100644
--- a/src/iproto.cc
+++ b/src/iproto.cc
@@ -98,6 +98,9 @@ iproto_process_disconnect(struct iproto_request *request);
 static void
 iproto_process_dml(struct iproto_request *request);
 
+static void
+iproto_process_admin(struct iproto_request *request);
+
 struct IprotoRequestGuard {
 	struct iproto_request *ireq;
 	IprotoRequestGuard(struct iproto_request *ireq_arg):ireq(ireq_arg) {}
@@ -457,30 +460,6 @@ iproto_connection_input_iobuf(struct iproto_connection *con)
 	return newbuf;
 }
 
-static void
-iproto_process_admin(struct iproto_request *ireq,
-		     struct iproto_connection *con)
-{
-	switch (ireq->packet.code) {
-	case IPROTO_PING:
-		iproto_reply_ping(&ireq->iobuf->out, ireq->packet.sync);
-		break;
-	case IPROTO_SUBSCRIBE:
-		if (ireq->packet.bodycnt != 0) {
-			tnt_raise(ClientError, ER_INVALID_MSGPACK,
-				  "subscribe request body");
-		}
-		subscribe(con->input.fd, ireq->packet.lsn, ireq->packet.sync);
-		tnt_raise(IprotoConnectionShutdown);
-	default:
-		tnt_raise(ClientError, ER_UNKNOWN_REQUEST_TYPE,
-			   (uint32_t) ireq->packet.code);
-	}
-	if (! ev_is_active(&con->output))
-		ev_feed_event(con->loop, &con->output, EV_WRITE);
-}
-
-
 /** Enqueue all requests which were read up. */
 static inline void
 iproto_enqueue_batch(struct iproto_connection *con, struct ibuf *in)
@@ -519,6 +498,9 @@ iproto_enqueue_batch(struct iproto_connection *con, struct ibuf *in)
 		iproto_packet_decode(&ireq->packet, &pos, reqend);
 		ireq->total_len = pos - reqstart; /* total request length */
 
+		/* Mark this request as local (see fill_lsn()) */
+		ireq->packet.node_id = 0;
+
 		/*
 		 * sic: in case of exception con->parse_size
 		 * as well as in->pos must not be advanced, to
@@ -533,14 +515,12 @@ iproto_enqueue_batch(struct iproto_connection *con, struct ibuf *in)
 			pos = (const char *) ireq->packet.body[0].iov_base;
 			request_decode(&ireq->request, pos,
 				       ireq->packet.body[0].iov_len);
-			ireq->request.packet = &ireq->packet;
-			iproto_queue_push(&request_queue, guard.release());
-			/* Request will be discarded in iproto_process_dml */
 		} else {
-			iproto_process_admin(ireq, con);
-			/* Entire request can be discarded. */
-			in->pos += ireq->packet.body[0].iov_len;
+			ireq->process = iproto_process_admin;
 		}
+		ireq->request.packet = &ireq->packet;
+		iproto_queue_push(&request_queue, guard.release());
+		/* Request will be discarded in iproto_process_XXX */
 
 		/* Request is parsed */
 		con->parse_size -= reqend - reqstart;
@@ -635,7 +615,7 @@ iproto_flush(struct iobuf *iobuf, int fd, struct obuf_svp *svp)
 
 	if (nwr > 0) {
 		if (svp->size + nwr == obuf_size(&iobuf->out)) {
-			iobuf_gc(iobuf);
+			iobuf_reset(iobuf);
 			*svp = obuf_create_svp(&iobuf->out);
 			return 0;
 		}
@@ -711,6 +691,57 @@ iproto_process_dml(struct iproto_request *ireq)
 	}
 }
 
+static void
+iproto_process_admin(struct iproto_request *ireq)
+{
+	struct iobuf *iobuf = ireq->iobuf;
+	struct iproto_connection *con = ireq->connection;
+
+	auto scope_guard = make_scoped_guard([=]{
+		/* Discard request (see iproto_enqueue_batch()) */
+		iobuf->in.pos += ireq->total_len;
+
+		if (evio_is_active(&con->output)) {
+			if (! ev_is_active(&con->output))
+				ev_feed_event(con->loop,
+					      &con->output,
+					      EV_WRITE);
+		} else if (iproto_connection_is_idle(con)) {
+			iproto_connection_delete(con);
+		}
+	});
+
+	if (unlikely(! evio_is_active(&con->output)))
+		return;
+
+	try {
+		switch (ireq->packet.code) {
+		case IPROTO_PING:
+			iproto_reply_ping(&ireq->iobuf->out, ireq->packet.sync);
+			break;
+		case IPROTO_JOIN:
+			/* TODO: replication authorization */
+			session_set_user(con->session, ADMIN, ADMIN);
+			replication_join(con->input.fd, &ireq->packet);
+			/* TODO: check requests in `con' queue */
+			iproto_connection_shutdown(con);
+			return;
+		case IPROTO_SUBSCRIBE:
+			/* TODO: replication authorization */
+			replication_subscribe(con->input.fd, &ireq->packet);
+			/* TODO: check requests in `con' queue */
+			iproto_connection_shutdown(con);
+			return;
+		default:
+			tnt_raise(ClientError, ER_UNKNOWN_REQUEST_TYPE,
+				   (uint32_t) ireq->packet.code);
+		}
+	} catch (ClientError *e) {
+		say_error("admin command error: %s", e->errmsg());
+		iproto_reply_error(&iobuf->out, e, ireq->packet.sync);
+	}
+}
+
 static struct iproto_request *
 iproto_request_new(struct iproto_connection *con,
 		   iproto_request_f process)
diff --git a/src/iproto_constants.cc b/src/iproto_constants.cc
index 7aa1d975e417fbdc3f2477ac370808fa57206ff8..d7b886bb5e2a40b22caf7cec8c40cb3b3648b1c0 100644
--- a/src/iproto_constants.cc
+++ b/src/iproto_constants.cc
@@ -37,7 +37,7 @@ const unsigned char iproto_key_type[IPROTO_KEY_MAX] =
 	/* {{{ header */
 		/* 0x00 */	MP_UINT,   /* IPROTO_CODE */
 		/* 0x01 */	MP_UINT,   /* IPROTO_SYNC */
-		/* 0x02 */	MP_UINT,   /* IPROTO_SERVER_ID */
+		/* 0x02 */	MP_UINT,   /* IPROTO_NODE_ID */
 		/* 0x03 */	MP_UINT,   /* IPROTO_LSN */
 		/* 0x04 */	MP_DOUBLE, /* IPROTO_TIMESTAMP */
 	/* }}} */
@@ -83,6 +83,9 @@ const unsigned char iproto_key_type[IPROTO_KEY_MAX] =
 	/* 0x21 */	MP_ARRAY, /* IPROTO_TUPLE */
 	/* 0x22 */	MP_STR, /* IPROTO_FUNCTION_NAME */
 	/* 0x23 */	MP_STR, /* IPROTO_USER_NAME */
+	/* 0x24 */	MP_STR, /* IPROTO_NODE_UUID */
+	/* 0x25 */	MP_STR, /* IPROTO_CLUSTER_UUID */
+	/* 0x26 */	MP_MAP, /* IPROTO_LSNMAP */
 	/* }}} */
 };
 
@@ -94,7 +97,8 @@ const char *iproto_request_type_strs[] =
 	"REPLACE",
 	"UPDATE",
 	"DELETE",
-	"CALL"
+	"CALL",
+	"AUTH"
 };
 
 void
@@ -125,6 +129,9 @@ iproto_packet_decode(struct iproto_packet *packet, const char **pos,
 		case IPROTO_SYNC:
 			packet->sync = mp_decode_uint(pos);
 			break;
+		case IPROTO_NODE_ID:
+			packet->node_id = mp_decode_uint(pos);
+			break;
 		case IPROTO_LSN:
 			packet->lsn = mp_decode_uint(pos);
 			break;
@@ -169,6 +176,12 @@ iproto_packet_encode(const struct iproto_packet *packet, struct iovec *iov)
 		map_size++;
 	}
 
+	if (packet->node_id) {
+		d = mp_encode_uint(d, IPROTO_NODE_ID);
+		d = mp_encode_uint(d, packet->node_id);
+		map_size++;
+	}
+
 	if (packet->lsn) {
 		d = mp_encode_uint(d, IPROTO_LSN);
 		d = mp_encode_uint(d, packet->lsn);
@@ -191,3 +204,34 @@ iproto_packet_encode(const struct iproto_packet *packet, struct iovec *iov)
 	assert(1 + packet->bodycnt <= IPROTO_PACKET_IOVMAX);
 	return 1 + packet->bodycnt; /* new iovcnt */
 }
+
+int
+iproto_encode_row(const struct iproto_packet *packet, struct iovec *iov,
+		  char fixheader[IPROTO_FIXHEADER_SIZE])
+{
+	int iovcnt = iproto_packet_encode(packet, iov + 1) + 1;
+	uint32_t len = 0;
+	for (int i = 1; i < iovcnt; i++)
+		len += iov[i].iov_len;
+
+	/* Encode the summed length of iov[1..iovcnt) as MP_UINT */
+	char *data = fixheader;
+	data = mp_encode_uint(data, len);
+	/* Pad the rest of the fixed header with an MP_STR of filler bytes */
+	ssize_t padding = IPROTO_FIXHEADER_SIZE - (data - fixheader);
+	if (padding > 0) {
+		data = mp_encode_strl(data, padding - 1);
+#if defined(NDEBUG)
+		data += padding - 1;
+#else
+		while (--padding > 0)
+			*(data++) = 0; /* valgrind */
+#endif
+	}
+	assert(data == fixheader + IPROTO_FIXHEADER_SIZE);
+	iov[0].iov_base = fixheader;
+	iov[0].iov_len = IPROTO_FIXHEADER_SIZE;
+
+	assert(iovcnt <= IPROTO_ROW_IOVMAX);
+	return iovcnt;
+}
diff --git a/src/iproto_constants.h b/src/iproto_constants.h
index 3309d043b46ca7bf95390aa19d185bf2ce956df8..07b0180ac76b9d6e9c3bc6dec4cefb5289efce6d 100644
--- a/src/iproto_constants.h
+++ b/src/iproto_constants.h
@@ -49,8 +49,8 @@ enum {
 enum iproto_key {
 	IPROTO_CODE = 0x00,
 	IPROTO_SYNC = 0x01,
-	/* replication keys */
-	IPROTO_SERVER_ID = 0x02,
+	/* Replication keys (header) */
+	IPROTO_NODE_ID = 0x02,
 	IPROTO_LSN = 0x03,
 	IPROTO_TIMESTAMP = 0x04,
 	/* Leave a gap for other keys in the header. */
@@ -64,6 +64,10 @@ enum iproto_key {
 	IPROTO_TUPLE = 0x21,
 	IPROTO_FUNCTION_NAME = 0x22,
 	IPROTO_USER_NAME = 0x23,
+	/* Replication keys (body) */
+	IPROTO_NODE_UUID = 0x24,
+	IPROTO_CLUSTER_UUID = 0x25,
+	IPROTO_LSNMAP = 0x26,
 	/* Leave a gap between request keys and response keys */
 	IPROTO_DATA = 0x30,
 	IPROTO_ERROR = 0x31,
@@ -72,7 +76,7 @@ enum iproto_key {
 
 #define bit(c) (1ULL<<IPROTO_##c)
 
-#define IPROTO_HEAD_BMAP (bit(CODE) | bit(SYNC) | bit(SERVER_ID) | bit(LSN))
+#define IPROTO_HEAD_BMAP (bit(CODE) | bit(SYNC) | bit(NODE_ID) | bit(LSN))
 #define IPROTO_BODY_BMAP (bit(SPACE_ID) | bit(INDEX_ID) | bit(LIMIT) |\
 			  bit(OFFSET) | bit(KEY) | bit(TUPLE) | \
 			  bit(FUNCTION_NAME) | bit(USER_NAME))
@@ -104,7 +108,9 @@ enum iproto_request_type {
 	IPROTO_AUTH = 7,
 	IPROTO_DML_REQUEST_MAX = 8,
 	IPROTO_PING = 64,
-	IPROTO_SUBSCRIBE = 66
+	IPROTO_JOIN = 65,
+	IPROTO_SUBSCRIBE = 66,
+	IPROTO_SETLSN = 67
 };
 
 extern const char *iproto_request_type_strs[];
@@ -138,6 +144,7 @@ enum {
 
 struct iproto_packet {
 	uint32_t code;
+	uint32_t node_id;
 	uint64_t sync;
 	uint64_t lsn;
 	double tm;
@@ -151,26 +158,11 @@ iproto_packet_decode(struct iproto_packet *packet, const char **pos, const char
 int
 iproto_packet_encode(const struct iproto_packet *packet, struct iovec *out);
 
-struct iproto_subscribe {
-	uint8_t m_len;                          /* MP_STR */
-	uint32_t v_len;                         /* length */
-	uint8_t m_header;                       /* MP_MAP */
-	uint8_t k_code;                         /* IPROTO_CODE */
-	uint8_t v_code;                         /* response status */
-	uint8_t k_sync;                         /* IPROTO_SYNC */
-	uint8_t m_sync;                         /* MP_UINT64 */
-	uint64_t sync;                          /* sync */
-	uint8_t k_lsn;                          /* IPROTO_LSN */
-	uint8_t m_lsn;                          /* MP_UINT64 */
-	uint64_t lsn;                           /* lsn */
-} __attribute__((packed));
-
-static const struct iproto_subscribe iproto_subscribe_stub = {
-	0xce, mp_bswap_u32(sizeof(struct iproto_subscribe) - 5), 0x83,
-	IPROTO_CODE, IPROTO_SUBSCRIBE,
-	IPROTO_SYNC, 0xcf, 0,
-	IPROTO_LSN, 0xcf, 0
-};
+enum { IPROTO_ROW_IOVMAX = IPROTO_PACKET_IOVMAX + 1 };
+
+int
+iproto_encode_row(const struct iproto_packet *packet, struct iovec *iov,
+		  char fixheader[IPROTO_FIXHEADER_SIZE]);
 
 #if defined(__cplusplus)
 } /* extern "C" */
diff --git a/src/log_io.cc b/src/log_io.cc
index 475ec3c83b85f03d222eef237b3afd191efd7246..eb291c7b4918e7a73a751ba9fc761c82ae87715f 100644
--- a/src/log_io.cc
+++ b/src/log_io.cc
@@ -33,12 +33,13 @@
 #include "fiber.h"
 #include "crc32.h"
 #include "fio.h"
-#include "tarantool_eio.h"
+#include "third_party/tarantool_eio.h"
 #include "fiob.h"
 #include "msgpuck/msgpuck.h"
 #include "iproto_constants.h"
-
-const uint32_t xlog_format = 12;
+#include "scoped_guard.h"
+#define MH_UNDEF 1 /* conflicts with mh_nodeids_t */
+#include "recovery.h" /* for mh_cluster */
 
 /*
  * marker is MsgPack fixext2
@@ -51,47 +52,223 @@ const log_magic_t eof_marker = mp_bswap_u32(0xd510aded); /* host byte order */
 const char inprogress_suffix[] = ".inprogress";
 const char v12[] = "0.12\n";
 
-struct log_dir snap_dir = {
-	/* .panic_if_error = */ false,
-	/* .sync_is_async = */ false,
-	/* .open_wflags = */ "wxd",
-	/* .filetype = */ "SNAP\n",
-	/* .filename_ext = */ ".snap",
-	/* .dirname = */ NULL,
-	/* .mode = */ 0660
-};
-
-struct log_dir wal_dir = {
-	/* .panic_if_error = */ false,
-	/* .sync_is_async = */ true,
-	/* .open_wflags = */ "wx",
-	/* .filetype = */ "XLOG\n",
-	/* .filename_ext = */ ".xlog",
-	/* .dirname = */ NULL,
-	/* .mode = */ 0660
-};
+/* {{{ struct log_dir */
 
-static int
-cmp_i64(const void *_a, const void *_b)
+static inline int
+log_dir_map_cmp(const struct log_meta *a, const struct log_meta *b)
 {
-	const int64_t *a = (const int64_t *) _a, *b = (const int64_t *) _b;
-	if (*a == *b)
+	/* Three-way compare; returning the int64_t difference as int would
+	 * truncate and could report the wrong order (or equality). */
+	return (a->lsnsum > b->lsnsum) - (a->lsnsum < b->lsnsum);
+}
+
+rb_gen(, log_dir_map_, log_dir_map_t, struct log_meta, link, log_dir_map_cmp)
+
+static inline int
+log_dir_lsnmap_cmp(const struct log_meta_lsn *a, const struct log_meta_lsn *b)
+{
+	/* Compare explicitly: a subtraction would be truncated to int. */
+	if (a->node_id != b->node_id)
+		return a->node_id < b->node_id ? -1 : 1;
+	if (a->lsn != b->lsn)
+		return a->lsn < b->lsn ? -1 : 1;
+	if (a->meta == NULL) /* a is a key */
 		return 0;
-	return (*a > *b) ? 1 : -1;
+
+	/* logs with smaller lsnsum are first */
+	if (a->meta->lsnsum != b->meta->lsnsum)
+		return a->meta->lsnsum < b->meta->lsnsum ? -1 : 1;
+
+	return 0;
+}
+
+rb_gen(, log_dir_lsnmap_, log_dir_lsnmap_t, struct log_meta_lsn, link,
+       log_dir_lsnmap_cmp)
+
+#define mh_name _nodeids
+#define mh_key_t uint32_t
+#define mh_node_t uint32_t
+#define mh_arg_t void *
+#define mh_hash(a, arg) ((*a))
+#define mh_hash_key(a, arg) (a)
+#define mh_eq(a, b, arg) ((*a) == (*b))
+#define mh_eq_key(key, node, arg) (key == (*node))
+#define MH_SOURCE 1
+#include "salad/mhash.h"
+
+int
+log_dir_create(struct log_dir *dir)
+{
+	memset(dir, 0, sizeof(*dir));
+	dir->nodeids = mh_nodeids_new();
+	if (dir->nodeids == NULL)
+		return -1;
+	log_dir_map_new(&dir->map);
+	log_dir_lsnmap_new(&dir->lsnmap);
+	return 0;
+}
+
+static struct log_meta *
+log_meta_clean(log_dir_map_t *t, struct log_meta *meta, void *arg);
+
+void
+log_dir_destroy(struct log_dir *dir)
+{
+	mh_nodeids_delete(dir->nodeids);
+	free(dir->dirname);
+	log_dir_map_iter(&dir->map, NULL, log_meta_clean, dir);
+}
+
+void
+log_dir_remove_from_index(struct log_dir *dir, struct log_meta *meta)
+{
+	for (uint32_t i = 0; i < meta->lsn_count; i++) {
+		log_dir_lsnmap_remove(&dir->lsnmap, &meta->lsns[i]);
+	}
+	log_dir_map_remove(&dir->map, meta);
+	free(meta);
 }
 
-static ssize_t
-scan_dir(struct log_dir *dir, int64_t **ret_lsn)
+/**
+ * Add the log file named by lsnsum to the directory indexes (dir->map
+ * and dir->lsnmap) or, if it is already indexed, clear its remove_flag.
+ * Returns 0 on success, -1 if the log can't be opened for reading and
+ * -2 if its first row is not SETLSN; other failures are raised.
+ */
+int
+log_dir_add_to_index(struct log_dir *dir, int64_t lsnsum)
+{
+	struct log_meta key;
+	key.lsnsum = lsnsum;
+	struct log_meta *meta = log_dir_map_search(&dir->map, &key);
+	if (meta != NULL) {
+		meta->remove_flag = false;
+		return 0;
+	}
+
+	/* Open xlog to find SETLSN */
+	tt_uuid uuid;
+	struct log_io *wal = log_io_open_for_read(dir, lsnsum, &uuid,
+						  INPROGRESS);
+	if (wal == NULL)
+		return -1;
+	auto log_guard = make_scoped_guard([&]{
+		log_io_close(&wal);
+	});
+
+	/* Find SETLSN command for xlogs (must be the first) */
+	struct log_io_cursor cur;
+	log_io_cursor_open(&cur, wal);
+	struct iproto_packet packet;
+	if (log_io_cursor_next(&cur, &packet) != 0 ||
+	    packet.code != IPROTO_SETLSN)
+		return -2;
+
+	/* Parse SETLSN */
+	uint32_t row_count = 0;
+	struct log_setlsn_row *rows = log_decode_setlsn(&packet, &row_count);
+	auto rows_guard = make_scoped_guard([=]{
+		free(rows);
+	});
+
+	/*
+	 * Update indexes
+	 */
+	meta = (struct log_meta *) calloc(1, sizeof(*meta) +
+		sizeof(*meta->lsns) * row_count);
+	if (meta == NULL) {
+		tnt_raise(ClientError, ER_MEMORY_ISSUE, sizeof(*meta),
+			"log_dir", "meta");
+	}
+	auto meta_guard = make_scoped_guard([=]{
+		/* Unlinks meta from both indexes and also frees it. */
+		log_dir_remove_from_index(dir, meta);
+	});
+
+	meta->lsnsum = lsnsum;
+	log_dir_map_insert(&dir->map, meta);
+
+	/* lsn_count grows per row so the guard unlinks only inserted rows */
+	int64_t lsnsum_check = 0;
+	for (uint32_t i = 0; i < row_count; i++) {
+		struct log_meta_lsn *meta_lsn = &meta->lsns[i];
+		meta_lsn->meta = meta;
+		meta_lsn->node_id = rows[i].node_id;
+		meta_lsn->lsn = rows[i].lsn;
+		lsnsum_check += rows[i].lsn;
+		log_dir_lsnmap_insert(&dir->lsnmap, meta_lsn);
+		meta->lsn_count = i + 1;
+		uint32_t k;
+		k = mh_nodeids_find(dir->nodeids, rows[i].node_id, NULL);
+		if (k != mh_end(dir->nodeids))
+			continue;
+
+		/* Update the set of node_ids */
+		k = mh_nodeids_put(dir->nodeids, &rows[i].node_id, NULL, NULL);
+		if (k == mh_end(dir->nodeids)) {
+			tnt_raise(ClientError, ER_MEMORY_ISSUE, sizeof(*meta),
+				"log_dir", "meta->nodeids");
+		}
+	}
+
+	/*
+	 * Snapshots have empty starting SETLSN table. Don't check lsnsum and
+	 * use the information derived from xlog name.
+	 */
+	if (lsnsum_check != lsnsum && !dir->ignore_initial_setlsn)
+		tnt_raise(IllegalParams, "Invalid xlog name");
+
+	meta_guard.is_active = false;
+	return 0;
+}
+
+static struct log_meta *
+log_meta_mark(log_dir_map_t *t, struct log_meta *meta, void *arg)
+{
+	(void) t;
+	(void) arg;
+	meta->remove_flag = true;
+	return meta;
+}
+
+static struct log_meta *
+log_meta_delete(log_dir_map_t *t, struct log_meta *meta, void *arg)
+{
+	(void) t;
+	struct log_dir *dir = (struct log_dir *) arg;
+	if (meta->remove_flag) {
+		log_dir_remove_from_index(dir, meta);
+		return NULL;
+	}
+
+	return meta;
+}
+
+static struct log_meta *
+log_meta_clean(log_dir_map_t *t, struct log_meta *meta, void *arg)
+{
+	(void) t;
+	struct log_dir *dir = (struct log_dir *) arg;
+	log_dir_remove_from_index(dir, meta);
+	return NULL;
+}
+
+int
+log_dir_scan(struct log_dir *dir)
 {
-	ssize_t result = -1;
-	size_t i = 0, size = 1000;
 	ssize_t ext_len = strlen(dir->filename_ext);
-	int64_t *lsn = (int64_t *) region_alloc(&fiber()->gc,
-						sizeof(int64_t) * size);
 	DIR *dh = opendir(dir->dirname);
 
-	if (lsn == NULL || dh == NULL)
-		goto out;
+	if (dh == NULL) {
+		say_syserror("error reading directory `%s'", dir->dirname);
+		return -1;
+	}
+	auto log_guard = make_scoped_guard([&]{
+		closedir(dh);
+	});
+
+	/* Mark all items to delete */
+	log_dir_map_iter(&dir->map, NULL, log_meta_mark, dir);
 
 	errno = 0;
 	struct dirent *dent;
@@ -117,80 +294,128 @@ scan_dir(struct log_dir *dir, int64_t **ret_lsn)
 		if (!ext_is_ok)
 			continue;
 
-		lsn[i] = strtoll(dent->d_name, &ext, 10);
+		long long lsnsum = strtoll(dent->d_name, &ext, 10);
 		if (strncmp(ext, dir->filename_ext, ext_len) != 0) {
 			/* d_name doesn't parse entirely, ignore it */
 			say_warn("can't parse `%s', skipping", dent->d_name);
 			continue;
 		}
 
-		if (lsn[i] == LLONG_MAX || lsn[i] == LLONG_MIN) {
+		if (lsnsum == LLONG_MAX || lsnsum == LLONG_MIN) {
 			say_warn("can't parse `%s', skipping", dent->d_name);
 			continue;
 		}
 
-		i++;
-		if (i == size) {
-			int64_t *n = (int64_t *) region_alloc(&fiber()->gc, sizeof(int64_t) * size * 2);
-			if (n == NULL)
-				goto out;
-			memcpy(n, lsn, sizeof(int64_t) * size);
-			lsn = n;
-			size = size * 2;
-		}
+		int rc = log_dir_add_to_index(dir, lsnsum);
+		if (rc != 0)
+			return rc;
 	}
 
-	qsort(lsn, i, sizeof(int64_t), cmp_i64);
-
-	*ret_lsn = lsn;
-	result = i;
-out:
-	if (errno != 0)
-		say_syserror("error reading directory `%s'", dir->dirname);
+	/* Delete marked items */
+	log_dir_map_iter(&dir->map, NULL, log_meta_delete, dir);
 
-	if (dh != NULL)
-		closedir(dh);
-	return result;
+	return 0;
 }
 
 int64_t
-greatest_lsn(struct log_dir *dir)
+log_dir_greatest(struct log_dir *dir)
 {
-	int64_t *lsn;
-	ssize_t count = scan_dir(dir, &lsn);
+	struct log_meta *meta = log_dir_map_last(&dir->map);
+	if (meta == NULL)
+		return -1;
+	return meta->lsnsum;
+}
 
-	if (count <= 0)
-		return count;
+static inline struct log_meta_lsn *
+log_dir_lsnmap_lesearch(log_dir_lsnmap_t *tree, struct log_meta_lsn *key)
+{
+	struct log_meta_lsn *node = log_dir_lsnmap_psearch(tree, key);
+	if (node == NULL || node->node_id != key->node_id)
+		return NULL;
+
+	int64_t lsn = node->lsn;
+	while (1) {
+		struct log_meta_lsn *next = log_dir_lsnmap_next(tree, node);
+		if (next == NULL || next->node_id != key->node_id ||
+				next->lsn != lsn)
+			break;
+		node = next;
+	};
+	return node;
+}
 
-	return lsn[count - 1];
+static inline struct log_meta_lsn *
+log_dir_lsnmap_gtsearch(log_dir_lsnmap_t *tree, struct log_meta_lsn *key)
+{
+	struct log_meta_lsn *node = log_dir_lsnmap_nsearch(tree, key);
+	if (node == NULL || node->node_id != key->node_id)
+		return NULL;
+
+	int64_t lsn = node->lsn;
+	while (1) {
+		struct log_meta_lsn *prev = log_dir_lsnmap_prev(tree, node);
+		if (prev == NULL || prev->node_id != key->node_id ||
+				prev->lsn != lsn)
+			break;
+		node = prev;
+	};
+	return node;
 }
 
 int64_t
-find_including_file(struct log_dir *dir, int64_t target_lsn)
+log_dir_next(struct log_dir *dir, struct mh_cluster_t *cluster)
 {
-	int64_t *lsn;
-	ssize_t count = scan_dir(dir, &lsn);
+	int64_t result = INT64_MAX;
+	uint32_t k;
+	mh_foreach(dir->nodeids, k) {
+		/*
+		 * Find file where lsn <= key.lsn for given node_id
+		 */
+		struct log_meta_lsn key;
+		key.node_id = *mh_nodeids_node(dir->nodeids, k);
+		key.lsn = 0;
+		key.meta = NULL; /* this node is a key */
+		uint32_t m = mh_cluster_find(cluster, key.node_id, NULL);
+		if (m != mh_end(cluster)) {
+			struct node *node = *mh_cluster_node(cluster, m);
+			key.lsn = node->current_lsn;
+		}
 
-	if (count <= 0)
-		return count;
+		struct log_meta *meta = NULL;
 
-	while (count > 1) {
-		if (*lsn <= target_lsn && target_lsn < *(lsn + 1)) {
-			goto out;
-			return *lsn;
+		/*
+		 * Find tree node with greatest node.meta.lsnsum where
+		 * node.node_id == key.node_id, node.lsn <= key.lsn
+		 */
+		struct log_meta_lsn *meta_lsn =
+				log_dir_lsnmap_lesearch(&dir->lsnmap, &key);
+		if (meta_lsn == NULL) {
+			/*
+			 * Find tree node with smallest node.meta.lsnsum where
+			 * node.node_id == key.node_id, node.lsn > key.lsn
+			 */
+			meta_lsn = log_dir_lsnmap_gtsearch(&dir->lsnmap, &key);
+			if (meta_lsn == NULL)
+				return INT64_MAX; /* Not found */
+
+			/*
+			 * Take a previous file
+			 */
+			meta = log_dir_map_prev(&dir->map, meta_lsn->meta);
+			if (meta == NULL)
+				return INT64_MAX; /* Not found */
+		} else {
+			meta = meta_lsn->meta;
 		}
-		lsn++;
-		count--;
-	}
 
-	/*
-	 * we can't check here for sure will or will not last file
-	 * contain record with desired lsn since number of rows in file
-	 * is not known beforehand. so, we simply return the last one.
-	 */
+		/*
+		 * Find min([file.lsnsum])
+		 */
+		if (meta->lsnsum < result)
+			result = meta->lsnsum;
+	}
 
-      out:
-	return *lsn;
+	return result;
 }
 
 char *
@@ -203,6 +428,95 @@ format_filename(struct log_dir *dir, int64_t lsn, enum log_suffix suffix)
 	return filename;
 }
 
+void
+log_encode_setlsn(struct iproto_packet *packet, struct mh_cluster_t *cluster)
+{
+	memset(packet, 0, sizeof(*packet));
+	packet->code = IPROTO_SETLSN;
+	/* node_id and lsn should be set to zero for SETLSN command */
+	assert(packet->node_id == 0 && packet->lsn == 0);
+
+	uint32_t cluster_size = cluster != NULL ? mh_size(cluster) : 0;
+	size_t size = 128 + cluster_size *
+		(mp_sizeof_uint(UINT32_MAX) + mp_sizeof_uint(UINT64_MAX));
+	char *buf = (char *) region_alloc(&fiber()->gc, size);
+	char *data = buf;
+	data = mp_encode_map(data, 1);
+	data = mp_encode_uint(data, IPROTO_LSNMAP);
+	data = mp_encode_map(data, cluster_size);
+	if (cluster != NULL) {
+		uint32_t k;
+		mh_foreach(cluster, k) {
+			struct node *node = *mh_cluster_node(cluster, k);
+			data = mp_encode_uint(data, node->id);
+			data = mp_encode_uint(data, node->current_lsn);
+		}
+	}
+	assert(data <= buf + size);
+	packet->body[0].iov_base = buf;
+	packet->body[0].iov_len = (data - buf);
+	packet->bodycnt = 1;
+}
+
+/** Decode a SETLSN body into a calloc()ed row array owned by the caller. */
+struct log_setlsn_row *
+log_decode_setlsn(struct iproto_packet *packet, uint32_t *p_row_count)
+{
+	if (packet->bodycnt == 0)
+		tnt_raise(ClientError, ER_INVALID_MSGPACK, "SETLSN body");
+	const char *data = (const char *) packet->body[0].iov_base;
+	const char *d = data;
+	if (mp_typeof(*data) != MP_MAP)
+		tnt_raise(ClientError, ER_INVALID_MSGPACK,
+			  "SETLSN request body");
+	const char *lsnmap = NULL;
+	uint32_t map_size = mp_decode_map(&d);
+	for (uint32_t i = 0; i < map_size; i++) {
+		if (mp_typeof(*d) != MP_UINT) {
+			mp_next(&d); /* key */
+			mp_next(&d); /* value */
+			continue;
+		}
+		uint8_t key = mp_decode_uint(&d);
+		switch (key) {
+		case IPROTO_LSNMAP:
+			if (mp_typeof(*d) != MP_MAP)
+				tnt_raise(ClientError, ER_INVALID_MSGPACK,
+					  "invalid LSN Map");
+			lsnmap = d;
+			mp_next(&d);
+			break;
+		default:
+			mp_next(&d); /* value */
+		}
+	}
+
+	if (lsnmap == NULL)
+		tnt_raise(ClientError, ER_INVALID_MSGPACK, "missing LSNMAP");
+
+	d = lsnmap;
+	uint32_t row_count = mp_decode_map(&d);
+	struct log_setlsn_row *rows = (struct log_setlsn_row *)
+			calloc(row_count, sizeof(*rows));
+	if (rows == NULL)
+		tnt_raise(LoggedError, ER_MEMORY_ISSUE, sizeof(*rows),
+			  "log_index", "meta");
+	/* Free rows if a malformed LSNMAP entry raises below. */
+	auto rows_guard = make_scoped_guard([=]{ free(rows); });
+	for (uint32_t i = 0; i < row_count; i++) {
+		if (mp_typeof(*d) != MP_UINT)
+			tnt_raise(ClientError, ER_INVALID_MSGPACK, "LSNMAP");
+		rows[i].node_id = mp_decode_uint(&d);
+		if (mp_typeof(*d) != MP_UINT)
+			tnt_raise(ClientError, ER_INVALID_MSGPACK, "LSNMAP");
+		rows[i].lsn = mp_decode_uint(&d);
+	}
+
+	rows_guard.is_active = false; /* success: the caller owns rows */
+	*p_row_count = row_count;
+	return rows;
+}
+
 /* }}} */
 
 /* {{{ struct log_io_cursor */
@@ -554,12 +868,18 @@ log_io_sync(struct log_io *l)
 	return 0;
 }
 
+#define NODE_UUID_KEY "Node"
+
 static int
-log_io_write_header(struct log_io *l)
+log_io_write_meta(struct log_io *l, const tt_uuid *node_uuid)
 {
-	int ret = fprintf(l->f, "%s%s\n", l->dir->filetype, v12);
+	if (fprintf(l->f, "%s%s", l->dir->filetype, v12) < 0 ||
+	    fprintf(l->f, NODE_UUID_KEY ": %s\n\n",
+		    tt_uuid_str(node_uuid)) < 0) {
+		return -1;
+	}
 
-	return ret < 0 ? -1 : 0;
+	return 0;
 }
 
 /**
@@ -571,7 +891,8 @@ log_io_write_header(struct log_io *l)
  * @return 0 if success, -1 on error.
  */
 static int
-log_io_verify_meta(struct log_io *l, const char **errmsg)
+log_io_verify_meta(struct log_io *l, tt_uuid *node_uuid,
+		   const char **errmsg)
 {
 	char filetype[32], version[32], buf[256];
 	struct log_dir *dir = l->dir;
@@ -596,8 +917,30 @@ log_io_verify_meta(struct log_io *l, const char **errmsg)
 			*errmsg = "failed to read log file header";
 			goto error;
 		}
-		if (strcmp(buf, "\n") == 0 || strcmp(buf, "\r\n") == 0)
+		if (strcmp(buf, "\n") == 0)
 			break;
+
+		/* Parse RFC822-like string */
+		char *end = buf + strlen(buf);
+		if (end > buf && *(end - 1) == '\n') *(--end) = 0; /* skip \n */
+		char *key = buf;
+		char *val = strchr(buf, ':');
+		if (val == NULL) {
+			*errmsg = "invalid meta";
+			goto error;
+		}
+		*(val++) = 0;
+		while (*val == ' ') ++val; /* skip starting spaces */
+
+		if (strcmp(key, NODE_UUID_KEY) == 0) {
+			if ((end - val) != UUID_STR_LEN ||
+			    tt_uuid_from_string(val, node_uuid) != 0) {
+				*errmsg = "can't parse node uuid";
+				goto error;
+			}
+		} else {
+			/* Skip unknown key */
+		}
 	}
 	return 0;
 error:
@@ -605,8 +948,8 @@ log_io_verify_meta(struct log_io *l, const char **errmsg)
 }
 
 struct log_io *
-log_io_open(struct log_dir *dir, enum log_mode mode,
-	    const char *filename, enum log_suffix suffix, FILE *file)
+log_io_open(struct log_dir *dir, enum log_mode mode, const char *filename,
+	    tt_uuid *node_uuid, enum log_suffix suffix, FILE *file)
 {
 	struct log_io *l = NULL;
 	int save_errno;
@@ -630,11 +973,11 @@ log_io_open(struct log_dir *dir, enum log_mode mode,
 	l->dir = dir;
 	l->is_inprogress = suffix == INPROGRESS;
 	if (mode == LOG_READ) {
-		if (log_io_verify_meta(l, &errmsg) != 0)
+		if (log_io_verify_meta(l, node_uuid, &errmsg) != 0)
 			goto error;
 	} else { /* LOG_WRITE */
 		setvbuf(l->f, NULL, _IONBF, 0);
-		if (log_io_write_header(l) != 0) {
+		if (log_io_write_meta(l, node_uuid) != 0) {
 			errmsg = strerror(errno);
 			goto error;
 		}
@@ -652,13 +995,16 @@ log_io_open(struct log_dir *dir, enum log_mode mode,
 }
 
 struct log_io *
-log_io_open_for_read(struct log_dir *dir, int64_t lsn, enum log_suffix suffix)
+log_io_open_for_read(struct log_dir *dir, int64_t lsnsum,
+		     tt_uuid *node_uuid, enum log_suffix suffix)
 {
-	assert(lsn != 0);
-
-	const char *filename = format_filename(dir, lsn, suffix);
+	const char *filename = format_filename(dir, lsnsum, suffix);
 	FILE *f = fopen(filename, "r");
-	return log_io_open(dir, LOG_READ, filename, suffix, f);
+	if (suffix == INPROGRESS && f == NULL) {
+		filename = format_filename(dir, lsnsum, NONE);
+		f = fopen(filename, "r");
+	}
+	return log_io_open(dir, LOG_READ, filename, node_uuid, suffix, f);
 }
 
 /**
@@ -666,7 +1012,8 @@ log_io_open_for_read(struct log_dir *dir, int64_t lsn, enum log_suffix suffix)
  * and sets errno.
  */
 struct log_io *
-log_io_open_for_write(struct log_dir *dir, int64_t lsn, enum log_suffix suffix)
+log_io_open_for_write(struct log_dir *dir, int64_t lsn, tt_uuid *node_uuid,
+		      enum log_suffix suffix)
 {
 	char *filename;
 	FILE *f;
@@ -692,7 +1039,7 @@ log_io_open_for_write(struct log_dir *dir, int64_t lsn, enum log_suffix suffix)
 	if (!f)
 		goto error;
 	say_info("creating `%s'", filename);
-	return log_io_open(dir, LOG_WRITE, filename, suffix, f);
+	return log_io_open(dir, LOG_WRITE, filename, node_uuid, suffix, f);
 error:
 	say_syserror("%s: failed to open `%s'", __func__, filename);
 	return NULL;
diff --git a/src/log_io.h b/src/log_io.h
index 764d767091f11a60bd7531788748c9e2610b54b6..adc783c7252f3635bf50e29325deb63a9d136c63 100644
--- a/src/log_io.h
+++ b/src/log_io.h
@@ -33,13 +33,12 @@
 #include <stdbool.h>
 #include <sys/uio.h>
 #include "trivia/util.h"
-#include "tarantool_ev.h"
+#include "third_party/tarantool_ev.h"
 #include "iproto_constants.h"
+#include "tt_uuid.h"
 
 extern const uint32_t xlog_format;
 
-enum log_format { WAL = 65534 };
-
 enum log_mode {
 	LOG_READ,
 	LOG_WRITE
@@ -47,6 +46,48 @@ enum log_mode {
 
 enum log_suffix { NONE, INPROGRESS };
 
+struct log_meta;
+struct log_meta_lsn;
+
+#define RB_COMPACT 1
+#include <third_party/rb.h>
+
+/* Used by internal functions */
+struct log_meta_lsn {
+	rb_node(struct log_meta_lsn) link;
+	int32_t node_id;
+	int64_t lsn;
+	struct log_meta *meta;
+};
+
+/* Used by internal functions */
+struct log_meta {
+	rb_node(struct log_meta) link;
+	int64_t lsnsum;
+	bool remove_flag; /* used internally */
+	uint32_t lsn_count;
+	struct log_meta_lsn lsns[0]; /* [0] is better for clang */
+};
+
+/*
+ * Map: (lsnsum) => (struct log_meta)
+ */
+
+typedef rb_tree(struct log_meta) log_dir_map_t;
+rb_proto(, log_dir_map_, log_dir_map_t, struct log_meta)
+
+/*
+ * Map: (node_id, lsn) => (struct log_meta)
+ */
+
+typedef rb_tree(struct log_meta_lsn) log_dir_lsnmap_t;
+rb_proto(, log_dir_lsnmap_, log_dir_lsnmap_t, struct log_meta_lsn)
+
+/*
+ * Set: (node_id) - defined in .cc
+ */
+struct mh_nodeids_t;
+
 struct log_dir {
 	bool panic_if_error;
 	/**
@@ -54,6 +95,8 @@ struct log_dir {
 	 * in a separate thread.
 	 */
 	bool sync_is_async;
+	/* don't check that sum(setlsn) == lsnsum in filename (for snaps) */
+	bool ignore_initial_setlsn;
 
 	/* Additional flags to apply at fopen(2) to write. */
 	char open_wflags[6];
@@ -62,17 +105,40 @@ struct log_dir {
 	char *dirname;
 	/** File create mode in this directory. */
 	mode_t mode;
+
+	/* Directory indexes for log_dir_next() */
+	log_dir_lsnmap_t lsnmap;
+	log_dir_map_t map;
+	struct mh_nodeids_t *nodeids;
 };
 
-extern struct log_dir snap_dir;
-extern struct log_dir wal_dir;
+int
+log_dir_create(struct log_dir *dir);
+void
+log_dir_destroy(struct log_dir *dir);
+
+int
+log_dir_scan(struct log_dir *dir);
+
+int64_t
+log_dir_greatest(struct log_dir *dir);
 
 int64_t
-greatest_lsn(struct log_dir *dir);
+log_dir_next(struct log_dir *dir, struct mh_cluster_t *cluster);
+
 char *
 format_filename(struct log_dir *dir, int64_t lsn, enum log_suffix suffix);
-int64_t
-find_including_file(struct log_dir *dir, int64_t target_lsn);
+
+void
+log_encode_setlsn(struct iproto_packet *packet, struct mh_cluster_t *cluster);
+
+struct log_setlsn_row {
+	uint32_t node_id;
+	int64_t lsn;
+};
+
+struct log_setlsn_row *
+log_decode_setlsn(struct iproto_packet *packet, uint32_t *p_size);
 
 struct log_io {
 	struct log_dir *dir;
@@ -87,12 +153,14 @@ struct log_io {
 };
 
 struct log_io *
-log_io_open_for_read(struct log_dir *dir, int64_t lsn, enum log_suffix suffix);
+log_io_open_for_read(struct log_dir *dir, int64_t lsn, tt_uuid *node_uuid,
+		     enum log_suffix suffix);
 struct log_io *
-log_io_open_for_write(struct log_dir *dir, int64_t lsn, enum log_suffix suffix);
+log_io_open_for_write(struct log_dir *dir, int64_t lsn,
+		      tt_uuid *node_uuid, enum log_suffix suffix);
 struct log_io *
-log_io_open(struct log_dir *dir, enum log_mode mode,
-	    const char *filename, enum log_suffix suffix, FILE *file);
+log_io_open(struct log_dir *dir, enum log_mode mode, const char *filename,
+	    tt_uuid *node_uuid, enum log_suffix suffix, FILE *file);
 int
 log_io_sync(struct log_io *l);
 int
diff --git a/src/lua/info.cc b/src/lua/info.cc
index e4e4599a0175bcb7c39e407860579f06f00f24bf..4c523db4adc619cf6dfffdfdd2326f64e1933adb 100644
--- a/src/lua/info.cc
+++ b/src/lua/info.cc
@@ -63,9 +63,25 @@ lbox_info_recovery_last_update_tstamp(struct lua_State *L)
 }
 
 static int
-lbox_info_lsn(struct lua_State *L)
+lbox_info_node(struct lua_State *L)
 {
-	luaL_pushnumber64(L, recovery_state->confirmed_lsn);
+	lua_pushlstring(L, tt_uuid_str(&recovery_state->node_uuid), UUID_STR_LEN);
+	return 1;
+}
+
+static int
+lbox_info_cluster(struct lua_State *L)
+{
+	uint32_t cluster_size = mh_size(recovery_state->cluster);
+	lua_createtable(L, 0, cluster_size);
+	uint32_t k;
+	mh_foreach(recovery_state->cluster, k) {
+		struct node *node = *mh_cluster_node(recovery_state->cluster,k);
+		lua_pushlstring(L, tt_uuid_str(&node->uuid), UUID_STR_LEN);
+		luaL_pushnumber64(L, node->confirmed_lsn);
+		lua_settable(L, -3);
+	}
+
 	return 1;
 }
 
@@ -103,7 +119,8 @@ lbox_info_dynamic_meta [] =
 {
 	{"recovery_lag", lbox_info_recovery_lag},
 	{"recovery_last_update", lbox_info_recovery_last_update_tstamp},
-	{"lsn", lbox_info_lsn},
+	{"cluster", lbox_info_cluster},
+	{"node", lbox_info_node},
 	{"status", lbox_info_status},
 	{"uptime", lbox_info_uptime},
 	{"snapshot_pid", lbox_info_snapshot_pid},
diff --git a/src/lua/uuid.lua b/src/lua/uuid.lua
index 0fe76633baef4368bfb45413d851c24dfb5840b3..72c50e811e6dfe680796e440f1ce9d770fdea563 100644
--- a/src/lua/uuid.lua
+++ b/src/lua/uuid.lua
@@ -10,22 +10,15 @@
         int snprintf(char *str, size_t size, const char *format, ...);
     ]]
 
-    local libuuid = nil
     local builtin = ffi.C
-    function check_libs()
-        if libuuid then return end
-        libuuid = ffi.load('uuid.so.1')
-    end
     box.uuid = function()
-        check_libs()
         local uuid = ffi.new('uuid_t')
-        libuuid.uuid_generate(uuid)
+        builtin.uuid_generate(uuid)
         return ffi.string(uuid, 16)
     end
     box.uuid_hex = function()
-        check_libs()
         local uuid = ffi.new('uuid_t')
-        libuuid.uuid_generate(uuid)
+        builtin.uuid_generate(uuid)
         local uuid_hex = ffi.new('char[33]')
         for i = 0,ffi.sizeof('uuid_t'),1 do
             builtin.snprintf(uuid_hex + i * 2, 3, "%02x",
diff --git a/src/recovery.cc b/src/recovery.cc
index 078ca53786981faa20423120337ab002b5a8f123..4a06d01cc73e8b9e1b485f755e9cb032031573a8 100644
--- a/src/recovery.cc
+++ b/src/recovery.cc
@@ -26,6 +26,7 @@
  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+#define MH_SOURCE 1
 #include "recovery.h"
 
 #include <fcntl.h>
@@ -43,6 +44,8 @@
 #include "msgpuck/msgpuck.h"
 #include "iproto_constants.h"
 #include "crc32.h"
+#include "scoped_guard.h"
+#include "box/cluster.h"
 
 /*
  * Recovery subsystem
@@ -110,90 +113,111 @@ struct recovery_state *recovery_state;
 
 const char *wal_mode_STRS[] = { "none", "write", "fsync", "fsync_delay", NULL };
 
-/* {{{ LSN API */
+/* {{{ mh_cluster definition */
 
+/** Removes all nodes from mhash */
 void
-wait_lsn_set(struct wait_lsn *wait_lsn, int64_t lsn)
+mh_cluster_clean(struct mh_cluster_t *hash)
 {
-	assert(wait_lsn->waiter == NULL);
-	wait_lsn->waiter = fiber();
-	wait_lsn->lsn = lsn;
+	while (mh_size(hash) > 0) {
+		mh_int_t k = mh_first(hash);
+		struct node *node = *mh_cluster_node(hash, k);
+		mh_cluster_del(hash, k, NULL);
+		free(node);
+	}
 }
 
+/** Gets or creates a node; returns NULL on failure (nothing is leaked) */
+struct node *
+mh_cluster_fetch(struct mh_cluster_t *hash, uint32_t node_id)
+{
+	uint32_t k = mh_cluster_find(hash, node_id, NULL);
+	if (k != mh_end(hash))
+		return *mh_cluster_node(hash, k);
+	/* Create node if it doesn't exist */
+	struct node *node = (struct node *) calloc(1, sizeof(*node));
+	if (node == NULL)
+		return NULL;
+	node->id = node_id;
+	k = mh_cluster_put(hash, (const struct node **) &node, NULL, NULL);
+	if (k != mh_end(hash))
+		return node;
+	free(node); /* insertion failed: the hash does not own the node */
+	return NULL;
+}
 
-/* Alert the waiter, if any. */
-static inline void
-wakeup_lsn_waiter(struct recovery_state *r)
+/** Calculates sum([node.current_lsn]) */
+static int64_t
+mh_cluster_current_sum(struct mh_cluster_t *cluster)
 {
-	if (r->wait_lsn.waiter && r->confirmed_lsn >= r->wait_lsn.lsn) {
-		fiber_wakeup(r->wait_lsn.waiter);
+	int64_t sum = 0;
+	uint32_t k;
+	mh_foreach(cluster, k) {
+		struct node *node = *mh_cluster_node(cluster, k);
+		sum += node->current_lsn;
 	}
+
+	return sum;
 }
 
-void
-confirm_lsn(struct recovery_state *r, int64_t lsn, bool is_commit)
+/* }}} */
+
+/* {{{ LSN API */
+
+static struct node *
+fill_lsn(struct recovery_state *r, struct iproto_packet *packet)
 {
-	assert(r->confirmed_lsn <= r->lsn);
+	struct node *node = r->local_node;
+	assert(packet != NULL || node != NULL);
+	if (packet == NULL || packet->node_id == 0) {
+		/* Local request */
+		if (node == NULL)
+			tnt_raise(ClientError, ER_LOCAL_NODE_IS_NOT_ACTIVE);
+		++node->current_lsn;
+		if (packet != NULL) {
+			packet->lsn = node->current_lsn;
+			packet->node_id = node->id;
+		}
+	} else {
+		/* Remote request */
+		uint32_t k = mh_cluster_find(r->cluster, packet->node_id, NULL);
+		if (k == mh_end(r->cluster))
+			tnt_raise(ClientError, ER_UNKNOWN_NODE, packet->node_id);
+		node = *mh_cluster_node(r->cluster, k);
+		node->current_lsn = packet->lsn;
+	}
+
+	return node;
+}
 
-	if (r->confirmed_lsn < lsn) {
+static void
+confirm_lsn(struct node *node, int64_t lsn, bool is_commit)
+{
+	if (node->confirmed_lsn < lsn) {
 		if (is_commit) {
-			if (r->confirmed_lsn + 1 != lsn)
-				say_warn("non consecutive LSN, confirmed: %jd, "
-					 " new: %jd, diff: %jd",
-					 (intmax_t) r->confirmed_lsn,
+			if (node->confirmed_lsn + 1 != lsn) {
+				say_warn("non consecutive LSN for node %u (%s) "
+					 "confirmed: %jd, new: %jd, diff: %jd",
+					 (unsigned) node->id,
+					 tt_uuid_str(&node->uuid),
+					 (intmax_t) node->confirmed_lsn,
 					 (intmax_t) lsn,
-					 (intmax_t) (lsn - r->confirmed_lsn));
-			r->confirmed_lsn = lsn;
+					 (intmax_t) (lsn - node->confirmed_lsn));
+			}
+			node->confirmed_lsn = lsn;
 		 }
 	} else {
-		 /*
+		/*
 		 * There can be holes in
 		 * confirmed_lsn, in case of disk write failure, but
 		 * wal_writer never confirms LSNs out order.
 		 */
-		assert(false);
-		say_error("LSN is used twice or COMMIT order is broken: "
-			  "confirmed: %jd, new: %jd",
-			  (intmax_t) r->confirmed_lsn, (intmax_t) lsn);
-	}
-	wakeup_lsn_waiter(r);
-}
-
-void
-set_lsn(struct recovery_state *r, int64_t lsn)
-{
-	r->lsn = lsn;
-	r->confirmed_lsn = lsn;
-	say_debug("set_lsn(%p, %" PRIi64, r, r->lsn);
-	wakeup_lsn_waiter(r);
-}
-
-/** Wait until the given LSN makes its way to disk. */
-void
-recovery_wait_lsn(struct recovery_state *r, int64_t lsn)
-{
-	while (lsn < r->confirmed_lsn) {
-		wait_lsn_set(&r->wait_lsn, lsn);
-		try {
-			fiber_yield();
-			wait_lsn_clear(&r->wait_lsn);
-		} catch (Exception *e) {
-			wait_lsn_clear(&r->wait_lsn);
-			throw;
-		}
+		panic("LSN for %s is used twice or COMMIT order is broken: "
+		      "confirmed: %jd, new: %jd", tt_uuid_str(&node->uuid),
+		      (intmax_t) node->confirmed_lsn, (intmax_t) lsn);
 	}
 }
 
-
-int64_t
-next_lsn(struct recovery_state *r)
-{
-	r->lsn++;
-	say_debug("next_lsn(%p, %" PRIi64, r, r->lsn);
-	return r->lsn;
-}
-
-
 /* }}} */
 
 /* {{{ Initial recovery */
@@ -208,7 +232,8 @@ recovery_stop_local(struct recovery_state *r);
 void
 recovery_init(const char *snap_dirname, const char *wal_dirname,
 	      row_handler row_handler, void *row_handler_param,
-	      snapshot_handler snapshot_handler, int rows_per_wal)
+	      snapshot_handler snapshot_handler, join_handler join_handler,
+	      int rows_per_wal)
 {
 	assert(recovery_state == NULL);
 	recovery_state = (struct recovery_state *) calloc(1, sizeof(struct recovery_state));
@@ -222,16 +247,52 @@ recovery_init(const char *snap_dirname, const char *wal_dirname,
 	r->row_handler_param = row_handler_param;
 
 	r->snapshot_handler = snapshot_handler;
+	r->join_handler = join_handler;
+
+	log_dir_create(&r->snap_dir);
+	r->snap_dir.panic_if_error = false;
+	r->snap_dir.sync_is_async = false;
+	strcpy(r->snap_dir.open_wflags, "wxd");
+	r->snap_dir.filetype = "SNAP\n";
+	r->snap_dir.filename_ext = ".snap";
+	r->snap_dir.dirname = strdup(snap_dirname);
+	r->snap_dir.mode = 0660;
+	r->snap_dir.ignore_initial_setlsn = true;
+
+	log_dir_create(&r->wal_dir);
+	r->wal_dir.panic_if_error = false;
+	r->wal_dir.sync_is_async = true;
+	strcpy(r->wal_dir.open_wflags, "wx");
+	r->wal_dir.filetype = "XLOG\n";
+	r->wal_dir.filename_ext = ".xlog";
+	r->wal_dir.dirname = strdup(wal_dirname);
+	r->wal_dir.mode = 0660;
 
-	r->snap_dir = &snap_dir;
-	r->snap_dir->dirname = strdup(snap_dirname);
-	r->wal_dir = &wal_dir;
-	r->wal_dir->dirname = strdup(wal_dirname);
 	if (r->wal_mode == WAL_FSYNC) {
-		(void) strcat(r->wal_dir->open_wflags, "s");
+		(void) strcat(r->wal_dir.open_wflags, "s");
 	}
 	r->rows_per_wal = rows_per_wal;
-	wait_lsn_clear(&r->wait_lsn);
+
+	r->cluster = mh_cluster_new();
+	if (r->cluster == NULL)
+		panic("cannot reallocate r->cluster");
+
+	/* Add a fake node for snapshot/bootstrap */
+	struct node *node = (struct node *) calloc(1, sizeof(*node));
+	if (node == NULL)
+		panic("cannot allocate struct node");
+	node->id = 0;
+	assert(tt_uuid_is_nil(&node->uuid));
+	uint32_t k = mh_cluster_put(r->cluster,
+		(const struct node **) &node, NULL, NULL);
+	if (k == mh_end(r->cluster))
+		panic("cannot reallocate r->cluster");
+	r->local_node = node;
+
+	if (log_dir_scan(&r->snap_dir) != 0)
+		panic("can't scan snap directory");
+	if (log_dir_scan(&r->wal_dir) != 0)
+		panic("can't scan wal directory");
 }
 
 void
@@ -274,8 +335,8 @@ recovery_free()
 	if (r->writer)
 		wal_writer_stop(r);
 
-	free(r->snap_dir->dirname);
-	free(r->wal_dir->dirname);
+	log_dir_destroy(&r->snap_dir);
+	log_dir_destroy(&r->wal_dir);
 	if (r->current_wal) {
 		/*
 		 * Possible if shutting down a replication
@@ -284,50 +345,123 @@ recovery_free()
 		log_io_close(&r->current_wal);
 	}
 
+	mh_cluster_clean(r->cluster);
+	mh_cluster_delete(r->cluster);
+
 	recovery_state = NULL;
 }
 
 void
 recovery_setup_panic(struct recovery_state *r, bool on_snap_error, bool on_wal_error)
 {
-	r->wal_dir->panic_if_error = on_wal_error;
-	r->snap_dir->panic_if_error = on_snap_error;
+	r->wal_dir.panic_if_error = on_wal_error;
+	r->snap_dir.panic_if_error = on_snap_error;
+}
+
+static void
+recovery_process_setlsn(struct recovery_state *r, struct iproto_packet *packet)
+{
+	say_debug("SETLSN");
+	uint32_t row_count;
+	struct log_setlsn_row *rows = log_decode_setlsn(packet, &row_count);
+	auto rows_guard = make_scoped_guard([=]{
+		free(rows);
+	});
+
+	for (uint32_t i = 0; i < row_count; i++) {
+		uint32_t k = mh_cluster_find(r->cluster, rows[i].node_id, NULL);
+		if (k == mh_end(r->cluster))
+			tnt_raise(ClientError, ER_UNKNOWN_NODE, rows[i].node_id);
+
+		struct node *node = *mh_cluster_node(r->cluster, k);
+		assert(node->confirmed_lsn == node->current_lsn);
+
+		if (node->current_lsn <= rows[i].lsn) {
+			say_debug("setting\t(%2u, %020lld)",
+				  node->id, (long long) rows[i].lsn);
+			node->confirmed_lsn = node->current_lsn = rows[i].lsn;
+		} else {
+			/* Ignore outdated SETLSN rows */
+			say_debug("skipping\t(%2u, %020lld)",
+				  node->id, (long long) rows[i].lsn);
+		}
+	}
+	say_debug("--");
 }
 
-/** Write the bootstrap snapshot.
- *
- *  @return panics on error
- *  Errors are logged to the log file.
- */
 void
-init_storage_on_master(struct log_dir *dir)
+recovery_process(struct recovery_state *r, struct iproto_packet *packet)
 {
-	const char *filename = format_filename(dir, 1 /* lsn */, NONE);
-	int fd = open(filename, O_EXCL|O_CREAT|O_WRONLY, dir->mode);
-	say_info("saving snapshot `%s'", filename);
-	if (fd == -1) {
-		panic_syserror("failed to open snapshot file `%s' for "
-			       "writing", filename);
+	if (r->relay)
+		return r->row_handler(r->row_handler_param, packet);
+
+	if (!iproto_request_is_dml(packet->code)) {
+		/* Process admin commands (node_id, lsn are ignored) */
+		switch (packet->code) {
+		case IPROTO_SETLSN:
+			recovery_process_setlsn(r, packet);
+			break;
+		default:
+			tnt_raise(ClientError, ER_UNKNOWN_REQUEST_TYPE,
+				  packet->code);
+		}
+		return;
 	}
-	if (write(fd, bootstrap_bin, sizeof(bootstrap_bin)) !=
-						sizeof(bootstrap_bin)) {
-		panic_syserror("failed to write to snapshot file `%s'",
-			       filename);
+
+	/* Check node_id and lsn */
+	uint32_t k = mh_cluster_find(r->cluster, packet->node_id, NULL);
+	if (k != mh_end(r->cluster)) {
+		struct node *node = *mh_cluster_node(r->cluster, k);
+		if (packet->lsn <= node->confirmed_lsn) {
+			say_debug("skipping too young row");
+			return;
+		}
+	} else {
+		say_warn("skipping row with unknown node_id");
+		return;
 	}
-	close(fd);
+
+	return r->row_handler(r->row_handler_param, packet);
+}
+
+void
+cluster_bootstrap(struct recovery_state *r)
+{
+	/* Generate Node-UUID */
+	tt_uuid_create(&r->node_uuid);
+
+	/* Recover from bootstrap.snap */
+	say_info("initializing cluster");
+	FILE *f = fmemopen((void *) &bootstrap_bin,
+			   sizeof(bootstrap_bin), "r");
+	tt_uuid bootstrap_uuid; /* ignored */
+	struct log_io *snap = log_io_open(&r->snap_dir, LOG_READ,
+		"bootstrap.snap", &bootstrap_uuid, NONE, f);
+	if (snap == NULL) /* assert() is compiled out under NDEBUG */
+		panic("failed to open bootstrap snapshot");
+	auto snap_guard = make_scoped_guard([&]{
+		log_io_close(&snap);
+	});
+
+	int rc = recover_wal(r, snap);
+	if (rc != 0)
+		panic("failed to bootstrap data directory");
+
+	/* Initialize local node */
+	r->join_handler(&r->node_uuid);
+	assert(r->local_node != NULL);
+	assert(r->local_node->id == 1);
+	assert(tt_uuid_cmp(&r->local_node->uuid, &r->node_uuid) == 0);
+
 	say_info("done");
 }
 
-/**
- * Read a snapshot and call row_handler for every snapshot row.
- * Panic in case of error.
- */
 /**
  * Read a snapshot and call row_handler for every snapshot row.
  * Panic in case of error.
  */
 void
-recover_snap(struct recovery_state *r, const char *replication_source)
+recover_snap(struct recovery_state *r)
 {
 	/*  current_wal isn't open during initial recover. */
 	assert(r->current_wal == NULL);
@@ -335,57 +469,32 @@ recover_snap(struct recovery_state *r, const char *replication_source)
 
 	struct log_io *snap;
 	int64_t lsn;
-	int rc = 0;
-
-	lsn = greatest_lsn(r->snap_dir);
-	if (lsn == 0 && greatest_lsn(r->wal_dir) == 0) {
-		say_info("found an empty data directory, initializing...");
-		if (replication_source) {
-			/* play rows and save snapshot */
-			replica_bootstrap(r, replication_source);
-			snapshot_save(r);
-			assert(r->lsn == greatest_lsn(r->snap_dir));
-			return;
-		} else {
-			init_storage_on_master(r->snap_dir);
-			lsn = greatest_lsn(r->snap_dir);
-		}
-	}
 
+	if (log_dir_scan(&r->snap_dir) != 0) {
+		say_error("can't find snapshot");
+		goto error;
+	}
+	lsn = log_dir_greatest(&r->snap_dir);
 	if (lsn <= 0) {
 		say_error("can't find snapshot");
 		goto error;
 	}
-	snap = log_io_open_for_read(r->snap_dir, lsn, NONE);
+	snap = log_io_open_for_read(&r->snap_dir, lsn, &r->node_uuid, NONE);
 	if (snap == NULL) {
 		say_error("can't find/open snapshot");
 		goto error;
 	}
-	say_info("recover from `%s'", snap->filename);
-	struct log_io_cursor i;
 
-	log_io_cursor_open(&i, snap);
-
-	struct iproto_packet packet;
-	while (log_io_cursor_next(&i, &packet) == 0) {
-		if (r->row_handler(r->row_handler_param, &packet) < 0) {
-			say_error("can't apply row");
-			if (snap->dir->panic_if_error)
-				break;
-			rc = 1;
-		}
+	if (tt_uuid_is_nil(&r->node_uuid)) {
+		say_error("can't find node uuid in snapshot");
+		goto error;
 	}
-	log_io_cursor_close(&i);
-	log_io_close(&snap);
 
-	if (rc == 0) {
-		r->lsn = r->confirmed_lsn = lsn;
-		say_info("snapshot recovered, confirmed lsn: %"
-			 PRIi64, r->confirmed_lsn);
+	say_info("recover from `%s'", snap->filename);
+	if (recover_wal(r, snap) == 0)
 		return;
-	}
 error:
-	if (greatest_lsn(r->snap_dir) <= 0) {
+	if (log_dir_greatest(&r->snap_dir) <= 0) {
 		say_crit("didn't you forget to initialize storage with --init-storage switch?");
 		_exit(1);
 	}
@@ -409,20 +518,20 @@ recover_wal(struct recovery_state *r, struct log_io *l)
 
 	struct iproto_packet packet;
 	while (log_io_cursor_next(&i, &packet) == 0) {
-		if (packet.lsn <= r->confirmed_lsn) {
-			say_debug("skipping too young row");
-			continue;
-		}
 		/*
 		 * After handler(row) returned, row may be
 		 * modified, do not use it.
 		 */
-		if (r->row_handler(r->row_handler_param, &packet) < 0) {
-			say_error("can't apply row");
+		try {
+			recovery_process(r, &packet);
+		} catch (SocketError *e) {
+			say_error("can't apply row: %s", e->errmsg());
+			goto end;
+		} catch (Exception *e) {
+			say_error("can't apply row: %s", e->errmsg());
 			if (l->dir->panic_if_error)
 				goto end;
 		}
-		set_lsn(r, packet.lsn);
 	}
 	res = i.eof_read ? LOG_EOF : 1;
 end:
@@ -448,35 +557,29 @@ recover_remaining_wals(struct recovery_state *r)
 	char *filename;
 	enum log_suffix suffix;
 
-	current_lsn = r->confirmed_lsn + 1;
-	wal_greatest_lsn = greatest_lsn(r->wal_dir);
+	if (log_dir_scan(&r->wal_dir) != 0)
+		return -1;
 
+	wal_greatest_lsn = log_dir_greatest(&r->wal_dir);
 	/* if the caller already opened WAL for us, recover from it first */
 	if (r->current_wal != NULL)
 		goto recover_current_wal;
 
-	while (r->confirmed_lsn < wal_greatest_lsn) {
-		/*
-		 * If a newer WAL appeared in the directory before
-		 * current_wal was fully read, try re-reading
-		 * one last time. */
-		if (r->current_wal != NULL) {
-			if (r->current_wal->retry++ < 3) {
-				say_warn("`%s' has no EOF marker, yet a newer WAL file exists:"
-					 " trying to re-read (attempt #%d)",
-					 r->current_wal->filename, r->current_wal->retry);
-				goto recover_current_wal;
-			} else {
-				say_warn("WAL `%s' wasn't correctly closed",
-					 r->current_wal->filename);
-				log_io_close(&r->current_wal);
+	while (1) {
+find_next_wal:
+		current_lsn = log_dir_next(&r->wal_dir, r->cluster);
+		if (current_lsn == INT64_MAX)
+			break; /* No more WALs */
+
+		if (current_lsn == r->lsnsum) {
+			if (current_lsn != wal_greatest_lsn) {
+				say_error("missing xlog between %020lld and %020lld",
+					  (long long) current_lsn,
+					  (long long) wal_greatest_lsn);
 			}
+			break;
 		}
 
-		/* TODO: find a better way of finding the next xlog */
-		current_lsn = r->confirmed_lsn;
-find_next_wal:
-		current_lsn++;
 		/*
 		 * For the last WAL, first try to open .inprogress
 		 * file: if it doesn't exist, we can safely try an
@@ -487,13 +590,13 @@ recover_remaining_wals(struct recovery_state *r)
 		suffix = INPROGRESS;
 		if (current_lsn == wal_greatest_lsn) {
 			/* Last WAL present at the time of rescan. */
-			filename = format_filename(r->wal_dir,
+			filename = format_filename(&r->wal_dir,
 						   current_lsn, suffix);
 			f = fopen(filename, "r");
 		}
 		if (f == NULL) {
 			suffix = NONE;
-			filename = format_filename(r->wal_dir,
+			filename = format_filename(&r->wal_dir,
 						   current_lsn, suffix);
 			f = fopen(filename, "r");
 			/*
@@ -504,12 +607,14 @@ recover_remaining_wals(struct recovery_state *r)
 			    current_lsn < wal_greatest_lsn)
 				goto find_next_wal;
 		}
-		next_wal = log_io_open(r->wal_dir, LOG_READ, filename, suffix, f);
+		next_wal = log_io_open(&r->wal_dir, LOG_READ, filename,
+				       &r->node_uuid, suffix, f);
 		/*
 		 * When doing final recovery, and dealing with the
 		 * last file, try opening .<ext>.inprogress.
 		 */
 		if (next_wal == NULL) {
+			say_warn("open fail: %lld", (long long) current_lsn);
 			if (r->finalize && suffix == INPROGRESS) {
 				/*
 				 * There is an .inprogress file, but
@@ -524,6 +629,7 @@ recover_remaining_wals(struct recovery_state *r)
 			break;
 		}
 		assert(r->current_wal == NULL);
+		r->lsnsum = current_lsn;
 		r->current_wal = next_wal;
 		say_info("recover from `%s'", r->current_wal->filename);
 
@@ -547,9 +653,28 @@ recover_remaining_wals(struct recovery_state *r)
 			break;
 		}
 		if (result == LOG_EOF) {
-			say_info("done `%s' confirmed_lsn: %" PRIi64,
-				 r->current_wal->filename,
-				 r->confirmed_lsn);
+			say_info("done `%s'", r->current_wal->filename);
+			log_io_close(&r->current_wal);
+			/* goto find_next_wal; */
+		} else if (r->lsnsum == wal_greatest_lsn) {
+			/* last file is not finished */
+			break;
+		} else if (r->finalize && r->current_wal->is_inprogress) {
+			say_warn("fail to find eof on inprogress");
+			/* Let recovery_finalize deal with last file */
+			break;
+		} else if (r->current_wal->retry++ < 3) {
+			/*
+			 * If a newer WAL appeared in the directory before
+			 * current_wal was fully read, try re-reading
+			 * one last time. */
+			say_warn("`%s' has no EOF marker, yet a newer WAL file exists:"
+				 " trying to re-read (attempt #%d)",
+				 r->current_wal->filename, r->current_wal->retry);
+			goto recover_current_wal;
+		} else {
+			say_warn("WAL `%s' wasn't correctly closed",
+				 r->current_wal->filename);
 			log_io_close(&r->current_wal);
 		}
 	}
@@ -558,7 +683,7 @@ recover_remaining_wals(struct recovery_state *r)
 	 * It's not a fatal error when last WAL is empty, but if
 	 * we lose some logs it is a fatal error.
 	 */
-	if (wal_greatest_lsn > r->confirmed_lsn + 1) {
+	if (wal_greatest_lsn > r->lsnsum) {
 		say_error("not all WALs have been successfully read");
 		result = -1;
 	}
@@ -569,30 +694,23 @@ recover_remaining_wals(struct recovery_state *r)
 	return result;
 }
 
-/**
- * Recover all WALs created after the last snapshot. Panic if
- * error.
- */
 void
-recover_existing_wals(struct recovery_state *r)
+recovery_fix_lsn(struct recovery_state *r, bool master_bootstrap)
 {
-	int64_t next_lsn = r->confirmed_lsn + 1;
-	int64_t wal_lsn = find_including_file(r->wal_dir, next_lsn);
-	if (wal_lsn <= 0) {
-		/* No WALs to recover from. */
-		goto out;
+	/* Remove fake snapshot/bootstrap node (id 0) */
+	uint32_t k = mh_cluster_find(r->cluster, 0, NULL);
+	assert(k != mh_end(r->cluster));
+	struct node *node = *mh_cluster_node(r->cluster, k);
+	if (master_bootstrap) {
+		assert(r->local_node != NULL);
+		assert(r->local_node->confirmed_lsn == r->local_node->current_lsn);
+		r->local_node->current_lsn += node->current_lsn;
+		r->local_node->confirmed_lsn = r->local_node->current_lsn;
+	}
-	r->current_wal = log_io_open_for_read(r->wal_dir, wal_lsn, NONE);
-	if (r->current_wal == NULL)
-		goto out;
-	if (recover_remaining_wals(r) < 0)
-		panic("recover failed");
-	say_info("WALs recovered, confirmed lsn: %" PRIi64, r->confirmed_lsn);
-out:
-#if 0
-	region_free(&fiber()->gc);
-#endif
-	;
+	mh_cluster_del(r->cluster, k, NULL);
+	if (r->local_node == node)
+		r->local_node = NULL;
+	free(node);
 }
 
 void
@@ -606,6 +724,7 @@ recovery_finalize(struct recovery_state *r)
 	r->finalize = true;
 
 	result = recover_remaining_wals(r);
+
 	if (result < 0)
 		panic("unable to successfully finalize recovery");
 
@@ -622,7 +741,7 @@ recovery_finalize(struct recovery_state *r)
 			say_warn("unlink broken %s WAL", r->current_wal->filename);
 			if (inprogress_log_unlink(r->current_wal->filename) != 0)
 				panic("can't unlink 'inprogress' WAL");
-		} else if (r->current_wal->rows == 1) {
+		} else if (r->current_wal->rows <= 2 /* SETLSN + one row */) {
 			/* Rename inprogress wal with one row */
 			say_warn("rename unfinished %s WAL", r->current_wal->filename);
 			if (inprogress_log_rename(r->current_wal) != 0)
@@ -708,9 +827,7 @@ recovery_rescan_file(ev_loop * loop, ev_stat *w, int /* revents */)
 	if (result < 0)
 		panic("recover failed");
 	if (result == LOG_EOF) {
-		say_info("done `%s' confirmed_lsn: %" PRIi64,
-			 r->current_wal->filename,
-			 r->confirmed_lsn);
+		say_info("done `%s'", r->current_wal->filename);
 		log_io_close(&r->current_wal);
 		recovery_stop_file(watcher);
 		/* Don't wait for wal_dir_rescan_delay. */
@@ -764,6 +881,7 @@ struct wal_write_request {
 	struct fiber *fiber;
 	struct iproto_packet *packet;
 	char wal_fixheader[XLOG_FIXHEADER_SIZE];
+	struct node *node;
 };
 
 /* Context of the WAL writer thread. */
@@ -781,6 +899,7 @@ struct wal_writer
 	bool is_shutdown;
 	bool is_rollback;
 	ev_loop *txn_loop;
+	struct mh_cluster_t *cluster;
 };
 
 static pthread_once_t wal_writer_once = PTHREAD_ONCE_INIT;
@@ -879,7 +998,7 @@ wal_schedule(ev_loop * /* loop */, ev_async *watcher, int /* event */)
  * more writers in the future.
  */
 static void
-wal_writer_init(struct wal_writer *writer)
+wal_writer_init(struct wal_writer *writer, struct mh_cluster_t *cluster)
 {
 	/* I. Initialize the state. */
 	pthread_mutexattr_t errorcheck;
@@ -908,6 +1027,20 @@ wal_writer_init(struct wal_writer *writer)
 
 	if (writer->batch == NULL)
 		panic_syserror("fio_batch_alloc");
+
+	/* Create and fill writer->cluster hash */
+	writer->cluster = mh_cluster_new();
+	if (writer->cluster == NULL)
+		panic_syserror("can't reallocate writer->cluster");
+	uint32_t k;
+	mh_foreach(cluster, k) {
+		struct node *node = *mh_cluster_node(cluster, k);
+		struct node *wnode = mh_cluster_fetch(writer->cluster,
+						      node->id);
+		if (wnode == NULL)
+			panic_syserror("can't reallocate writer->cluster");
+		wnode->current_lsn = node->current_lsn;
+	}
 }
 
 /** Destroy a WAL writer structure. */
@@ -917,6 +1050,8 @@ wal_writer_destroy(struct wal_writer *writer)
 	(void) tt_pthread_mutex_destroy(&writer->mutex);
 	(void) tt_pthread_cond_destroy(&writer->cond);
 	free(writer->batch);
+	mh_cluster_clean(writer->cluster);
+	mh_cluster_delete(writer->cluster);
 }
 
 /** WAL writer thread routine. */
@@ -945,7 +1080,7 @@ wal_writer_start(struct recovery_state *r)
 	assert(STAILQ_EMPTY(&wal_writer.commit));
 
 	/* I. Initialize the state. */
-	wal_writer_init(&wal_writer);
+	wal_writer_init(&wal_writer, r->cluster);
 	r->writer = &wal_writer;
 
 	ev_async_start(wal_writer.txn_loop, &wal_writer.write_event);
@@ -1001,6 +1136,26 @@ wal_writer_pop(struct wal_writer *writer, struct wal_fifo *input)
 	}
 }
 
+int
+wal_write_setlsn(struct log_io *wal, struct fio_batch *batch,
+		 struct mh_cluster_t *cluster)
+{
+	/* Write SETLSN command */
+	struct iproto_packet setlsn;
+	char fixheader[XLOG_FIXHEADER_SIZE];
+	struct iovec iov[XLOG_ROW_IOVMAX];
+	log_encode_setlsn(&setlsn, cluster);
+	int iovcnt = xlog_encode_row(&setlsn, iov, fixheader);
+	fio_batch_start(batch, 1);
+	fio_batch_add(batch, iov, iovcnt);
+	if (fio_batch_write(batch, fileno(wal->f)) != 1) {
+		say_error("wal_write_setlsn failed");
+		return -1;
+	}
+
+	return 0;
+}
+
 /**
  * If there is no current WAL, try to open it, and close the
  * previous WAL. We close the previous WAL only after opening
@@ -1016,14 +1171,14 @@ wal_writer_pop(struct wal_writer *writer, struct wal_fifo *input)
  * @return 0 in case of success, -1 on error.
  */
 static int
-wal_opt_rotate(struct log_io **wal, int rows_per_wal, struct log_dir *dir,
-	       int64_t lsn)
+wal_opt_rotate(struct log_io **wal, struct fio_batch *batch,
+	       struct recovery_state *r, struct mh_cluster_t *cluster)
 {
 	struct log_io *l = *wal, *wal_to_close = NULL;
 
 	ERROR_INJECT_RETURN(ERRINJ_WAL_ROTATE);
 
-	if (l != NULL && (l->rows >= rows_per_wal || lsn % rows_per_wal == 0)) {
+	if (l != NULL && l->rows >= r->rows_per_wal) {
 		/*
 		 * if l->rows == 1, log_io_close() does
 		 * inprogress_log_rename() for us.
@@ -1033,7 +1188,13 @@ wal_opt_rotate(struct log_io **wal, int rows_per_wal, struct log_dir *dir,
 	}
 	if (l == NULL) {
 		/* Open WAL with '.inprogress' suffix. */
-		l = log_io_open_for_write(dir, lsn, INPROGRESS);
+		int64_t lsnsum = mh_cluster_current_sum(cluster);
+		l = log_io_open_for_write(&r->wal_dir, lsnsum, &r->node_uuid,
+					  INPROGRESS);
+		if (l != NULL) {
+			if (wal_write_setlsn(l, batch, cluster) != 0)
+				log_io_close(&l);
+		}
 		/*
 		 * Close the file *after* we create the new WAL, since
 		 * this is when replication relays get an inotify alarm
@@ -1048,6 +1209,7 @@ wal_opt_rotate(struct log_io **wal, int rows_per_wal, struct log_dir *dir,
 			 * A warning is written to the server
 			 * log file.
 			 */
+			wal_write_setlsn(wal_to_close, batch, cluster);
 			log_io_close(&wal_to_close);
 		}
 	} else if (l->rows == 1) {
@@ -1086,7 +1248,7 @@ wal_opt_sync(struct log_io *wal, double sync_delay)
 
 static struct wal_write_request *
 wal_fill_batch(struct log_io *wal, struct fio_batch *batch, int rows_per_wal,
-	       struct wal_write_request *req)
+	       struct wal_write_request *req, struct mh_cluster_t *cluster)
 {
 	int max_rows = wal->is_inprogress ? 1 : rows_per_wal - wal->rows;
 	/* Post-condition of successful wal_opt_rotate(). */
@@ -1095,6 +1257,11 @@ wal_fill_batch(struct log_io *wal, struct fio_batch *batch, int rows_per_wal,
 
 	struct iovec iov[XLOG_ROW_IOVMAX];
 	while (req != NULL && !fio_batch_has_space(batch, nelem(iov))) {
+		req->node = mh_cluster_fetch(cluster, req->packet->node_id);
+		if (req->node == NULL) {
+			say_syserror("can't reallocate writer->cluster");
+			return NULL;
+		}
 		int iovcnt = xlog_encode_row(req->packet, iov, req->wal_fixheader);
 		fio_batch_add(batch, iov, iovcnt);
 		req = STAILQ_NEXT(req, wal_fifo_entry);
@@ -1109,6 +1276,8 @@ wal_write_batch(struct log_io *wal, struct fio_batch *batch,
 	int rows_written = fio_batch_write(batch, fileno(wal->f));
 	wal->rows += rows_written;
 	while (req != end && rows_written-- != 0)  {
+		assert(req->node->id == req->packet->node_id);
+		req->node->current_lsn = req->packet->lsn;
 		req->res = 0;
 		req = STAILQ_NEXT(req, wal_fifo_entry);
 	}
@@ -1127,11 +1296,11 @@ wal_write_to_disk(struct recovery_state *r, struct wal_writer *writer,
 	struct wal_write_request *write_end = req;
 
 	while (req) {
-		if (wal_opt_rotate(wal, r->rows_per_wal, r->wal_dir,
-				   req->packet->lsn) != 0)
+		if (wal_opt_rotate(wal, batch, r, writer->cluster) != 0)
 			break;
 		struct wal_write_request *batch_end;
-		batch_end = wal_fill_batch(*wal, batch, r->rows_per_wal, req);
+		batch_end = wal_fill_batch(*wal, batch, r->rows_per_wal, req,
+					   writer->cluster);
 		write_end = wal_write_batch(*wal, batch, req, batch_end);
 		if (batch_end != write_end)
 			break;
@@ -1175,8 +1344,10 @@ wal_writer_thread(void *worker_args)
 		ev_async_send(writer->txn_loop, &writer->write_event);
 	}
 	(void) tt_pthread_mutex_unlock(&writer->mutex);
-	if (r->current_wal != NULL)
+	if (r->current_wal != NULL) {
+		wal_write_setlsn(r->current_wal, writer->batch, writer->cluster);
 		log_io_close(&r->current_wal);
+	}
 	return NULL;
 }
 
@@ -1187,8 +1358,14 @@ wal_writer_thread(void *worker_args)
 int
 wal_write(struct recovery_state *r, struct iproto_packet *packet)
 {
+	struct node *node = fill_lsn(r, packet);
+	if (r->wal_mode == WAL_NONE) {
+		confirm_lsn(node, node->current_lsn, true);
+		return 0;
+	}
+
+	assert(packet != NULL);
 	assert(r->wal_mode != WAL_NONE);
-	say_debug("wal_write lsn=%" PRIi64, packet->lsn);
 	ERROR_INJECT_RETURN(ERRINJ_WAL_IO);
 
 	struct wal_writer *writer = r->writer;
@@ -1212,8 +1389,10 @@ wal_write(struct recovery_state *r, struct iproto_packet *packet)
 
 	(void) tt_pthread_mutex_unlock(&writer->mutex);
 
+	int64_t lsn = node->current_lsn; /* save current lsn on the stack */
 	fiber_yield(); /* Request was inserted. */
 
+	confirm_lsn(node, lsn, req->res == 0);
 	return req->res;
 }
 
@@ -1231,7 +1410,9 @@ snapshot_write_row(struct log_io *l, struct iproto_packet *packet)
 	ev_loop *loop = loop();
 
 	packet->tm = last;
-	packet->lsn = ++rows;
+	packet->node_id = 0;
+	if (iproto_request_is_dml(packet->code))
+		packet->lsn = ++rows;
 	packet->sync = 0; /* don't write sync to wal */
 
 	char fixheader[XLOG_FIXHEADER_SIZE];
@@ -1296,7 +1477,8 @@ snapshot_save(struct recovery_state *r)
 {
 	assert(r->snapshot_handler != NULL);
 	struct log_io *snap;
-	snap = log_io_open_for_write(r->snap_dir, r->confirmed_lsn,
+	int64_t lsnsum = mh_cluster_current_sum(r->cluster);
+	snap = log_io_open_for_write(&r->snap_dir, lsnsum, &r->node_uuid,
 				     INPROGRESS);
 	if (snap == NULL)
 		panic_status(errno, "Failed to save snapshot: failed to open file in write mode.");
@@ -1305,12 +1487,19 @@ snapshot_save(struct recovery_state *r)
 	 * <lsn>.snap.inprogress. When done, the snapshot is
 	 * renamed to <lsn>.snap.
 	 */
-	say_info("saving snapshot `%s'",
-		 format_filename(r->snap_dir, r->confirmed_lsn,
-				 NONE));
+	say_info("saving snapshot `%s'", snap->filename);
+
+	/* Write starting SETLSN (always empty table for snapshot) */
+	struct iproto_packet setlsn;
+	log_encode_setlsn(&setlsn, NULL);
+	snapshot_write_row(snap, &setlsn);
 
 	r->snapshot_handler(snap);
 
+	/* Write finishing SETLSN */
+	log_encode_setlsn(&setlsn, r->cluster);
+	snapshot_write_row(snap, &setlsn);
+
 	log_io_close(&snap);
 
 	say_info("done");
diff --git a/src/recovery.h b/src/recovery.h
index dc343d47a9c9e509a1ef64f649de7b66011e15ff..2b8a7e9884da7f58bfbcdf365d41aea7c79cb157 100644
--- a/src/recovery.h
+++ b/src/recovery.h
@@ -31,8 +31,9 @@
 #include <stdbool.h>
 
 #include "trivia/util.h"
-#include "tarantool_ev.h"
+#include "third_party/tarantool_ev.h"
 #include "log_io.h"
+#include "tt_uuid.h"
 
 #if defined(__cplusplus)
 extern "C" {
@@ -41,28 +42,14 @@ extern "C" {
 struct fiber;
 struct tbuf;
 
-typedef int (row_handler)(void *, struct iproto_packet *packet);
+typedef void (row_handler)(void *, struct iproto_packet *packet);
 typedef void (snapshot_handler)(struct log_io *);
+typedef void (join_handler)(const tt_uuid *node_uuid);
 
 /** A "condition variable" that allows fibers to wait when a given
  * LSN makes it to disk.
  */
 
-struct wait_lsn {
-	struct fiber *waiter;
-	int64_t lsn;
-};
-
-void
-wait_lsn_set(struct wait_lsn *wait_lsn, int64_t lsn);
-
-inline static void
-wait_lsn_clear(struct wait_lsn *wait_lsn)
-{
-	wait_lsn->waiter = NULL;
-	wait_lsn->lsn = 0LL;
-}
-
 struct wal_writer;
 struct wal_watcher;
 struct remote;
@@ -72,15 +59,44 @@ enum wal_mode { WAL_NONE = 0, WAL_WRITE, WAL_FSYNC, WAL_FSYNC_DELAY, WAL_MODE_MA
 /** String constants for the supported modes. */
 extern const char *wal_mode_STRS[];
 
+/*
+ * Cluster Node
+ */
+struct node {
+	uint32_t id;
+	tt_uuid uuid;
+	int64_t current_lsn;
+	int64_t confirmed_lsn;
+};
+
+/*
+ * Map: (node_id) => (struct node)
+ */
+#define mh_name _cluster
+#define mh_key_t uint32_t
+#define mh_node_t struct node *
+#define mh_arg_t void *
+#define mh_hash(a, arg) ((*a)->id)
+#define mh_hash_key(a, arg) (a)
+#define mh_eq(a, b, arg) ((*a)->id == (*b)->id)
+#define mh_eq_key(key, node, arg) (key == (*node)->id)
+#include "salad/mhash.h"
+
+void
+mh_cluster_clean(struct mh_cluster_t *hash);
+
 struct recovery_state {
-	int64_t lsn, confirmed_lsn;
+	struct mh_cluster_t *cluster;
+	struct node *local_node;
 	/* The WAL we're currently reading/writing from/to. */
 	struct log_io *current_wal;
-	struct log_dir *snap_dir;
-	struct log_dir *wal_dir;
+	struct log_dir snap_dir;
+	struct log_dir wal_dir;
+	int64_t lsnsum; /* used to find missing xlog files */
 	struct wal_writer *writer;
 	struct wal_watcher *watcher;
 	struct remote *remote;
+	bool relay; /* true if recovery initialized for JOIN/SUBSCRIBE */
 	/**
 	 * row_handler is a module callback invoked during initial
 	 * recovery and when reading rows from the master.  It is
@@ -91,11 +107,13 @@ struct recovery_state {
 	row_handler *row_handler;
 	void *row_handler_param;
 	snapshot_handler *snapshot_handler;
+	join_handler *join_handler;
 	uint64_t snap_io_rate_limit;
 	int rows_per_wal;
 	double wal_fsync_delay;
-	struct wait_lsn wait_lsn;
 	enum wal_mode wal_mode;
+	tt_uuid node_uuid;
+	tt_uuid cluster_uuid;
 
 	bool finalize;
 };
@@ -104,28 +122,31 @@ extern struct recovery_state *recovery_state;
 
 void recovery_init(const char *snap_dirname, const char *xlog_dirname,
 		   row_handler row_handler, void *row_handler_param,
-		   snapshot_handler snapshot_handler, int rows_per_wal);
+		   snapshot_handler snapshot_handler, join_handler join_handler,
+		   int rows_per_wal);
 void recovery_update_mode(struct recovery_state *r, enum wal_mode mode);
 void recovery_update_fsync_delay(struct recovery_state *r, double new_delay);
 void recovery_update_io_rate_limit(struct recovery_state *r,
 				   double new_limit);
 void recovery_free();
-void recover_snap(struct recovery_state *r, const char *replication_source);
-void recover_existing_wals(struct recovery_state *);
+
+static inline bool
+recovery_has_data(struct recovery_state *r)
+{
+	return log_dir_greatest(&r->snap_dir) > 0 ||
+	       log_dir_greatest(&r->wal_dir) > 0;
+}
+void cluster_bootstrap(struct recovery_state *r);
+void recover_snap(struct recovery_state *r);
 void recovery_follow_local(struct recovery_state *r, ev_tstamp wal_dir_rescan_delay);
 void recovery_finalize(struct recovery_state *r);
 
-int
-recover_wal(struct recovery_state *r, struct log_io *l); /* for replication */
+int recover_wal(struct recovery_state *r, struct log_io *l);
 int wal_write(struct recovery_state *r, struct iproto_packet *packet);
 
 void recovery_setup_panic(struct recovery_state *r, bool on_snap_error, bool on_wal_error);
-
-void confirm_lsn(struct recovery_state *r, int64_t lsn, bool is_commit);
-int64_t next_lsn(struct recovery_state *r);
-void set_lsn(struct recovery_state *r, int64_t lsn);
-
-void recovery_wait_lsn(struct recovery_state *r, int64_t lsn);
+void recovery_process(struct recovery_state *r, struct iproto_packet *packet);
+void recovery_fix_lsn(struct recovery_state *r, bool master_bootstrap);
 
 struct fio_batch;
 
@@ -133,8 +154,10 @@ void
 snapshot_write_row(struct log_io *l, struct iproto_packet *packet);
 void snapshot_save(struct recovery_state *r);
 
-void
-init_storage_on_master(struct log_dir *dir);
+/* Only for tests */
+int
+wal_write_setlsn(struct log_io *wal, struct fio_batch *batch,
+		 struct mh_cluster_t *cluster);
 
 #if defined(__cplusplus)
 } /* extern "C" */
diff --git a/src/replica.cc b/src/replica.cc
index 66b20dfee5fc1e6d6544a69155954075674b6ace..cdd3bb12f9face0cfb5c803d090008d999b706ce 100644
--- a/src/replica.cc
+++ b/src/replica.cc
@@ -28,7 +28,6 @@
  */
 #include "recovery.h"
 #include "tarantool.h"
-
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
@@ -45,11 +44,8 @@
 #include "replica.h"
 
 static void
-remote_apply_row(struct recovery_state *r, struct iproto_packet *packet);
-
-static void
-remote_remote_read_row_fd(struct ev_io *coio, struct iobuf *iobuf,
-		 struct iproto_packet *packet)
+remote_read_row(struct ev_io *coio, struct iobuf *iobuf,
+		struct iproto_packet *packet)
 {
 	struct ibuf *in = &iobuf->in;
 
@@ -120,6 +116,11 @@ remote_read_row_fd(int sock, struct iproto_packet *packet)
 void
 replica_bootstrap(struct recovery_state *r, const char *replication_source)
 {
+	say_info("bootstrapping replica");
+
+	/* Generate Node-UUID */
+	tt_uuid_create(&r->node_uuid);
+
 	char ip_addr[32];
 	char greeting[IPROTO_GREETING_SIZE];
 	int port;
@@ -141,55 +142,102 @@ replica_bootstrap(struct recovery_state *r, const char *replication_source)
 	int master = sio_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 	FDGuard guard(master);
 
-	assert(r->confirmed_lsn == 0 && r->lsn == 0);
 	uint64_t sync = rand();
 
 	/* Send JOIN request */
-	struct iproto_subscribe subscribe = iproto_subscribe_stub;
-	subscribe.sync = mp_bswap_u64(sync);
+	struct iproto_packet packet;
+	memset(&packet, 0, sizeof(packet));
+	packet.code = IPROTO_JOIN;
+	packet.sync = sync;
+
+	char buf[128];
+	char *data = buf;
+	data = mp_encode_map(data, 1);
+	data = mp_encode_uint(data, IPROTO_NODE_UUID);
+	data = mp_encode_strl(data, UUID_LEN);
+	tt_uuid_enc_be(&recovery_state->node_uuid, data);
+	data += UUID_LEN;
+
+	assert(data <= buf + sizeof(buf));
+	packet.body[0].iov_base = buf;
+	packet.body[0].iov_len = (data - buf);
+	packet.bodycnt = 1;
+	char fixheader[IPROTO_FIXHEADER_SIZE];
+	struct iovec iov[IPROTO_ROW_IOVMAX];
+	int iovcnt = iproto_encode_row(&packet, iov, fixheader);
+
 	sio_connect(master, &addr, sizeof(addr));
 	sio_readn(master, greeting, sizeof(greeting));
-	sio_write(master, &subscribe, sizeof(subscribe));
+	sio_writev_all(master, iov, iovcnt);
 
 	while (true) {
-		struct iproto_packet packet;
-
 		remote_read_row_fd(master, &packet);
-		if (packet.sync != sync)
-			tnt_raise(IllegalParams, "unexpected packet");
+		if (packet.sync != sync) {
+			tnt_raise(ClientError, ER_INVALID_MSGPACK,
+				  "unexpected packet sync");
+		}
 
 		/* Recv JOIN response (= end of stream) */
-		if (packet.code == IPROTO_SUBSCRIBE) {
+		if (packet.code == IPROTO_JOIN) {
 			if (packet.bodycnt != 0)
-				tnt_raise(IllegalParams, "subscribe response body");
-			set_lsn(r, packet.lsn);
+				tnt_raise(IllegalParams, "JOIN body");
 			say_info("done");
 			break;
 		}
 
-		remote_apply_row(r, &packet);
+		recovery_process(r, &packet);
 	}
+	say_info("done");
 	/* master socket closed by guard */
 }
 
 static void
-remote_connect(struct ev_io *coio, struct sockaddr_in *remote_addr,
-	       int64_t initial_lsn, const char **err)
+remote_connect(struct recovery_state *r, struct ev_io *coio, const char **err)
 {
 	char greeting[IPROTO_GREETING_SIZE];
 	evio_socket(coio, AF_INET, SOCK_STREAM, IPPROTO_TCP);
 
 	*err = "can't connect to master";
-	coio_connect(coio, remote_addr);
+	coio_connect(coio, &r->remote->addr);
 	coio_readn(coio, greeting, sizeof(greeting));
 
-	/* Send JOIN request */
-	struct iproto_subscribe request = iproto_subscribe_stub;
-	request.lsn = mp_bswap_u64(initial_lsn);
-	coio_write(coio, &request, sizeof(request));
+	/* Send SUBSCRIBE request: {Cluster-UUID, Node-UUID, LSNMAP} of r */
+	struct iproto_packet packet;
+	memset(&packet, 0, sizeof(packet));
+	packet.code = IPROTO_SUBSCRIBE;
+
+	uint32_t cluster_size = mh_size(r->cluster);
+	size_t size = 128 + cluster_size *
+		(mp_sizeof_uint(UINT32_MAX) + mp_sizeof_uint(UINT64_MAX));
+	char *buf = (char *) region_alloc(&fiber()->gc, size);
+	char *data = buf;
+	data = mp_encode_map(data, 3);
+	data = mp_encode_uint(data, IPROTO_CLUSTER_UUID);
+	data = mp_encode_strl(data, UUID_LEN);
+	tt_uuid_enc_be(&r->cluster_uuid, data);
+	data += UUID_LEN;
+	data = mp_encode_uint(data, IPROTO_NODE_UUID);
+	data = mp_encode_strl(data, UUID_LEN);
+	tt_uuid_enc_be(&r->node_uuid, data);
+	data += UUID_LEN;
+	data = mp_encode_uint(data, IPROTO_LSNMAP);
+	data = mp_encode_map(data, cluster_size);
+	uint32_t k;
+	mh_foreach(r->cluster, k) {
+		struct node *node = *mh_cluster_node(r->cluster, k);
+		data = mp_encode_uint(data, node->id);
+		data = mp_encode_uint(data, node->current_lsn);
+	}
+	assert(data <= buf + size);
+	packet.body[0].iov_base = buf;
+	packet.body[0].iov_len = (data - buf);
+	packet.bodycnt = 1;
+	char fixheader[IPROTO_FIXHEADER_SIZE];
+	struct iovec iov[IPROTO_ROW_IOVMAX];
+	int iovcnt = iproto_encode_row(&packet, iov, fixheader);
+	coio_writev(coio, iov, iovcnt, 0);
 
 	say_crit("successfully connected to master");
-	say_crit("starting replication from lsn: %" PRIi64, initial_lsn);
 }
 
 static void
@@ -213,15 +261,14 @@ pull_from_remote(va_list ap)
 				      "connecting");
 				if (iobuf == NULL)
 					iobuf = iobuf_new(fiber_name(fiber()));
-				remote_connect(&coio, &r->remote->addr,
-					       r->confirmed_lsn + 1, &err);
+				remote_connect(r, &coio, &err);
 				warning_said = false;
 				title("replica", "%s/%s", r->remote->source,
 				      "connected");
 			}
 			err = "can't read row";
 			struct iproto_packet packet;
-			remote_remote_read_row_fd(&coio, iobuf, &packet);
+			remote_read_row(&coio, iobuf, &packet);
 			fiber_setcancellable(false);
 			err = NULL;
 
@@ -229,9 +276,9 @@ pull_from_remote(va_list ap)
 			r->remote->recovery_last_update_tstamp =
 				ev_now(loop);
 
-			remote_apply_row(r, &packet);
+			recovery_process(r, &packet);
 
-			iobuf_gc(iobuf);
+			iobuf_reset(iobuf);
 			fiber_gc();
 		} catch (FiberCancelException *e) {
 			title("replica", "%s/%s", r->remote->source, "failed");
@@ -268,15 +315,6 @@ pull_from_remote(va_list ap)
 	}
 }
 
-static void
-remote_apply_row(struct recovery_state *r, struct iproto_packet *packet)
-{
-	if (r->row_handler(r->row_handler_param, packet) < 0)
-		panic("replication failure: can't apply row");
-
-	set_lsn(r, packet->lsn);
-}
-
 void
 recovery_follow_remote(struct recovery_state *r, const char *addr)
 {
diff --git a/src/replication.cc b/src/replication.cc
index 89b3fde0165507c483ab692e34055316bad24e05..ce303d670704f2f44f53ea6c0eafe861a60fe9e6 100644
--- a/src/replication.cc
+++ b/src/replication.cc
@@ -135,7 +135,7 @@ spawner_sigchld_handler(int signal __attribute__((unused)));
  * @return 0 on success, -1 on error
  */
 static int
-spawner_create_replication_relay();
+spawner_create_replication_relay(struct relay_data *data);
 
 /** Shut down all relays when shutting down the spawner. */
 static void
@@ -143,7 +143,7 @@ spawner_shutdown_children();
 
 /** Initialize replication relay process. */
 static void
-replication_relay_loop();
+replication_relay_loop(struct relay_data *data);
 
 /*
  * ------------------------------------------------------------------------
@@ -194,27 +194,202 @@ replication_prefork(const char *snap_dir, const char *wal_dir)
 /*-----------------------------------------------------------------------------*/
 
 /** State of subscribe request - master process. */
-struct subscribe_request {
+struct relay_data {
+	uint32_t code;
+	uint64_t sync;
+
+	/* for SUBSCRIBE */
+	uint32_t node_id;
+	uint32_t lsnmap_size;
+	struct {
+		uint32_t node_id;
+		int64_t lsn;
+	} lsnmap[];
+};
+
+struct replication_request {
 	struct ev_io io;
 	int fd;
-	int64_t lsn;
-	uint64_t sync;
+	struct relay_data data;
 };
 
 /** Replication acceptor fiber handler. */
 void
-subscribe(int fd, int64_t lsn, uint64_t sync)
+replication_join(int fd, struct iproto_packet *packet)
 {
-	struct subscribe_request *request = (struct subscribe_request *)
-		malloc(sizeof(struct subscribe_request));
+	assert(packet->code == IPROTO_JOIN);
+	if (packet->bodycnt == 0)
+		tnt_raise(ClientError, ER_INVALID_MSGPACK, "JOIN body");
+
+	const char *data = (const char *) packet->body[0].iov_base;
+	const char *end = data + packet->body[0].iov_len;
+	const char *d = data;
+	if (mp_check(&d, end) != 0 || mp_typeof(*data) != MP_MAP)
+		tnt_raise(ClientError, ER_INVALID_MSGPACK, "JOIN body");
+
+	tt_uuid node_uuid = uuid_nil;
+	d = data;
+	uint32_t map_size = mp_decode_map(&d);
+	for (uint32_t i = 0; i < map_size; i++) {
+		if (mp_typeof(*d) != MP_UINT) {
+			mp_next(&d); /* key */
+			mp_next(&d); /* value */
+			continue;
+		}
+		uint64_t key = mp_decode_uint(&d); /* full width: no aliasing */
+		if (key == IPROTO_NODE_UUID) {
+			if (mp_typeof(*d) != MP_STR ||
+			    mp_decode_strl(&d) != UUID_LEN) {
+				tnt_raise(ClientError, ER_INVALID_MSGPACK,
+					  "invalid Node-UUID");
+			}
+			tt_uuid_dec_be(d, &node_uuid);
+			d += UUID_LEN;
+		} else {
+			mp_next(&d); /* value */
+		}
+	}
+
+	if (tt_uuid_is_nil(&node_uuid)) {
+		tnt_raise(ClientError, ER_INVALID_MSGPACK,
+			  "Can't find Node-UUID in JOIN request");
+	}
+
+	/* Notify box about new cluster node */
+	recovery_state->join_handler(&node_uuid);
+
+	struct replication_request *request = (struct replication_request *)
+			calloc(1, sizeof(*request)); /* zero lsnmap_size */
 	if (request == NULL) {
-		close(fd);
-		return;
+		tnt_raise(ClientError, ER_MEMORY_ISSUE, sizeof(*request),
+			  "iproto", "JOIN");
 	}
 	request->fd = fd;
 	request->io.data = request;
-	request->lsn = lsn;
-	request->sync = sync;
+	request->data.code = packet->code;
+	request->data.sync = packet->sync;
+
+	ev_io_init(&request->io, replication_send_socket,
+		   master_to_spawner_socket, EV_WRITE);
+	ev_io_start(loop(), &request->io);
+}
+
+/** Replication acceptor fiber handler. */
+void
+replication_subscribe(int fd, struct iproto_packet *packet)
+{
+	assert(packet->code == IPROTO_SUBSCRIBE);
+	if (packet->bodycnt == 0)
+		tnt_raise(ClientError, ER_INVALID_MSGPACK, "subscribe body");
+	assert(packet->bodycnt == 1);
+	const char *data = (const char *) packet->body[0].iov_base;
+	const char *end = data + packet->body[0].iov_len;
+	const char *d = data;
+	if (mp_check(&d, end) != 0 || mp_typeof(*data) != MP_MAP)
+		tnt_raise(ClientError, ER_INVALID_MSGPACK, "subscribe body");
+	tt_uuid cluster_uuid = uuid_nil, node_uuid = uuid_nil;
+
+	const char *lsnmap = NULL;
+	d = data;
+	uint32_t map_size = mp_decode_map(&d);
+	for (uint32_t i = 0; i < map_size; i++) {
+		if (mp_typeof(*d) != MP_UINT) {
+			mp_next(&d); /* key */
+			mp_next(&d); /* value */
+			continue;
+		}
+		uint8_t key = mp_decode_uint(&d);
+		switch (key) {
+		case IPROTO_CLUSTER_UUID:
+			if (mp_typeof(*d) != MP_STR ||
+			    mp_decode_strl(&d) != UUID_LEN) {
+				tnt_raise(ClientError, ER_INVALID_MSGPACK,
+					  "invalid Cluster-UUID");
+			}
+			tt_uuid_dec_be(d, &cluster_uuid);
+			d += UUID_LEN;
+			break;
+		case IPROTO_NODE_UUID:
+			if (mp_typeof(*d) != MP_STR ||
+			    mp_decode_strl(&d) != UUID_LEN) {
+				tnt_raise(ClientError, ER_INVALID_MSGPACK,
+					  "invalid Node-UUID");
+			}
+			tt_uuid_dec_be(d, &node_uuid);
+			d += UUID_LEN;
+			break;
+		case IPROTO_LSNMAP:
+			if (mp_typeof(*d) != MP_MAP) {
+				tnt_raise(ClientError, ER_INVALID_MSGPACK,
+					  "invalid LSNMAP");
+			}
+			lsnmap = d;
+			mp_next(&d);
+			break;
+		default:
+			mp_next(&d); /* value */
+		}
+	}
+
+	/* Check Cluster-UUID */
+	if (tt_uuid_cmp(&cluster_uuid, &recovery_state->cluster_uuid) != 0) {
+		tnt_raise(ClientError, ER_INVALID_MSGPACK,
+			  "Unknown Cluster-UUID");
+	}
+	/* Check Node-UUID */
+	struct node *node = NULL;
+	uint32_t k;
+	mh_foreach(recovery_state->cluster, k) {
+		struct node *n = *mh_cluster_node(recovery_state->cluster, k);
+		if (tt_uuid_cmp(&n->uuid, &node_uuid) == 0) {
+			node = n;
+			break;
+		}
+	}
+	assert(node !=  NULL);
+	if (lsnmap == NULL)
+		tnt_raise(ClientError, ER_INVALID_MSGPACK, "LSNMAP");
+	/* Check & save LSNMAP */
+	d = lsnmap;
+	uint32_t lsnmap_size = mp_decode_map(&d);
+	struct replication_request *request = (struct replication_request *)
+		calloc(1, sizeof(*request) + sizeof(*request->data.lsnmap) *
+		       (lsnmap_size + 1)); /* use calloc() for valgrind */
+
+	if (request == NULL) {
+		tnt_raise(ClientError, ER_MEMORY_ISSUE, sizeof(*request) +
+			  sizeof(*request->data.lsnmap) * (lsnmap_size + 1),
+			  "iproto", "SUBSCRIBE");
+	}
+
+	bool remote_found = false;
+	for (uint32_t i = 0; i < lsnmap_size; i++) {
+		if (mp_typeof(*d) != MP_UINT) {
+		map_error:
+			free(request);
+			tnt_raise(ClientError, ER_INVALID_MSGPACK, "LSNMAP");
+		}
+		request->data.lsnmap[i].node_id = mp_decode_uint(&d);
+		if (mp_typeof(*d) != MP_UINT)
+			goto map_error;
+		request->data.lsnmap[i].lsn = mp_decode_uint(&d);
+		if (request->data.lsnmap[i].node_id == node->id)
+			remote_found = true;
+	}
+	if (!remote_found) {
+		/* Add remote node to the list */
+		request->data.lsnmap[lsnmap_size].node_id = node->id;
+		request->data.lsnmap[lsnmap_size].lsn = 0;
+		++lsnmap_size;
+	}
+
+	request->fd = fd;
+	request->io.data = request;
+	request->data.code = packet->code;
+	request->data.sync = packet->sync;
+	request->data.node_id = node->id;
+	request->data.lsnmap_size = lsnmap_size;
+
 	ev_io_init(&request->io, replication_send_socket,
 		   master_to_spawner_socket, EV_WRITE);
 	ev_io_start(loop(), &request->io);
@@ -225,22 +400,27 @@ subscribe(int fd, int64_t lsn, uint64_t sync)
 static void
 replication_send_socket(ev_loop *loop, ev_io *watcher, int /* events */)
 {
-	struct subscribe_request *request =
-		(struct subscribe_request *) watcher->data;
+	struct replication_request *request =
+		(struct replication_request *) watcher->data;
 	struct msghdr msg;
-	struct iovec iov;
+	struct iovec iov[2];
 	char control_buf[CMSG_SPACE(sizeof(int))];
+	memset(control_buf, 0, sizeof(control_buf)); /* valgrind */
 	struct cmsghdr *control_message = NULL;
 
-	iov.iov_base = &request->lsn;
-	iov.iov_len = sizeof(request->lsn) + sizeof(request->sync);
+	size_t len = sizeof(request->data) + sizeof(*request->data.lsnmap) *
+			request->data.lsnmap_size;
+	iov[0].iov_base = &len;
+	iov[0].iov_len = sizeof(len);
+	iov[1].iov_base = &request->data;
+	iov[1].iov_len = len;
 
 	memset(&msg, 0, sizeof(msg));
 
 	msg.msg_name = NULL;
 	msg.msg_namelen = 0;
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
+	msg.msg_iov = iov;
+	msg.msg_iovlen = nelem(iov);
 	msg.msg_control = control_buf;
 	msg.msg_controllen = sizeof(control_buf);
 
@@ -347,8 +527,9 @@ spawner_main_loop()
 	struct iovec iov;
 	char control_buf[CMSG_SPACE(sizeof(int))];
 
-	iov.iov_base = &replica.lsn;
-	iov.iov_len = sizeof(replica.lsn) + sizeof(replica.sync);
+	size_t len;
+	iov.iov_base = &len;
+	iov.iov_len = sizeof(len);
 
 	msg.msg_name = NULL;
 	msg.msg_namelen = 0;
@@ -358,18 +539,37 @@ spawner_main_loop()
 	msg.msg_controllen = sizeof(control_buf);
 
 	while (!spawner.killed) {
-		int msglen = recvmsg(spawner.sock, &msg, 0);
-		if (msglen > 0) {
-			replica.sock = spawner_unpack_cmsg(&msg);
-			spawner_create_replication_relay();
-		} else if (msglen == 0) { /* orderly master shutdown */
+		ssize_t msglen = recvmsg(spawner.sock, &msg, 0);
+		if (msglen == 0) { /* orderly master shutdown */
 			say_info("Exiting: master shutdown");
 			break;
-		} else { /* msglen == -1 */
-			if (errno != EINTR)
-				say_syserror("recvmsg");
+		} else if (msglen == -1) {
+			if (errno == EINTR)
+				continue;
+			say_syserror("recvmsg");
-			/* continue, the error may be temporary */
+			/* give up: leave the loop and shut the spawner down */
+			break;
 		}
+
+		replica.sock = spawner_unpack_cmsg(&msg);
+		struct relay_data *data = (struct relay_data *) malloc(len);
+		msglen = read(spawner.sock, data, len);
+		if (msglen == 0) { /* orderly master shutdown */
+			say_info("Exiting: master shutdown");
+			free(data);
+			break;
+		} else if (msglen == -1) {
+			free(data);
+			if (errno == EINTR)
+				continue;
+			say_syserror("recvmsg");
+			/* give up: leave the loop and shut the spawner down */
+			break;
+		}
+		replica.sync = data->sync;
+
+		spawner_create_replication_relay(data);
+		free(data);
 	}
 	spawner_shutdown();
 }
@@ -428,7 +628,7 @@ spawner_sigchld_handler(int signo __attribute__((unused)))
 
 /** Create replication client handler process. */
 static int
-spawner_create_replication_relay()
+spawner_create_replication_relay(struct relay_data *data)
 {
 	pid_t pid = fork();
 
@@ -441,7 +641,7 @@ spawner_create_replication_relay()
 		ev_loop_fork(loop());
 		ev_run(loop(), EVRUN_NOWAIT);
 		close(spawner.sock);
-		replication_relay_loop();
+		replication_relay_loop(data);
 	} else {
 		spawner.child_count++;
 		close(replica.sock);
@@ -543,107 +743,108 @@ replication_relay_recv(ev_loop * /* loop */, struct ev_io *w, int __attribute__(
 	exit(EXIT_FAILURE);
 }
 
-/* Only for blocked I/O */
-static inline ssize_t
-sio_writev_all(int fd, struct iovec *iov, int iovcnt)
-{
-	ssize_t bytes_total = 0;
-	struct iovec *iovend = iov + iovcnt;
-	while(1) {
-		ssize_t bytes_written = sio_writev(fd, iov, iovend - iov);
-		bytes_total += bytes_written;
-		while (bytes_written > 0 && bytes_written >= iov->iov_len)
-			bytes_written -= (iov++)->iov_len;
-		if (iov == iovend)
-			break;
-		iov->iov_base = (char *) iov->iov_base + bytes_written;
-		iov->iov_len -= bytes_written;
-	}
-
-	return bytes_total;
-}
-
-
-enum { IPROTO_ROW_IOVMAX = IPROTO_PACKET_IOVMAX + 1 };
-
-static int
-iproto_encode_row(const struct iproto_packet *packet, struct iovec *iov,
-		  char fixheader[IPROTO_FIXHEADER_SIZE])
-{
-	int iovcnt = iproto_packet_encode(packet, iov + 1) + 1;
-	uint32_t len = 0;
-	for (int i = 1; i < iovcnt; i++)
-		len += iov[i].iov_len;
-
-	/* Encode length */
-	char *data = fixheader;
-	data = mp_encode_uint(data, len);
-	/* Encode padding */
-	ssize_t padding = IPROTO_FIXHEADER_SIZE - (data - fixheader);
-	if (padding > 0)
-		data = mp_encode_strl(data, padding - 1) + padding - 1;
-	assert(data == fixheader + IPROTO_FIXHEADER_SIZE);
-	iov[0].iov_base = fixheader;
-	iov[0].iov_len = IPROTO_FIXHEADER_SIZE;
-
-	assert(iovcnt <= IPROTO_ROW_IOVMAX);
-	return iovcnt;
-}
-
 /** Send a single row to the client. */
-static int
+static void
 replication_relay_send_row(void * /* param */, struct iproto_packet *packet)
 {
-	try {
+	struct recovery_state *r = recovery_state;
+
+	/* Don't send a row back to the node it originated from */
+	assert(r->local_node != NULL);
+	if (r->local_node->id == 0 || packet->node_id != r->local_node->id)  {
 		packet->sync = replica.sync;
 		/* Encode length */
 		struct iovec iov[IPROTO_ROW_IOVMAX];
 		char fixheader[IPROTO_FIXHEADER_SIZE];
 		int iovcnt = iproto_encode_row(packet, iov, fixheader);
 		sio_writev_all(replica.sock, iov, iovcnt);
-	} catch(SocketError *e) {
-		say_info("the client has closed its replication socket, exiting");
-		exit(EXIT_SUCCESS);
 	}
 
-	return 0;
+	/*
+	 * Update the LSN table for the row's node, sent or not.
+	 * This is needed by the recover_remaining_wals() logic.
+	 */
+	uint32_t k = mh_cluster_find(r->cluster, packet->node_id, NULL);
+	struct node *node;
+	if (k != mh_end(r->cluster)) {
+		node = *mh_cluster_node(r->cluster, k);
+	} else {
+		/* Create node if it doesn't exist */
+		node = (struct node *) calloc(1, sizeof(*node));
+		if (node == NULL) {
+			tnt_raise(ClientError, ER_MEMORY_ISSUE, sizeof(*node),
+				  "r->cluster", "node");
+		}
+		node->id = packet->node_id; /* the hash key: set before put */
+		k = mh_cluster_put(r->cluster, (const struct node **) &node,
+				   NULL, NULL);
+		if (k == mh_end(r->cluster)) {
+			tnt_raise(ClientError, ER_MEMORY_ISSUE, 0,
+				  "r->cluster", "r->cluster");
+		}
+	}
+	node->confirmed_lsn = node->current_lsn = packet->lsn;
 }
 
 static void
-replication_relay_join(struct recovery_state *r, uint64_t sync)
+replication_relay_join(struct recovery_state *r)
 {
 	FDGuard guard_replica(replica.sock);
 
-	int64_t lsn = greatest_lsn(r->snap_dir);
-	if (lsn <= 0)
-		panic("can't find snapshot");
+	/* Send snapshot */
+	recover_snap(r);
 
-	struct log_io *snap = log_io_open_for_read(r->snap_dir, lsn, NONE);
-	if (snap == NULL)
-		panic("can't open snapshot");
-	say_info("sending snapshot `%s'", snap->filename);
+	/* Send response to JOIN command = end of stream */
+	struct iproto_packet packet;
+	memset(&packet, 0, sizeof(packet));
+	packet.code = IPROTO_JOIN;
+	packet.sync = replica.sync;
 
-	/* Send rows */
-	int rc = recover_wal(r, snap);
-	log_io_close(&snap);
+	char fixheader[IPROTO_FIXHEADER_SIZE];
+	struct iovec iov[IPROTO_ROW_IOVMAX];
+	int iovcnt = iproto_encode_row(&packet, iov, fixheader);
+	sio_writev_all(replica.sock, iov, iovcnt);
 
-	if (rc != 0)
-		panic("can't sent snapshot");
+	say_info("snapshot sent");
+	/* replica.sock closed by guard */
+}
 
-	/* Send response to JOIN command = end of stream */
-	struct iproto_subscribe response = iproto_subscribe_stub;
-	response.lsn = mp_bswap_u64(lsn);
-	response.sync = mp_bswap_u64(sync);
-	sio_write(replica.sock, &response, sizeof(response));
+static void
+replication_relay_subscribe(struct recovery_state *r, struct relay_data *data)
+{
+	assert(data->code == IPROTO_SUBSCRIBE);
+	/* Set LSNs */
+	for (uint32_t i = 0; i < data->lsnmap_size; i++) {
+		struct node *node = (struct node *) calloc(1, sizeof(*node));
+		if (node == NULL)
+			panic("cannot allocate struct node");
+		node->id = data->lsnmap[i].node_id;
+		node->confirmed_lsn = node->current_lsn = data->lsnmap[i].lsn;
+		uint32_t k = mh_cluster_put(r->cluster,
+			(const struct node **) &node, NULL, NULL);
+		if (k == mh_end(r->cluster))
+			panic("cannot reallocate r->cluster");
+	}
 
-	say_info("snapshot sent, lsn: %" PRIi64, lsn);
-	exit(EXIT_SUCCESS);
-	/* replica.sock closed by guard */
+	/* Set node */
+	uint32_t k = mh_cluster_find(r->cluster, data->node_id, NULL);
+	assert(k != mh_end(r->cluster));
+	r->local_node = *mh_cluster_node(r->cluster, k);
+	assert(r->local_node->id == data->node_id);
+
+	/* Remove SNAPSHOT_NODE_ID */
+	recovery_fix_lsn(r, false);
+
+	say_warn("replication follow local");
+	recovery_follow_local(r, 0.1);
+	ev_run(loop(), 0);
+
+	say_crit("exiting the relay loop");
 }
 
 /** The main loop of replication client service process. */
 static void
-replication_relay_loop()
+replication_relay_loop(struct relay_data *data)
 {
 	struct sigaction sa;
 
@@ -699,26 +900,23 @@ replication_relay_loop()
 	/* Initialize the recovery process */
 	recovery_init(cfg_snap_dir, cfg_wal_dir,
 		      replication_relay_send_row,
-		      NULL, NULL, INT32_MAX);
-	/*
-	 * Note that recovery starts with lsn _NEXT_ to
-	 * the confirmed one.
-	 */
-	if (replica.lsn == 0) {
-		recovery_state->lsn = recovery_state->confirmed_lsn = 0;
-		replication_relay_join(recovery_state, replica.sync); /* exits */
-	}
-
-	recovery_state->lsn = recovery_state->confirmed_lsn = replica.lsn - 1;
-	recover_existing_wals(recovery_state);
-	/* Found nothing. */
-	if (recovery_state->lsn == replica.lsn - 1)
-		say_error("can't find WAL containing record with lsn: %" PRIi64,
-			  replica.lsn);
-	recovery_follow_local(recovery_state, 0.1);
-
-	ev_run(loop(), 0);
+		      NULL, NULL, NULL, INT32_MAX);
+	recovery_state->relay = true; /* recovery used in relay mode */
 
-	say_crit("exiting the relay loop");
+	try {
+		switch (data->code) {
+		case IPROTO_JOIN:
+			replication_relay_join(recovery_state);
+			break;
+		case IPROTO_SUBSCRIBE:
+			replication_relay_subscribe(recovery_state, data);
+			break;
+		default:
+			assert(false);
+		}
+	} catch(Exception *e) {
+		say_error("relay error: %s", e->errmsg());
+		exit(EXIT_FAILURE);
+	}
 	exit(EXIT_SUCCESS);
 }
diff --git a/src/replication.h b/src/replication.h
index 291f4306b8f18257b147980f95366878dce584c8..bf6e4a11607e2e33777618db4cda315d35201ad8 100644
--- a/src/replication.h
+++ b/src/replication.h
@@ -39,13 +39,16 @@
 void
 replication_prefork(const char *snap_dir, const char *wal_dir);
 
+void
+replication_join(int fd, struct iproto_packet *packet);
+
 /**
  * Subscribe a replica to updates.
  *
  * @return None. On error, closes the socket.
  */
 void
-subscribe(int fd, int64_t lsn, uint64_t sync);
+replication_subscribe(int fd, struct iproto_packet *packet);
 
 #endif // TARANTOOL_REPLICATION_H_INCLUDED
 
diff --git a/src/sio.h b/src/sio.h
index 49a135308be2b8e95a63af5d929e25970d9b35ca..316c08bbd4900d1f173a24034932b5fbabd6a052 100644
--- a/src/sio.h
+++ b/src/sio.h
@@ -117,6 +117,26 @@ sio_readn(int fd, void *buf, size_t count)
 ssize_t
 sio_writen(int fd, const void *buf, size_t count);
 
+/* Only for blocking I/O; advances and modifies the caller's iov array */
+static inline ssize_t
+sio_writev_all(int fd, struct iovec *iov, int iovcnt)
+{
+	ssize_t bytes_total = 0;
+	struct iovec *iovend = iov + iovcnt;
+	while(1) {
+		ssize_t bytes_written = sio_writev(fd, iov, iovend - iov);
+		bytes_total += bytes_written;
+		while (iov < iovend && bytes_written >= (ssize_t) iov->iov_len)
+			bytes_written -= (iov++)->iov_len; /* skips empty iovs */
+		if (iov == iovend)
+			break;
+		iov->iov_base = (char *) iov->iov_base + bytes_written;
+		iov->iov_len -= bytes_written;
+	}
+
+	return bytes_total;
+}
+
 /**
  * A wrapper over sendfile.
  * Throw if send file failed.
diff --git a/src/trivia/config.h.cmake b/src/trivia/config.h.cmake
index 04fa7fe30ffcf5cdd9058c924d390ad4bcd008c6..a89e0548185d34ab5749ec2e3b3c8b2e542e17bb 100644
--- a/src/trivia/config.h.cmake
+++ b/src/trivia/config.h.cmake
@@ -131,6 +131,10 @@
 #cmakedefine HAVE_PRCTL_H 1
 
 #cmakedefine HAVE_OPEN_MEMSTREAM 1
+#cmakedefine HAVE_FMEMOPEN 1
+
+#cmakedefine HAVE_LIBUUID_LINUX 1
+#cmakedefine HAVE_LIBUUID_BSD 1
 
 /*
  * predefined /etc directory prefix.
diff --git a/src/trivia/util.h b/src/trivia/util.h
index 7a1c9ece86773e42de504aea9175cecbf93ca816..cb071e612a39bd9b482ce4bac599ee8170c0ea24 100644
--- a/src/trivia/util.h
+++ b/src/trivia/util.h
@@ -193,6 +193,12 @@ FILE *
 open_memstream(char **ptr, size_t *sizeloc);
 #endif /* HAVE_OPEN_MEMSTREAM */
 
+#ifndef HAVE_FMEMOPEN
+/* Declare fmemopen(). */
+#include <stdio.h>
+FILE *
+fmemopen(void *buf, size_t size, const char *mode);
+#endif /* HAVE_FMEMOPEN */
 
 #if defined(__cplusplus)
 } /* extern "C" */
diff --git a/src/tt_uuid.c b/src/tt_uuid.c
new file mode 100644
index 0000000000000000000000000000000000000000..6127f5258a0378fe8715d5be4f8f3e18fc24b16f
--- /dev/null
+++ b/src/tt_uuid.c
@@ -0,0 +1,40 @@
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "tt_uuid.h"
+/* The nil UUID: static storage with no initializer, hence all zeroes. */
+const tt_uuid uuid_nil;
+
+char *
+tt_uuid_str(const tt_uuid *uu)
+{
+	static __thread char buf[UUID_STR_LEN + 1]; /* per thread, + NUL */
+	tt_uuid_to_string(uu, buf);
+	return buf; /* valid until this thread's next call; do not free */
+}
+
diff --git a/src/tt_uuid.h b/src/tt_uuid.h
new file mode 100644
index 0000000000000000000000000000000000000000..3f584008ce7b97d323727e68f8a5131bdf179773
--- /dev/null
+++ b/src/tt_uuid.h
@@ -0,0 +1,228 @@
+#ifndef TARANTOOL_UUID_H_INCLUDED
+#define TARANTOOL_UUID_H_INCLUDED
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <trivia/config.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/**
+ * Portable wrapper over the system UUID library. The backend is
+ * selected by HAVE_LIBUUID_LINUX / HAVE_LIBUUID_BSD from
+ * trivia/config.h; each backend defines the same set of
+ * tt_uuid_* functions.
+ */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/** Sizes of the binary form and of the text form without the NUL. */
+enum { UUID_LEN = 16, UUID_STR_LEN = 36 };
+
+#if defined(HAVE_LIBUUID_LINUX)
+
+#include <uuid/uuid.h>
+
+typedef struct tt_uuid {
+	uuid_t id;
+} tt_uuid;
+
+/** Generate a new UUID into uu. */
+static inline void
+tt_uuid_create(tt_uuid *uu)
+{
+	uuid_generate(uu->id);
+}
+
+/**
+ * Parse the text form of a UUID from in into uu.
+ * @return 0 on success, -1 if in is not a valid UUID string.
+ */
+static inline int
+tt_uuid_from_string(const char *in, tt_uuid *uu)
+{
+	return uuid_parse((char *) in, uu->id);
+}
+
+/**
+ * Format uu as text into out, which must have room for
+ * UUID_STR_LEN + 1 bytes (the result is NUL-terminated).
+ */
+static inline void
+tt_uuid_to_string(const tt_uuid *uu, char *out)
+{
+	uuid_unparse(uu->id, out);
+}
+
+/** Load uu from the UUID_LEN-byte binary form at in. */
+static inline void
+tt_uuid_dec_be(const void *in, tt_uuid *uu)
+{
+	memcpy(uu->id, in, sizeof(uu->id));
+}
+
+/** Store uu as its UUID_LEN-byte binary form at out. */
+static inline void
+tt_uuid_enc_be(const tt_uuid *uu, void *out)
+{
+	memcpy(out, uu->id, sizeof(uu->id));
+}
+
+/** @return true if uu is the nil UUID. */
+static inline bool
+tt_uuid_is_nil(const tt_uuid *uu)
+{
+	return uuid_is_null(uu->id);
+}
+
+/**
+ * Three-way comparison of two UUIDs.
+ * @return a value less than, equal to or greater than zero if
+ *         lhs is less than, equal to or greater than rhs.
+ *         The type is int because bool would collapse the sign
+ *         and leave only "differs / does not differ".
+ */
+static inline int
+tt_uuid_cmp(const tt_uuid *lhs, const tt_uuid *rhs)
+{
+	return uuid_compare(lhs->id, rhs->id);
+}
+
+#elif defined(HAVE_LIBUUID_BSD)
+
+#include <uuid.h>
+
+typedef struct uuid tt_uuid;
+
+/**
+ * Generate a new UUID into uu.
+ * @return non-zero on success, 0 on failure. The libuuid variant
+ *         above returns void, so portable callers cannot use
+ *         this result.
+ */
+static inline int
+tt_uuid_create(tt_uuid *uu)
+{
+	uint32_t status;
+	uuid_create(uu, &status);
+	return status == uuid_s_ok;
+}
+
+/**
+ * Parse the text form of a UUID from in into uu.
+ * @return 0 on success, -1 on failure: the same convention as
+ *         the libuuid variant above, so one check works on
+ *         every platform.
+ */
+static inline int
+tt_uuid_from_string(const char *in, tt_uuid *uu)
+{
+	uint32_t status;
+	uuid_from_string(in, uu, &status);
+	return status == uuid_s_ok ? 0 : -1;
+}
+
+/**
+ * Format uu as text into out, which must have room for
+ * UUID_STR_LEN + 1 bytes (the result is NUL-terminated).
+ * If the library fails to produce the string, out is set to "".
+ */
+static inline void
+tt_uuid_to_string(const tt_uuid *uu, char *out)
+{
+	uint32_t status;
+	char *buf = NULL;
+	uuid_to_string(uu, &buf, &status);
+	assert(status == uuid_s_ok);
+	/* With NDEBUG the assert is gone: never strncpy() from NULL. */
+	if (status != uuid_s_ok || buf == NULL) {
+		out[0] = '\0';
+		free(buf);
+		return;
+	}
+	strncpy(out, buf, UUID_STR_LEN);
+	out[UUID_STR_LEN] = '\0';
+	free(buf);
+}
+
+/**
+ * Three-way comparison of two UUIDs.
+ * @return a value less than, equal to or greater than zero if
+ *         lhs is less than, equal to or greater than rhs.
+ */
+static inline int
+tt_uuid_cmp(const tt_uuid *lhs, const tt_uuid *rhs)
+{
+	uint32_t status;
+	return uuid_compare(lhs, rhs, &status);
+}
+
+/** @return true if uu is the nil UUID. */
+static inline bool
+tt_uuid_is_nil(const tt_uuid *uu)
+{
+	uint32_t status;
+	return uuid_is_nil(uu, &status);
+}
+
+/** Load uu from the UUID_LEN-byte big-endian binary form at in. */
+static inline void
+tt_uuid_dec_be(const void *in, tt_uuid *uu)
+{
+	uuid_dec_be(in, uu);
+}
+
+/** Store uu as its UUID_LEN-byte big-endian binary form at out. */
+static inline void
+tt_uuid_enc_be(const tt_uuid *uu, void *out)
+{
+	uuid_enc_be(out, uu);
+}
+#else
+#error Unsupported libuuid
+#endif /* HAVE_LIBUUID_XXX */
+
+/** The nil UUID, defined in tt_uuid.c. */
+extern const tt_uuid uuid_nil;
+
+/**
+ * Format uu into a static thread-local buffer and return it.
+ * The next call in the same thread overwrites the buffer.
+ */
+char *
+tt_uuid_str(const tt_uuid *uu);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif
+
+#endif /* TARANTOOL_UUID_H_INCLUDED */
diff --git a/src/util.cc b/src/util.cc
index 339e5a10b5d92293193c2416082409afccf21b38..1cf563abf2e350b049c7f29cf91e3da9c9505968 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -202,6 +202,36 @@ fdprintf(int fd, const char *format, ...)
 	return total;
 }
 
+#ifndef HAVE_FMEMOPEN
+/**
+ * Fallback for platforms without fmemopen(): copy the buffer into
+ * a tmpfile() temporary file and return a stream rewound to its
+ * start. Only mode "r" is supported; because the data is copied,
+ * the stream does not alias buf.
+ *
+ * @return the stream, or NULL if the temporary file could not be
+ *         created or filled.
+ */
+FILE *
+fmemopen(void *buf, size_t size, const char *mode)
+{
+	assert(strcmp(mode, "r") == 0);
+	(void) mode; /* Unused when assert() is compiled out. */
+
+	FILE *ret = tmpfile();
+	if (ret == NULL)
+		return NULL;
+	/*
+	 * A short or failed write would silently hand the caller
+	 * truncated data; flush here so buffered errors surface too.
+	 */
+	if (fwrite(buf, 1, size, ret) != size || fflush(ret) != 0) {
+		fclose(ret);
+		return NULL;
+	}
+	rewind(ret);
+	return ret;
+}
+#endif /* HAVE_FMEMOPEN */
+
 #ifdef ENABLE_BACKTRACE
 
 /*
diff --git a/test/box/admin.result b/test/box/admin.result
index ff9063f99d78994b0042ec57745da5f262eb8632..b115f2bbbd276306d622e949dac0aac2f634f67b 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -11,16 +11,19 @@ space:create_index('primary', { type = 'hash' })
 box.stat()
 ---
 - DELETE:
-    total: 0
-    rps: 0
+    total: 34
+    rps: 6
   SELECT:
     total: 1
     rps: 0
   REPLACE:
-    total: 0
+    total: 2
     rps: 0
   INSERT:
-    total: 2
+    total: 34
+    rps: 6
+  AUTH:
+    total: 0
     rps: 0
   CALL:
     total: 0
@@ -69,16 +72,19 @@ box.cfg
 box.stat()
 ---
 - DELETE:
-    total: 0
-    rps: 0
+    total: 34
+    rps: 6
   SELECT:
     total: 1
     rps: 0
   REPLACE:
-    total: 0
+    total: 2
     rps: 0
   INSERT:
-    total: 2
+    total: 34
+    rps: 6
+  AUTH:
+    total: 0
     rps: 0
   CALL:
     total: 0
@@ -112,10 +118,10 @@ function test_box_info()
     local buildstr = {'flags', 'target', 'compiler', 'options'}
     local str = {'version', 'status' }
     local failed = {}
-    if check_type(tmp.lsn, 'cdata') == false then
-        table.insert(failed1, 'box.info().lsn')
+    if check_type(tmp.cluster, 'table') == false then
+        table.insert(failed1, 'box.info().cluster')
     else
-        tmp.lsn = nil
+        tmp.cluster = nil
     end
     for k, v in ipairs(num) do
         if check_type(tmp[v], 'number') == false then
diff --git a/test/box/admin.test.lua b/test/box/admin.test.lua
index 3b1a1f2e8496b54549b2553e21acec8ffe612116..20e454340ee04970410eeec0ad04732ace71c049 100644
--- a/test/box/admin.test.lua
+++ b/test/box/admin.test.lua
@@ -26,10 +26,10 @@ function test_box_info()
     local buildstr = {'flags', 'target', 'compiler', 'options'}
     local str = {'version', 'status' }
     local failed = {}
-    if check_type(tmp.lsn, 'cdata') == false then
-        table.insert(failed1, 'box.info().lsn')
+    if check_type(tmp.cluster, 'table') == false then
+        table.insert(failed1, 'box.info().cluster')
     else
-        tmp.lsn = nil
+        tmp.cluster = nil
     end
     for k, v in ipairs(num) do
         if check_type(tmp[v], 'number') == false then
diff --git a/test/box/alter.result b/test/box/alter.result
index 59bfceb3c28b222ebedfd0d72f709a4fa974b9b6..744100e6d9ba6d13c86da5454ade376dea39ed97 100644
--- a/test/box/alter.result
+++ b/test/box/alter.result
@@ -89,7 +89,7 @@ space = box.space[t[0]]
 ...
 space.n
 ---
-- 313
+- 321
 ...
 space.arity
 ---
@@ -104,23 +104,23 @@ space.index[0]
 --
 space:select{0}
 ---
-- error: 'No index #0 is defined in space 313'
+- error: 'No index #0 is defined in space 321'
 ...
 space:insert{0, 0}
 ---
-- error: 'No index #0 is defined in space 313'
+- error: 'No index #0 is defined in space 321'
 ...
 space:replace{0, 0}
 ---
-- error: 'No index #0 is defined in space 313'
+- error: 'No index #0 is defined in space 321'
 ...
 space:update({0}, {{'+', 0, 1}})
 ---
-- error: 'No index #0 is defined in space 313'
+- error: 'No index #0 is defined in space 321'
 ...
 space:delete{0}
 ---
-- error: 'No index #0 is defined in space 313'
+- error: 'No index #0 is defined in space 321'
 ...
 t = _space:delete{space.n}
 ---
@@ -134,7 +134,7 @@ space_deleted
 ...
 space:replace{0}
 ---
-- error: Space 313 does not exist
+- error: Space 321 does not exist
 ...
 _index:insert{_space.n, 0, 'primary', 'tree', 1, 1, 0, 'num'}
 ---
@@ -167,6 +167,8 @@ _index:select{}
   - [304, 2, 'name', 'tree', 1, 1, 2, 'str']
   - [312, 0, 'primary', 'tree', 1, 3, 1, 'num', 2, 'str', 3, 'num']
   - [312, 1, 'owner', 'tree', 0, 1, 1, 'num']
+  - [320, 0, 'primary', 'tree', 1, 1, 0, 'num']
+  - [320, 1, 'uuid', 'tree', 1, 1, 1, 'str']
 ...
 -- modify indexes of a system space
 _index:delete{_index.n, 0}
diff --git a/test/box/dup_key1.xlog b/test/box/dup_key1.xlog
deleted file mode 100644
index 2821f7db0b489ca138f7ce3d8869f384c1c96471..0000000000000000000000000000000000000000
Binary files a/test/box/dup_key1.xlog and /dev/null differ
diff --git a/test/box/dup_key2.xlog b/test/box/dup_key2.xlog
deleted file mode 100644
index 052bfac1cd3392ee0609a962720aaeebf04eb2ef..0000000000000000000000000000000000000000
Binary files a/test/box/dup_key2.xlog and /dev/null differ
diff --git a/test/box/info.result b/test/box/info.result
index f998b0f30d8e07d00f28dbe82b5d9d5947b6a73c..0ded19458af80e18fe432b3a746495d0c578e373 100644
--- a/test/box/info.result
+++ b/test/box/info.result
@@ -24,9 +24,9 @@ string.match(box.info.logger_pid, '^[0-9]+$') ~= nil
 ---
 - true
 ...
-box.info.lsn > 0
+#box.info.cluster > 0
 ---
-- true
+- false
 ...
 box.info.recovery_lag
 ---
@@ -76,8 +76,9 @@ table.sort(t)
 t
 ---
 - - build
+  - cluster
   - logger_pid
-  - lsn
+  - node
   - pid
   - recovery_lag
   - recovery_last_update
diff --git a/test/box/info.test.lua b/test/box/info.test.lua
index 716e7bd1c6e140e68007f8ae39f5babffccb2c2b..7fbdf7893e134ed59ad33f198fe7fed232dd3e15 100644
--- a/test/box/info.test.lua
+++ b/test/box/info.test.lua
@@ -6,7 +6,7 @@ box.info['unknown_variable']
 string.match(box.info.version, '^[1-9]') ~= nil
 string.match(box.info.pid, '^[1-9][0-9]*$') ~= nil
 string.match(box.info.logger_pid, '^[0-9]+$') ~= nil
-box.info.lsn > 0
+#box.info.cluster > 0
 box.info.recovery_lag
 box.info.recovery_last_update
 box.info.status
diff --git a/test/box/misc.result b/test/box/misc.result
index a6c0f62361a4bd710eb261e607e3f9d3ee525da8..eb279fe24f5c9d61070ba891de4500dafb339524 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -117,6 +117,7 @@ t;
   - SELECT
   - REPLACE
   - INSERT
+  - AUTH
   - CALL
   - UPDATE
   - total
@@ -193,66 +194,72 @@ end;
 t;
 ---
 - - 'box.error.ER_CREATE_FUNCTION : 50'
-  - 'box.error.ER_NO_SUCH_INDEX : 35'
-  - 'box.error.ER_TUPLE_FOUND : 3'
-  - 'box.error.ER_CREATE_SPACE : 9'
   - 'box.error.ER_PROC_RET : 21'
   - 'box.error.ER_TUPLE_FORMAT_LIMIT : 16'
-  - 'box.error.ER_FIELD_TYPE : 23'
-  - 'box.error.ER_CFG : 59'
-  - 'box.error.ER_UNKNOWN_SCHEMA_OBJECT : 49'
-  - 'box.error.ER_OK : 0'
-  - 'box.error.ER_NO_SUCH_ENGINE : 57'
+  - 'box.error.ER_FUNCTION_MAX : 54'
   - 'box.error.ER_TUPLE_NOT_FOUND : 4'
-  - 'box.error.ER_INDEX_ARITY : 39'
-  - 'box.error.ER_WAL_IO : 40'
-  - 'box.error.ER_USER_MAX : 56'
-  - 'box.error.ER_NO_SUCH_FUNCTION : 51'
-  - 'box.error.ER_INJECTION : 8'
-  - 'box.error.ER_DROP_PRIMARY_KEY : 17'
-  - 'box.error.ER_INDEX_TYPE : 13'
+  - 'box.error.ER_PASSWORD_MISMATCH : 47'
+  - 'box.error.ER_LAST_DROP : 15'
   - 'box.error.ER_ARG_TYPE : 26'
-  - 'box.error.ER_FUNCTION_MAX : 54'
-  - 'box.error.ER_FUNCTION_ACCESS_DENIED : 53'
-  - 'box.error.ER_SPACE_ARITY : 38'
+  - 'box.error.ER_INVALID_CLUSTER : 63'
   - 'box.error.ER_INVALID_MSGPACK : 20'
-  - 'box.error.ER_SPACE_ACCESS_DENIED : 55'
-  - 'box.error.ER_KEY_PART_COUNT : 31'
   - 'box.error.ER_RELOAD_CFG : 58'
   - 'box.error.ER_USER_EXISTS : 46'
   - 'box.error.ER_MEMORY_ISSUE : 2'
   - 'box.error.ER_ILLEGAL_PARAMS : 1'
   - 'box.error.ER_KEY_FIELD_TYPE : 18'
   - 'box.error.ER_NONMASTER : 6'
-  - 'box.error.ER_UNKNOWN_REQUEST_TYPE : 48'
-  - 'box.error.ER_FIELD_TYPE_MISMATCH : 24'
-  - 'box.error.ER_MODIFY_INDEX : 14'
-  - 'box.error.ER_PASSWORD_MISMATCH : 47'
-  - 'box.error.ER_EXACT_MATCH : 19'
   - 'box.error.ER_NO_SUCH_USER : 45'
-  - 'box.error.ER_SECONDARY : 7'
-  - 'box.error.ER_FUNCTION_EXISTS : 52'
+  - 'box.error.ER_EXACT_MATCH : 19'
   - 'box.error.ER_CREATE_USER : 43'
-  - 'box.error.ER_ACCESS_DENIED : 42'
-  - 'box.error.ER_LAST_DROP : 15'
-  - 'box.error.ER_UPDATE_FIELD : 29'
+  - 'box.error.ER_FUNCTION_EXISTS : 52'
+  - 'box.error.ER_NO_SUCH_FUNCTION : 51'
   - 'box.error.ER_FIBER_STACK : 30'
-  - 'box.error.ER_UNKNOWN_UPDATE_OP : 28'
-  - 'box.error.ER_DROP_USER : 44'
-  - 'box.error.ER_UNSUPPORTED : 5'
-  - 'box.error.ER_NO_SUCH_FIELD : 37'
+  - 'box.error.ER_FUNCTION_ACCESS_DENIED : 53'
+  - 'box.error.ER_CFG : 59'
   - 'box.error.ER_TUPLE_NOT_ARRAY : 22'
-  - 'box.error.ER_NO_SUCH_SPACE : 36'
+  - 'box.error.ER_CLUSTER_ID_IS_RO : 65'
   - 'box.error.ER_MORE_THAN_ONE_TUPLE : 41'
-  - 'box.error.ER_ALTER_SPACE : 12'
-  - 'box.error.ER_NO_SUCH_PROC : 33'
+  - 'box.error.ER_NO_SUCH_SPACE : 36'
+  - 'box.error.ER_NO_SUCH_INDEX : 35'
+  - 'box.error.ER_TUPLE_FOUND : 3'
+  - 'box.error.ER_CREATE_SPACE : 9'
+  - 'box.error.ER_FIELD_TYPE : 23'
+  - 'box.error.ER_OK : 0'
+  - 'box.error.ER_INDEX_ARITY : 39'
+  - 'box.error.ER_WAL_IO : 40'
+  - 'box.error.ER_INJECTION : 8'
+  - 'box.error.ER_NO_SUCH_ENGINE : 57'
+  - 'box.error.ER_INDEX_TYPE : 13'
+  - 'box.error.ER_UNKNOWN_SCHEMA_OBJECT : 49'
+  - 'box.error.ER_SPACE_ACCESS_DENIED : 55'
+  - 'box.error.ER_KEY_PART_COUNT : 31'
   - 'box.error.ER_SPACE_EXISTS : 10'
+  - 'box.error.ER_UNKNOWN_NODE : 62'
+  - 'box.error.ER_MODIFY_INDEX : 14'
+  - 'box.error.ER_SECONDARY : 7'
+  - 'box.error.ER_NODE_ID_IS_RO : 66'
+  - 'box.error.ER_INVALID_UUID : 64'
+  - 'box.error.ER_FIELD_TYPE_MISMATCH : 24'
+  - 'box.error.ER_SPLICE : 25'
+  - 'box.error.ER_TUPLE_IS_TOO_LONG : 27'
+  - 'box.error.ER_DROP_SPACE : 11'
+  - 'box.error.ER_SPACE_ARITY : 38'
+  - 'box.error.ER_LOCAL_NODE_IS_NOT_ACTIVE : 61'
+  - 'box.error.ER_UNSUPPORTED : 5'
+  - 'box.error.ER_ACCESS_DENIED : 42'
   - 'box.error.ER_PROC_LUA : 32'
+  - 'box.error.ER_UPDATE_FIELD : 29'
+  - 'box.error.ER_NO_SUCH_FIELD : 37'
+  - 'box.error.ER_ALTER_SPACE : 12'
+  - 'box.error.ER_DROP_USER : 44'
+  - 'box.error.ER_UNKNOWN_UPDATE_OP : 28'
+  - 'box.error.ER_NO_SUCH_PROC : 33'
   - 'box.error.ER_SOPHIA : 60'
   - 'box.error.ER_NO_SUCH_TRIGGER : 34'
-  - 'box.error.ER_TUPLE_IS_TOO_LONG : 27'
-  - 'box.error.ER_SPLICE : 25'
-  - 'box.error.ER_DROP_SPACE : 11'
+  - 'box.error.ER_UNKNOWN_REQUEST_TYPE : 48'
+  - 'box.error.ER_DROP_PRIMARY_KEY : 17'
+  - 'box.error.ER_USER_MAX : 56'
 ...
 --# setopt delimiter ''
 -- A test case for Bug#901674
diff --git a/test/box/snapshot.test.py b/test/box/snapshot.test.py
index 746b2ca35998a0b21c1eca4f07fafd77ecb6f72f..0ec7969434d83d11dded75017bceb3511db77cdb 100644
--- a/test/box/snapshot.test.py
+++ b/test/box/snapshot.test.py
@@ -49,7 +49,7 @@ print """
 admin("space:insert{1, 'Test tuple'}")
 
 pid = int(yaml.load(admin("box.info.pid", silent=True))[0])
-lsn = yaml.load(admin("box.info.lsn", silent=True))[0]
+lsn = yaml.load(admin("next(box.info.cluster)", silent=True))[1]
 
 snapshot = str(lsn).zfill(20) + ".snap"
 snapshot = os.path.join(server.vardir, snapshot)
@@ -57,7 +57,6 @@ snapshot = os.path.join(server.vardir, snapshot)
 iteration = 0
 
 MAX_ITERATIONS = 100
-
 while not os.access(snapshot, os.F_OK) and iteration < MAX_ITERATIONS:
   if iteration % 10 == 0:
     os.kill(pid, SIGUSR1)
diff --git a/test/box/sophia.result b/test/box/sophia.result
index 7cae8cef6e6fc90c5528fb2ef51db96264e78d82..0e2a3c921d3269978c0ecf19f635ad0dbec1abba 100644
--- a/test/box/sophia.result
+++ b/test/box/sophia.result
@@ -1,3 +1,9 @@
+os.execute("rm -rf sophia")
+---
+- 0
+...
+--# stop server default
+--# start server default
 space = box.schema.create_space('tweedledum', { id = 123, engine = 'sophia' })
 ---
 ...
diff --git a/test/box/sophia.test.lua b/test/box/sophia.test.lua
index 759fe750abd7e9d79163701fadeb2c02606a263f..d4bd1f220b8930db03d6f969f6a7187f5cdd173d 100644
--- a/test/box/sophia.test.lua
+++ b/test/box/sophia.test.lua
@@ -1,3 +1,6 @@
+os.execute("rm -rf sophia")
+--# stop server default
+--# start server default
 
 space = box.schema.create_space('tweedledum', { id = 123, engine = 'sophia' })
 space:create_index('primary', { type = 'tree', parts = {0, 'num'} })
diff --git a/test/box/stat.result b/test/box/stat.result
index a6bf3953ff026edc5174c2423131a02277a821a3..21d9e24590d41c49b6ae93f0ab7fb85fa7d1077b 100644
--- a/test/box/stat.result
+++ b/test/box/stat.result
@@ -15,8 +15,8 @@ for i=1,10 do space:insert{i, 'tuple'..tostring(i)} end
 box.stat()
 ---
 - DELETE:
-    total: 0
-    rps: 0
+    total: 14
+    rps: 2
   SELECT:
     total: 1
     rps: 0
@@ -24,8 +24,11 @@ box.stat()
     total: 0
     rps: 0
   INSERT:
-    total: 12
-    rps: 2
+    total: 34
+    rps: 6
+  AUTH:
+    total: 0
+    rps: 0
   CALL:
     total: 0
     rps: 0
@@ -39,8 +42,8 @@ box.stat()
 box.stat()
 ---
 - DELETE:
-    total: 0
-    rps: 0
+    total: 14
+    rps: 2
   SELECT:
     total: 0
     rps: 0
@@ -48,6 +51,9 @@ box.stat()
     total: 0
     rps: 0
   INSERT:
+    total: 34
+    rps: 6
+  AUTH:
     total: 0
     rps: 0
   CALL:
diff --git a/test/box/unfinished.xlog b/test/box/unfinished.xlog
deleted file mode 100644
index 5ebfa99f87438a2dbf7f67843b2b6e22f7a88e68..0000000000000000000000000000000000000000
Binary files a/test/box/unfinished.xlog and /dev/null differ
diff --git a/test/box/xlog.result b/test/box/xlog.result
index ad4de5f8ec3b26afc3ea2ad5841dfc9256485797..b17f6d3d482b010082e5c53bb6acd4281edb1418 100644
--- a/test/box/xlog.result
+++ b/test/box/xlog.result
@@ -4,11 +4,11 @@
 space = box.schema.create_space('tweedledum', { id = 0 })
 ---
 ...
-00000000000000000002.xlog.inprogress exists
+.xlog.inprogress exists
 space:create_index('primary', { type = 'hash' })
 ---
 ...
-00000000000000000002.xlog.inprogress has been successfully renamed
+.xlog.inprogress has been successfully renamed
 
 # Inprogress xlog must be renamed during regular termination.
 
@@ -16,22 +16,72 @@ box.space[0]:insert{3, 'third tuple'}
 ---
 - [3, 'third tuple']
 ...
-00000000000000000004.xlog.inprogress exists
-00000000000000000004.xlog.inprogress has been successfully renamed
+.xlog.inprogress exists
+.xlog.inprogress has been successfully renamed
 
 # An inprogress xlog file with one record must be renamed during recovery.
 
-00000000000000000005.xlog.inprogress hash been successfully renamed
-
-# Empty (zero size) inprogress xlog must be deleted during recovery.
-
-00000000000000000006.xlog.inprogress has been successfully deleted
-
-# Empty (header only, no records) inprogress xlog must be deleted
-# during recovery.
-
-00000000000000000006.xlog.inprogress has been successfully deleted
+box.space[0]:insert{4, 'fourth tuple'}
+---
+- [4, 'fourth tuple']
+...
+box.space[0]:insert{5, 'Unfinished record'}
+---
+- [5, 'Unfinished record']
+...
+.xlog exists
+.xlog.inprogress hash been successfully renamed
+space = box.schema.create_space('test')
+---
+...
+box.space['test']:create_index('primary')
+---
+...
+box.space['test']:insert{1, 'first tuple'}
+---
+- [1, 'first tuple']
+...
+box.space['test']:insert{2, 'second tuple'}
+---
+- [2, 'second tuple']
+...
+.xlog exists
+space = box.schema.create_space('test')
+---
+...
+box.space['test']:create_index('primary')
+---
+...
+box.space['test']:insert{1, 'first tuple'}
+---
+- [1, 'first tuple']
+...
+box.space['test']:delete{1}
+---
+- [1, 'first tuple']
+...
+box.space['test']:insert{1, 'third tuple'}
+---
+- [1, 'third tuple']
+...
+box.space['test']:insert{2, 'fourth tuple'}
+---
+- [2, 'fourth tuple']
+...
+.xlog exists
+check log line for 'Duplicate key'
 
-# Inprogress xlog with bad record must be deleted during recovery.
+'Duplicate key' exists in server log
 
-00000000000000000006.xlog.inprogress has been successfully deleted
+box.space['test']:get{1}
+---
+- [1, 'first tuple']
+...
+box.space['test']:get{2}
+---
+- [2, 'second tuple']
+...
+box.space['test']:len()
+---
+- 2
+...
diff --git a/test/box/xlog.test.py b/test/box/xlog.test.py
index db67d2d6e115b59bc131c7971cda6cc744846cfc..231c4e632c9ccc8f0675662cf4315e787bd638cb 100644
--- a/test/box/xlog.test.py
+++ b/test/box/xlog.test.py
@@ -1,153 +1,202 @@
 import os
 import shutil
+import yaml
+import re
 
 from os.path import abspath
 
 # cleanup server.vardir
 server.stop()
 server.deploy()
+lsn = yaml.load(server.admin("next(box.info.cluster)", silent=True))[1]
 server.stop()
 
 print """
 # Inprogress xlog must be renamed before second insert.
 """
-wal_inprogress = os.path.join(server.vardir, "00000000000000000002.xlog.inprogress")
-wal = os.path.join(server.vardir, "00000000000000000002.xlog")
+filename = str(lsn).zfill(20) + ".xlog"
+wal_inprogress = os.path.join(server.vardir, filename + ".inprogress")
+wal = os.path.join(server.vardir, filename)
 
 server.start()
 
 server.admin("space = box.schema.create_space('tweedledum', { id = 0 })")
 if os.access(wal_inprogress, os.F_OK):
-  print "00000000000000000002.xlog.inprogress exists"
+  print ".xlog.inprogress exists"
 
 server.admin("space:create_index('primary', { type = 'hash' })")
 
 if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK):
-  print "00000000000000000002.xlog.inprogress has been successfully renamed"
+  print ".xlog.inprogress has been successfully renamed"
 server.stop()
+lsn += 2
 
 print """
 # Inprogress xlog must be renamed during regular termination.
 """
+filename = str(lsn).zfill(20) + ".xlog"
 server.start()
 
-wal_inprogress = os.path.join(server.vardir, "00000000000000000004.xlog.inprogress")
-wal = os.path.join(server.vardir, "00000000000000000004.xlog")
+wal_inprogress = os.path.join(server.vardir, filename + ".inprogress")
+wal = os.path.join(server.vardir, filename)
 
 server.admin("box.space[0]:insert{3, 'third tuple'}")
 
 if os.access(wal_inprogress, os.F_OK):
-  print "00000000000000000004.xlog.inprogress exists"
+  print ".xlog.inprogress exists"
 
 server.stop()
 
 if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK):
-  print "00000000000000000004.xlog.inprogress has been successfully renamed"
+  print ".xlog.inprogress has been successfully renamed"
+lsn += 1
 
 print """
 # An inprogress xlog file with one record must be renamed during recovery.
 """
 
-wal_inprogress = os.path.join(server.vardir, "00000000000000000005.xlog.inprogress")
-wal = os.path.join(server.vardir, "00000000000000000005.xlog")
+server.start()
+filename = str(lsn).zfill(20) + ".xlog"
+wal_inprogress = os.path.join(server.vardir, filename + ".inprogress")
+wal = os.path.join(server.vardir, filename)
+server.admin("box.space[0]:insert{4, 'fourth tuple'}")
+server.admin("box.space[0]:insert{5, 'Unfinished record'}")
+pid = int(yaml.load(server.admin("box.info.pid", silent=True))[0])
+from signal import SIGKILL
+if pid > 0:
+    os.kill(pid, SIGKILL)
+server.stop()
 
-os.symlink(abspath("box/unfinished.xlog"), wal_inprogress)
+if os.access(wal, os.F_OK):
+    print ".xlog exists"
+    # Remove last byte from xlog
+    f = open(wal, "a")
+    size = f.tell()
+    f.truncate(size - 1)
+    f.close()
+    os.rename(wal, wal_inprogress)
 
 server.start()
 
 if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK):
-  print "00000000000000000005.xlog.inprogress hash been successfully renamed"
+  print ".xlog.inprogress hash been successfully renamed"
 server.stop()
+lsn += 1
 
-print """
-# Empty (zero size) inprogress xlog must be deleted during recovery.
-"""
-
-wal_inprogress = os.path.join(server.vardir, "00000000000000000006.xlog.inprogress")
-wal = os.path.join(server.vardir, "00000000000000000006.xlog")
+# print """
+# # Empty (zero size) inprogress xlog must be deleted during recovery.
+# """
+#
+# wal_inprogress = os.path.join(server.vardir, "00000000000000000006.xlog.inprogress")
+# wal = os.path.join(server.vardir, "00000000000000000006.xlog")
+# 
+# os.symlink(abspath("box/empty.xlog"), wal_inprogress)
+# server.start()
+#
+# if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
+#    print "00000000000000000006.xlog.inprogress has been successfully deleted"
+# server.stop()
+
+# print """
+# # Empty (header only, no records) inprogress xlog must be deleted
+# # during recovery.
+# """
+# 
+# # If the previous test has failed, there is a dangling link
+# # and symlink fails.
+# try:
+#   os.symlink(abspath("box/just_header.xlog"), wal_inprogress)
+# except OSError as e:
+#   print e
+# 
+# server.start()
+# 
+# if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
+#    print "00000000000000000006.xlog.inprogress has been successfully deleted"
+# server.stop()
+
+# print """
+# # Inprogress xlog with bad record must be deleted during recovery.
+# """
+# 
+# # If the previous test has failed, there is a dangling link
+# # and symlink fails.
+# try:
+#   os.symlink(abspath("box/bad_record.xlog"), wal_inprogress)
+# except OSError as e:
+#   print e
+# 
+# server.start()
+# 
+# if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
+#    print "00000000000000000006.xlog.inprogress has been successfully deleted"
 
-os.symlink(abspath("box/empty.xlog"), wal_inprogress)
-server.start()
+#print """
+#A test case for https://bugs.launchpad.net/tarantool/+bug/1052018
+#panic_on_wal_error doesn't work for duplicate key errors
+#"""
 
-if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
-   print "00000000000000000006.xlog.inprogress has been successfully deleted"
+server.stop()
+server.cfgfile_source = "box/panic_on_wal_error.cfg"
+server.deploy()
+lsn = yaml.load(server.admin("next(box.info.cluster)", silent=True))[1]
+filename = str(lsn).zfill(20) + ".xlog"
+wal_old = os.path.join(server.vardir, "old_" + filename)
+wal = os.path.join(server.vardir, filename)
+
+# Create wal#1
+server.admin("space = box.schema.create_space('test')")
+server.admin("box.space['test']:create_index('primary')")
+server.admin("box.space['test']:insert{1, 'first tuple'}")
+server.admin("box.space['test']:insert{2, 'second tuple'}")
 server.stop()
 
-print """
-# Empty (header only, no records) inprogress xlog must be deleted
-# during recovery.
-"""
+# Save wal #1
+if os.access(wal, os.F_OK):
+    print ".xlog exists"
+    os.rename(wal, wal_old)
 
-# If the previous test has failed, there is a dangling link
-# and symlink fails.
-try:
-  os.symlink(abspath("box/just_header.xlog"), wal_inprogress)
-except OSError as e:
-  print e
+lsn += 4
 
+# Create another wal#1
 server.start()
-
-if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
-   print "00000000000000000006.xlog.inprogress has been successfully deleted"
+server.admin("space = box.schema.create_space('test')")
+server.admin("box.space['test']:create_index('primary')")
+server.admin("box.space['test']:insert{1, 'first tuple'}")
+server.admin("box.space['test']:delete{1}")
 server.stop()
 
-print """
-# Inprogress xlog with bad record must be deleted during recovery.
-"""
-
-# If the previous test has failed, there is a dangling link
-# and symlink fails.
-try:
-  os.symlink(abspath("box/bad_record.xlog"), wal_inprogress)
-except OSError as e:
-  print e
-
+# Create wal#2
 server.start()
+server.admin("box.space['test']:insert{1, 'third tuple'}")
+server.admin("box.space['test']:insert{2, 'fourth tuple'}")
+server.stop()
 
-if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK):
-   print "00000000000000000006.xlog.inprogress has been successfully deleted"
+if os.access(wal, os.F_OK):
+    print ".xlog exists"
+    # Replace wal#1 with saved copy
+    os.unlink(wal)
+    os.rename(wal_old, wal)
 
-#print """
-#A test case for https://bugs.launchpad.net/tarantool/+bug/1052018
-#panic_on_wal_error doesn't work for duplicate key errors
-#"""
+f = open(server.logfile, "r")
+f.seek(0, 2)
 
-# Step-by-step instruction for log files preparation
-# needed for bugtest #1052018.
-#
-#
-# 1.  box.schema.create_space('test')
-# 2.  box.space['test']:create_index('primary')
-# 3.  box.space['test']:insert{1, 'first tuple}
-# 4.  box.space['test']:insert{2, 'second tuple}
-# 5.  stop tarantool
-# 6.  copy xlog to dup_key1.xlog
-# 7.  remove xlog
-# 8.  start tarantool
-# 9.  box.schema.create_space('test')
-# 10. box.space['test']:create_index('primary')
-# 11. box.space['test']:insert{1, 'first tuple}
-# 12. box.space['test']:delete{1}
-# 13. stop tarantool
-# 14. start tarantool
-# 15. box.space['test']:insert{1, 'third tuple'}
-# 16. box.space['test']:insert{2, 'fourth tuple'}
-# 17. stop tarantool
-# 18. copy xlog to dup_key2.xlog
-#
+server.start()
 
-#server.stop()
-#server.cfgfile_source = "box/panic_on_wal_error.cfg"
-#server.deploy()
-#server.stop()
-#shutil.copy(abspath("box/dup_key1.xlog"),
-            #os.path.join(server.vardir, "00000000000000000002.xlog"))
-#shutil.copy(abspath("box/dup_key2.xlog"),
-           #os.path.join(server.vardir, "00000000000000000004.xlog"))
-#server.start()
-#admin("box.space['test']:get{1}")
-#admin("box.space['test']:get{2}")
-#admin("box.space['test']:len()")
+check="Duplicate key"
+print "check log line for '%s'" % check
+print
+line = f.readline()
+while line:
+    if re.search(r'(%s)' % check, line):
+        print "'%s' exists in server log" % check
+        break
+    line = f.readline()
+print
+
+server.admin("box.space['test']:get{1}")
+server.admin("box.space['test']:get{2}")
+server.admin("box.space['test']:len()")
 
 # cleanup
 server.stop()
diff --git a/test/lib/tarantool_server.py b/test/lib/tarantool_server.py
index aa71a80ba9fd93a9d16078ec012293185d7ac6a6..cf11b0829fc88af2cdb33ac8034d9d050a503a2c 100644
--- a/test/lib/tarantool_server.py
+++ b/test/lib/tarantool_server.py
@@ -575,8 +575,15 @@ class TarantoolServer(Server):
             return yaml.load(self.admin("box.info." + param, silent=True))[0]
         return yaml.load(self.admin("box.info", silent=True))
 
-    def wait_lsn(self, lsn):
-        while (int(self.get_param("lsn")) < lsn):
+    def get_lsn(self, node_uuid):
+        """Return node_uuid's LSN from box.info.cluster, or -1 if absent."""
+        cluster = self.get_param("cluster")
+        if node_uuid not in cluster:
+            return -1
+        return int(cluster[node_uuid])
+
+    def wait_lsn(self, node_uuid, lsn):
+        while (self.get_lsn(node_uuid) < lsn):  # get_lsn is -1 until the node is listed
+            time.sleep(0.01)  # poll every 10 ms; there is no timeout
 
     def version(self):
diff --git a/test/replication/hot_standby.result b/test/replication/hot_standby.result
index 3b3060400dd93ab85bc9bca35a3e88ac4c2e0847..836ee4ccd1a1541ccf70d38d267dedf88ec22c9b 100644
--- a/test/replication/hot_standby.result
+++ b/test/replication/hot_standby.result
@@ -12,14 +12,20 @@ while box.space['_priv']:len() < 1 do box.fiber.sleep(0.01) end;
 ---
 ...
 do
-    begin_lsn = box.info.lsn
+    local pri_uuid = ''
+    local begin_lsn = 0
 
-    function _set_pri_lsn(_lsn)
+    function _set_pri_lsn(_uuid, _lsn)
+        pri_uuid = _uuid
         begin_lsn = _lsn
     end
 
+    function _get_pri_lsn()
+        return box.info.cluster[pri_uuid]
+    end
+
     function _print_lsn()
-        return (box.info.lsn - begin_lsn + 1)
+        return (_get_pri_lsn() - begin_lsn + 1)
     end
 
     function _insert(_begin, _end)
@@ -39,7 +45,7 @@ do
     end
 
     function _wait_lsn(_lsnd)
-        while box.info.lsn < _lsnd + begin_lsn do
+        while _get_pri_lsn() < _lsnd + begin_lsn do
             box.fiber.sleep(0.001)
         end
         begin_lsn = begin_lsn + _lsnd
@@ -51,13 +57,10 @@ end;
 --# set connection default
 -- set begin lsn on master, replica and hot_standby.
 --# set variable replica_port to 'replica.primary_port'
-begin_lsn = box.info.lsn
----
-...
 a = box.net.box.new('127.0.0.1', replica_port)
 ---
 ...
-a:call('_set_pri_lsn', box.info.lsn)
+a:call('_set_pri_lsn', box.info.node, box.info.cluster[box.info.node])
 ---
 - []
 ...
@@ -124,7 +127,7 @@ box.fiber.sleep(0.2)
 a = box.net.box.new('127.0.0.1', hot_standby_port)
 ---
 ...
-a:call('_set_pri_lsn', box.info.lsn)
+a:call('_set_pri_lsn', box.info.node, box.info.cluster[box.info.node])
 ---
 - []
 ...
diff --git a/test/replication/hot_standby.test.lua b/test/replication/hot_standby.test.lua
index 883f2796d0216e15bf602ee180125c0a5d5ea1a9..72e2120545b6ef8d9706a58dabeb1472948cbfbd 100644
--- a/test/replication/hot_standby.test.lua
+++ b/test/replication/hot_standby.test.lua
@@ -9,14 +9,20 @@ box.schema.user.grant('guest', 'read,write,execute', 'universe')
 --# set connection default, hot_standby, replica
 while box.space['_priv']:len() < 1 do box.fiber.sleep(0.01) end;
 do
-    begin_lsn = box.info.lsn
+    local pri_uuid = ''
+    local begin_lsn = 0
 
-    function _set_pri_lsn(_lsn)
+    function _set_pri_lsn(_uuid, _lsn)
+        pri_uuid = _uuid
         begin_lsn = _lsn
     end
 
+    function _get_pri_lsn()
+        return box.info.cluster[pri_uuid]
+    end
+
     function _print_lsn()
-        return (box.info.lsn - begin_lsn + 1)
+        return (_get_pri_lsn() - begin_lsn + 1)
     end
 
     function _insert(_begin, _end)
@@ -36,7 +42,7 @@ do
     end
 
     function _wait_lsn(_lsnd)
-        while box.info.lsn < _lsnd + begin_lsn do
+        while _get_pri_lsn() < _lsnd + begin_lsn do
             box.fiber.sleep(0.001)
         end
         begin_lsn = begin_lsn + _lsnd
@@ -47,10 +53,8 @@ end;
 
 -- set begin lsn on master, replica and hot_standby.
 --# set variable replica_port to 'replica.primary_port'
-begin_lsn = box.info.lsn
-
 a = box.net.box.new('127.0.0.1', replica_port)
-a:call('_set_pri_lsn', box.info.lsn)
+a:call('_set_pri_lsn', box.info.node, box.info.cluster[box.info.node])
 a:close()
 
 space = box.schema.create_space('tweedledum')
@@ -70,7 +74,7 @@ box.fiber.sleep(0.2)
 -- uses MASTER_PORT environment variable for its primary_port
 --# set variable hot_standby_port to 'hot_standby.master_port'
 a = box.net.box.new('127.0.0.1', hot_standby_port)
-a:call('_set_pri_lsn', box.info.lsn)
+a:call('_set_pri_lsn', box.info.node, box.info.cluster[box.info.node])
 a:close()
 
 --# set connection hot_standby
diff --git a/test/replication/init_storage.test.py b/test/replication/init_storage.test.py
index 84599a8c31a8c089862e76004c6774720caaf2e5..4d13de50b5c9831d507267aa36381a25f9f1413e 100644
--- a/test/replication/init_storage.test.py
+++ b/test/replication/init_storage.test.py
@@ -31,7 +31,8 @@ replica.cleanup(True)
 master.admin('box.snapshot()')
 master.restart()
 master.admin('for k = 10, 19 do box.space[42]:insert{k, k*k*k} end')
-lsn = master.get_param('lsn')
+master_uuid = master.get_param('node')
+lsn = master.get_lsn(master_uuid)
 print '-------------------------------------------------------------'
 print 'replica test 2 (must be ok)'
 print '-------------------------------------------------------------'
@@ -43,7 +44,7 @@ replica.rpl_master = master
 replica.deploy()
 
 replica.admin('space = box.space.test');
-replica.wait_lsn(lsn)
+replica.wait_lsn(master_uuid, lsn)
 for i in range(1, 20):
     replica.admin('space:get{%d}' % i)
 
diff --git a/test/replication/status.test.py b/test/replication/status.test.py
index 111259af87e50d95545430efab484eebd5e7d47e..c5385931263135c5aee80901ef228a026e9f7fd1 100644
--- a/test/replication/status.test.py
+++ b/test/replication/status.test.py
@@ -13,7 +13,7 @@ replica.rpl_master = master
 replica.vardir = os.path.join(master.vardir, 'replica')
 replica.deploy()
 
-replica.get_param("lsn")
+replica.get_param('node')
 
 cycles = 0
 status = replica.admin.execute_no_reconnect("box.info.status", True)
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index 792ebe9c8090f1d94405481d532cc288c0e989f5..ab9af45495f3cdfe00ddc9f0dd7055b2afa2e9b1 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -2,3 +2,4 @@
 core = tarantool
 script =  master.lua
 description = tarantool/box, replication
+disabled = consistent.test.lua
diff --git a/test/replication/swap.test.py b/test/replication/swap.test.py
index fa4efc728d6217d0cb82cd8cdc27fa7011a55f87..396db71e8e085c79713d7eade5426a8eb19ab385 100644
--- a/test/replication/swap.test.py
+++ b/test/replication/swap.test.py
@@ -10,8 +10,7 @@ def insert_tuples(_server, begin, end, msg = "tuple"):
     for i in range(begin, end):
         _server.sql("insert into t0 values (%d, '%s %d')" % (i, msg, i))
 
-def select_tuples(_server, begin, end, lsn):
-    _server.wait_lsn(lsn)
+def select_tuples(_server, begin, end):
     for i in range(begin, end):
         _server.sql("select * from t0 where k0 = %d" % i)
 
@@ -28,6 +27,10 @@ master.admin("box.schema.user.grant('guest', 'read,write,execute', 'universe')")
 replica.admin("while box.space['_priv']:len() < 1 do box.fiber.sleep(0.01) end")
 master.admin("s = box.schema.create_space('tweedledum', {id = 0})")
 master.admin("s:create_index('primary', {type = 'hash'})")
+
+master_uuid = master.get_param('node')
+replica_uuid = replica.get_param('node')
+
 id = ID_BEGIN
 for i in range(REPEAT):
     print "test %d iteration" % i
@@ -35,13 +38,15 @@ for i in range(REPEAT):
     # insert to master
     insert_tuples(master, id, id + ID_STEP)
     # select from replica
-    select_tuples(replica, id, id + ID_STEP, master.get_param("lsn"))
+    replica.wait_lsn(master_uuid, master.get_lsn(master_uuid))
+    select_tuples(replica, id, id + ID_STEP)
     id += ID_STEP
 
     # insert to master
     insert_tuples(master, id, id + ID_STEP)
     # select from replica
-    select_tuples(replica, id, id + ID_STEP, master.get_param("lsn"))
+    replica.wait_lsn(master_uuid, master.get_lsn(master_uuid))
+    select_tuples(replica, id, id + ID_STEP)
     id += ID_STEP
 
     print "swap servers"
@@ -57,13 +62,15 @@ for i in range(REPEAT):
     # insert to replica
     insert_tuples(replica, id, id + ID_STEP)
     # select from master
-    select_tuples(master, id, id + ID_STEP, replica.get_param("lsn"))
+    master.wait_lsn(replica_uuid, replica.get_lsn(replica_uuid))
+    select_tuples(master, id, id + ID_STEP)
     id += ID_STEP
 
     # insert to replica
     insert_tuples(replica, id, id + ID_STEP)
     # select from master
-    select_tuples(master, id, id + ID_STEP, replica.get_param("lsn"))
+    master.wait_lsn(replica_uuid, replica.get_lsn(replica_uuid))
+    select_tuples(master, id, id + ID_STEP)
     id += ID_STEP
 
     print "rollback servers configuration"
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 2aef1a7d2332bddda80ea51ac6a1e15b0012a3db..e0ba1a678e46d02f543aaadc7fd8e5d4b7f599df 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -37,6 +37,11 @@ add_executable(arena_mt.test arena_mt.c)
 target_link_libraries(arena_mt.test small pthread)
 add_executable(pt_alloc.test pt_alloc.cc)
 target_link_libraries(pt_alloc.test small)
+add_executable(log_dir.test log_dir.cc test.c)
+target_link_libraries(log_dir.test ${LIBUUID_LIBRARIES} core small salad misc bitset msgpuck
+    ${LIBEV_LIBRARIES}
+    ${LIBEIO_LIBRARIES}
+    ${LIBCORO_LIBRARIES})
 
 set(MSGPUCK_DIR ${PROJECT_SOURCE_DIR}/src/lib/msgpuck/)
 add_executable(msgpack.test
diff --git a/test/unit/log_dir.cc b/test/unit/log_dir.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0dcf422585b099253e648e7490b796013a877053
--- /dev/null
+++ b/test/unit/log_dir.cc
@@ -0,0 +1,257 @@
+#include <sys/types.h>
+#include <dirent.h>
+
+extern "C" {
+#include "test.h"
+} /* extern "C" */
+#include "log_io.h"
+#include "fio.h"
+#include "recovery.h" /* wal_write_setlsn() */
+#include "memory.h"
+#include "fiber.h"
+#include "crc32.h"
+
+#define header() note("*** %s ***", __func__)
+#define footer() note("*** %s: done ***", __func__)
+
+tt_uuid node_uuid;
+
+/*
+ * Create a scratch directory from the /tmp/fileXXXXXX template, write one
+ * xlog per row of files[] (files_n rows of node_n per-node LSNs; entries
+ * <= 0 are skipped) and scan the directory into *dir. Each xlog is opened
+ * with the sum of the row's positive LSNs.
+ */
+static void
+testset_create(struct log_dir *dir, int64_t *files, int files_n, int node_n)
+{
+	char tpl[] = "/tmp/fileXXXXXX";
+
+	struct fio_batch *batch = fio_batch_alloc(1024);
+	/* Calls with side effects stay outside assert(): NDEBUG drops them. */
+	int rc = log_dir_create(dir);
+	assert(rc == 0);
+	strcpy(dir->open_wflags, "wx");
+	dir->filetype = "XLOG\n";
+	dir->filename_ext = ".xlog";
+	char *tmpdir = mkdtemp(tpl);
+	assert(tmpdir != NULL);
+	dir->dirname = strdup(tmpdir);
+	assert(dir->dirname != NULL);
+	dir->mode = 0660;
+
+	struct mh_cluster_t *cluster = mh_cluster_new();
+	assert(cluster != NULL);
+	for (int f = 0; f < files_n; f++) {
+		int64_t lsnsum = 0;
+		for (uint32_t node_id = 0; node_id < node_n; node_id++) {
+			int64_t lsn = *(files + f * node_n + node_id);
+			if (lsn <= 0)
+				continue;
+
+			/* Calculate LSNSUM */
+			lsnsum += lsn;
+
+			/* Update cluster hash */
+			struct node *node = (struct node *)
+					calloc(1, sizeof(*node));
+			assert(node != NULL);
+			node->id = node_id;
+			node->current_lsn = lsn;
+			uint32_t k = mh_cluster_put(cluster,
+				(const struct node **) &node, NULL, NULL);
+			assert(k != mh_end(cluster));
+			(void) k;
+		}
+
+		/* Write XLOG */
+		struct log_io *l = log_io_open_for_write(dir, lsnsum, &node_uuid,
+							 INPROGRESS);
+		assert(l != NULL);
+		rc = wal_write_setlsn(l, batch, cluster);
+		assert(rc == 0);
+		log_io_close(&l);
+		mh_cluster_clean(cluster);
+	}
+
+	mh_cluster_delete(cluster);
+	free(batch);
+
+	rc = log_dir_scan(dir);
+	assert(rc == 0);
+	(void) rc;
+
+#if 0
+	diag("dir->map dump:");
+	diag("file => len(lsns)");
+	struct log_meta *meta = log_dir_map_first(&dir->map);
+	while (meta != NULL) {
+		diag("%lld => %u", (long long) meta->lsnsum, meta->lsn_count);
+		meta = log_dir_map_next(&dir->map, meta);
+	}
+
+	diag("dir->lsnmap dump:");
+	diag("node_id,lsn => file");
+	struct log_meta_lsn *meta_lsn = log_dir_lsnmap_first(&dir->lsnmap);
+	while (meta_lsn != NULL) {
+		diag("%u,%lld => %lld", meta_lsn->node_id,
+		     (long long) meta_lsn->lsn,
+		     (long long) meta_lsn->meta->lsnsum);
+		meta_lsn = log_dir_lsnmap_next(&dir->lsnmap, meta_lsn);
+	}
+#endif
+}
+
+/* Unlink every entry of dir->dirname, remove the directory, destroy *dir. */
+static void
+testset_destroy(struct log_dir *dir)
+{
+	char path[PATH_MAX];
+	struct dirent *entry;
+	DIR *handle = opendir(dir->dirname);
+	assert(handle != NULL);
+	for (entry = readdir(handle); entry != NULL; entry = readdir(handle)) {
+		snprintf(path, sizeof(path), "%s/%s", dir->dirname, entry->d_name);
+		unlink(path);
+	}
+	closedir(handle);
+	rmdir(dir->dirname);
+	log_dir_destroy(dir);
+}
+
+/* Build the fixture from files[], then check log_dir_next() per query row. */
+static void
+test_next(int64_t *files, int files_n, int node_n, int64_t *queries, int query_n)
+{
+	struct log_dir dir;
+	testset_create(&dir, files, files_n, node_n);
+
+	struct mh_cluster_t *cluster = mh_cluster_new();
+	assert(cluster != NULL);
+
+	/* A row is node_n per-node LSNs followed by the expected result. */
+	const int row_len = node_n + 1;
+	for (int q = 0; q < query_n; q++) {
+		int64_t *row = &queries[q * row_len];
+		/* Register every node whose LSN in this row is positive. */
+		for (uint32_t node_id = 0; node_id < node_n; node_id++) {
+			if (row[node_id] <= 0)
+				continue;
+			struct node *node = (struct node *) calloc(1, sizeof(*node));
+			assert(node != NULL);
+			node->id = node_id;
+			node->current_lsn = row[node_id];
+			uint32_t k = mh_cluster_put(cluster,
+				(const struct node **) &node, NULL, NULL);
+			assert(k != mh_end(cluster));
+		}
+
+		int64_t check = row[node_n];
+		int64_t value = log_dir_next(&dir, cluster);
+		is(value, check, "query #%d", q + 1);
+		mh_cluster_clean(cluster);
+	}
+
+	mh_cluster_delete(cluster);
+	testset_destroy(&dir);
+}
+
+static int
+test1()
+{
+	plan(36); /* one is() check per row of queries[] below */
+	header();
+
+	enum { NODE_N = 4}; /* per-node LSN columns in files[] and queries[] */
+	int64_t files[][NODE_N] = {
+		{ 10, 0, 0, 0}, /* =10.xlog */
+		{ 12, 2, 0, 0}, /* =14.xlog */
+		{ 14, 2, 0, 0}, /* =16.xlog */
+		{ 14, 2, 2, 0}, /* =18.xlog */
+		{ 14, 4, 2, 3}, /* =23.xlog */
+		{ 14, 4, 2, 5}, /* =25.xlog */
+	};
+	enum { FILE_N = sizeof(files) / (sizeof(files[0])) };
+
+	int64_t queries[][NODE_N + 1] = { /* last column: expected log_dir_next() */
+		/* not found (lsns are too old) */
+		{  0,  0, 0, 0, /* => */ INT64_MAX},
+		{  1,  0, 0, 0, /* => */ INT64_MAX},
+		{  5,  0, 0, 0, /* => */ INT64_MAX},
+
+		/* =10.xlog (left bound) */
+		{  10, 0, 0, 0, /* => */ 10},
+		{  10, 1, 0, 0, /* => */ 10},
+		{  10, 2, 0, 0, /* => */ 10},
+		{  10, 3, 0, 0, /* => */ 10},
+		{  10, 4, 0, 0, /* => */ 10},
+
+		/* =10.xlog (middle) */
+		{  11, 0, 0, 0, /* => */ 10},
+		{  11, 1, 0, 0, /* => */ 10},
+		{  11, 2, 0, 0, /* => */ 10},
+		{  11, 3, 0, 0, /* => */ 10},
+		{  11, 4, 0, 0, /* => */ 10},
+		{  11, 5, 3, 6, /* => */ 10},
+
+		/* =10.xlog (right bound) */
+		{  12, 0, 0, 0, /* => */ 10},
+		{  12, 1, 0, 0, /* => */ 10},
+		{  12, 1, 1, 1, /* => */ 10},
+		{  12, 1, 2, 5, /* => */ 10},
+
+		/* =14.xlog */
+		{  12, 2, 0, 0, /* => */ 14},
+		{  12, 3, 0, 0, /* => */ 14},
+		{  12, 4, 0, 0, /* => */ 14},
+		{  12, 5, 3, 6, /* => */ 14},
+
+		/* =16.xlog */
+		{  14, 2, 0, 0, /* => */ 16},
+		{  14, 2, 1, 0, /* => */ 16},
+		{  14, 2, 0, 1, /* => */ 16},
+
+		/* =18.xlog */
+		{  14, 2, 2, 0, /* => */ 18},
+		{  14, 2, 4, 0, /* => */ 18},
+		{  14, 2, 4, 3, /* => */ 18},
+		{  14, 2, 4, 5, /* => */ 18},
+		{  14, 4, 2, 0, /* => */ 18},
+		{  14, 5, 2, 0, /* => */ 18},
+
+		/* =23.xlog */
+		{  14, 4, 2, 3, /* => */ 23},
+		{  14, 5, 2, 3, /* => */ 23},
+
+		/* =25.xlog */
+		{  14, 4, 2, 5, /* => */ 25},
+		{  14, 5, 2, 6, /* => */ 25},
+		{ 100, 9, 9, 9, /* => */ 25},
+	};
+	enum { QUERY_N = sizeof(queries) / (sizeof(queries[0])) };
+
+	test_next((int64_t *) files, FILE_N, NODE_N, (int64_t *) queries, QUERY_N);
+
+	footer();
+	return check_plan();
+}
+
+int
+main(int argc, char *argv[])
+{
+	(void) argc;
+
+	say_init(argv[0]);
+	say_set_log_level(4);
+	memory_init();
+	fiber_init();
+	crc32_init();
+	tt_uuid_create(&node_uuid); /* passed to log_io_open_for_write() */
+
+	plan(1); /* test1() as a whole is reported as a single subtest */
+	test1(); /* its return value is not used here */
+
+	fiber_free();
+	memory_free();
+	return check_plan();
+}
diff --git a/test/unit/log_dir.result b/test/unit/log_dir.result
new file mode 100644
index 0000000000000000000000000000000000000000..76c26b37a7c1b6b648b821e01e2637439b5d3781
--- /dev/null
+++ b/test/unit/log_dir.result
@@ -0,0 +1,41 @@
+1..1
+    1..36
+    # *** test1 ***
+    ok 1 - query #1
+    ok 2 - query #2
+    ok 3 - query #3
+    ok 4 - query #4
+    ok 5 - query #5
+    ok 6 - query #6
+    ok 7 - query #7
+    ok 8 - query #8
+    ok 9 - query #9
+    ok 10 - query #10
+    ok 11 - query #11
+    ok 12 - query #12
+    ok 13 - query #13
+    ok 14 - query #14
+    ok 15 - query #15
+    ok 16 - query #16
+    ok 17 - query #17
+    ok 18 - query #18
+    ok 19 - query #19
+    ok 20 - query #20
+    ok 21 - query #21
+    ok 22 - query #22
+    ok 23 - query #23
+    ok 24 - query #24
+    ok 25 - query #25
+    ok 26 - query #26
+    ok 27 - query #27
+    ok 28 - query #28
+    ok 29 - query #29
+    ok 30 - query #30
+    ok 31 - query #31
+    ok 32 - query #32
+    ok 33 - query #33
+    ok 34 - query #34
+    ok 35 - query #35
+    ok 36 - query #36
+    # *** test1: done ***
+ok 1 - subtests
diff --git a/test/unit/test.h b/test/unit/test.h
index 06558c2354f8a64a4fb241bac822c36fe19c4ac2..55f0f17bd661b48a7840b6e45cb9b75fdd3dea23 100644
--- a/test/unit/test.h
+++ b/test/unit/test.h
@@ -27,7 +27,7 @@ int _ok(int condition, const char *fmt, ...);
 /* private function, use note(...) or diag(...) instead */
 void _space(FILE *stream);
 
-#define msg(stream, ...) ({ __space(stream); fprintf(stream, "# ");            \
+#define msg(stream, ...) ({ _space(stream); fprintf(stream, "# ");            \
 	fprintf(stream, __VA_ARGS__); fprintf(stream, "\n"); })
 
 #define note(...) msg(stdout, __VA_ARGS__)
diff --git a/test/wal/alter.result b/test/wal/alter.result
index d417108da16d0b331d1a027e96a6ddc322039de8..4ff8a354729c9ff2a100242aff4efd4e0612facf 100644
--- a/test/wal/alter.result
+++ b/test/wal/alter.result
@@ -17,7 +17,7 @@ end;
 ...
 #spaces;
 ---
-- 65523
+- 65522
 ...
 -- cleanup
 for k, v in pairs(spaces) do