From 64c463e0366233eb3bad82e094831daa058b5888 Mon Sep 17 00:00:00 2001 From: Nick Zavaritsky <mejedi@gmail.com> Date: Mon, 11 Jan 2016 20:50:45 +0300 Subject: [PATCH] Fix #1075: box.cfg blocks until rw mode is activated, 1.6 --- src/box/box.cc | 22 ++++++++++++++++++++++ test/replication-py/init_storage.result | 4 ++++ test/replication-py/init_storage.test.py | 15 ++++++++++++++- test/replication-py/readonly.result | 4 ++++ test/replication-py/readonly.test.py | 16 +++++++++++++++- test/replication-py/replica.lua | 5 ++++- 6 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index fb6e93c984..3c25883d71 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -31,6 +31,7 @@ #include "box/box.h" #include <say.h> +#include "ipc.h" #include "iproto.h" #include "iproto_constants.h" #include "recovery.h" @@ -86,6 +87,7 @@ static struct recover_row_ctx { bool snapshot_in_progress = false; static bool box_init_done = false; bool is_ro = true; +struct ipc_channel *wait_rw; void recover_row_ctx_init(struct recover_row_ctx *ctx, size_t rows_per_wal) @@ -175,6 +177,18 @@ void box_set_ro(bool ro) { is_ro = ro; + if (ro == false && !ipc_channel_is_full(wait_rw)) + ipc_channel_put(wait_rw, NULL); +} + +static void +box_wait_rw() +{ + void *msg; + while (is_ro) { + ipc_channel_get(wait_rw, &msg); + assert(msg == NULL); + } } bool @@ -939,6 +953,8 @@ box_free(void) #endif engine_shutdown(); } + if (wait_rw) + ipc_channel_delete(wait_rw); } static void @@ -985,6 +1001,10 @@ box_init(void) { error_init(); + wait_rw = ipc_channel_new(1); + if (wait_rw == NULL) + diag_raise(); + tuple_init(cfg_getd("slab_alloc_arena"), cfg_geti("slab_alloc_minimal"), cfg_geti("slab_alloc_maximal"), @@ -1086,6 +1106,8 @@ box_init(void) /* Enter read-write mode. */ if (recovery->server_id > 0) box_set_ro(false); + else + box_wait_rw(); title("running"); say_info("ready to accept requests"); diff --git a/test/replication-py/init_storage.result b/test/replication-py/init_storage.result index 0e27d903be..759a1b86bc 100644 --- a/test/replication-py/init_storage.result +++ b/test/replication-py/init_storage.result @@ -17,6 +17,10 @@ box.space.test --- - null ... +box_cfg_done +--- +- false +... ------------------------------------------------------------- replica JOIN ------------------------------------------------------------- diff --git a/test/replication-py/init_storage.test.py b/test/replication-py/init_storage.test.py index d94d4414f7..7083ea09da 100644 --- a/test/replication-py/init_storage.test.py +++ b/test/replication-py/init_storage.test.py @@ -23,10 +23,23 @@ replica = TarantoolServer(server.ini) replica.script = 'replication-py/replica.lua' replica.vardir = server.vardir #os.path.join(server.vardir, 'replica') replica.rpl_master = master -replica.deploy() + +# #1075: Box.once should wait before the server enters RW mode +# +# We expect the replica to get blocked in box.cfg{}, hence wait = False. +# Since xlog files on master were deleted, they aren't delivered, +# and replica waits indefinitely. +# +# Note: replica waits for a log entry indicating that this very replica +# joined the cluster. Once the entry is fetched we assume that the +# replica is relatively up to date and enter RW mode. Never happens in +# this particular test case. +replica.deploy(wait = False) replica.admin('box.space.test') +replica.admin('box_cfg_done') # blocked in box.cfg it should be + replica.stop() replica.cleanup(True) diff --git a/test/replication-py/readonly.result b/test/replication-py/readonly.result index a7848717d0..954221155d 100644 --- a/test/replication-py/readonly.result +++ b/test/replication-py/readonly.result @@ -39,3 +39,7 @@ box.info.vclock[2] --- - null ... +box_cfg_done +--- +- false +... diff --git a/test/replication-py/readonly.test.py b/test/replication-py/readonly.test.py index 0ecf684dd3..1b12cabe50 100644 --- a/test/replication-py/readonly.test.py +++ b/test/replication-py/readonly.test.py @@ -31,7 +31,18 @@ os.remove(wal) # Start replica without master server.stop() -replica.start() + +# #1075: Box.once should wait before the server enters RW mode +# +# We expect the replica to get blocked in box.cfg{}, hence wait = False. +# Since neither xlog files nor master are available, the replica waits +# indefinitely. +# +# Note: replica monitors _cluster table, synchronized via replication. +# The replica enters RW mode once it discovers that according to +# _cluster table it had joined the cluster. Never happens in this +# particular test case. +replica.start(wait = False) replica.admin('box.cfg{replication_source = ""}') # Check that replica in read-only mode @@ -41,6 +52,9 @@ replica.admin('box.info.server.lsn') replica.admin('space = box.schema.space.create("ro")') replica.admin('box.info.vclock[%d]' % replica_id) +# Check that box.cfg didn't return yet +replica.admin('box_cfg_done') + replica.stop() replica.cleanup(True) server.deploy() diff --git a/test/replication-py/replica.lua b/test/replication-py/replica.lua index 3a08208e06..00ecd005ec 100644 --- a/test/replication-py/replica.lua +++ b/test/replication-py/replica.lua @@ -1,4 +1,7 @@ #!/usr/bin/env tarantool +box_cfg_done = false + +require('console').listen(os.getenv('ADMIN')) box.cfg({ listen = os.getenv("LISTEN"), @@ -6,4 +9,4 @@ box.cfg({ slab_alloc_arena = 0.1, }) -require('console').listen(os.getenv('ADMIN')) +box_cfg_done = true -- GitLab