From cb9307a77ab0b91c5fcbe3e21c74f31241d87780 Mon Sep 17 00:00:00 2001 From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Date: Mon, 16 Jan 2023 23:26:08 +0100 Subject: [PATCH] replication: introduce cluster name The patch adds 2 new entities to replication: the concept of a cluster which has multiple replicasets and a name for this cluster. The name so far doesn't participate in any replication protocols. It is just stored in _schema and is validated against the config. The old mentions of 'cluster' (in logs, in some protocol keys like in the feedback daemon) everywhere are now considered obsolete and probably will be eventually replaced with 'replicaset'. Part of #5029 @TarantoolBot document Title: `box.cfg.cluster_name` and `box.info.cluster.name` The new option `box.cfg.cluster_name` allows to assign the cluster name to a human-readable text value to be displayed in the new info key - `box.info.cluster.name` - and to be validated when the instances in the cluster connect to each other. The name is broadcasted in "box.id" built-in event as "cluster_name" key. It is string when set and nil when not set. When set, it has to match in all instances of the entire cluster in all its replicasets. If a name wasn't set on cluster bootstrap (was forgotten or the cluster is upgraded from a version < 3.0), then it can be set on an already running instance via `box.cfg.cluster_name`. To change or drop an already installed name one has to use `box.cfg.force_recovery == true` in all instances of the cluster. After the name is updated and all the instances synced, the `force_recovery` can be set back to `false`. The name can be <= 63 symbols long, can consist only of chars '0'-'9', '-' and 'a'-'z'. It must start with a letter. When upper-case letters are used in `box.cfg`, they are automatically converted to lower-case. The names are host- and DNS-friendly. --- changelogs/unreleased/global-names.md | 5 + src/box/alter.cc | 68 +++ src/box/box.cc | 93 +++- src/box/box.h | 1 + src/box/errcode.h | 1 + src/box/lua/cfg.cc | 13 + src/box/lua/info.c | 8 +- src/box/lua/load_cfg.lua | 17 + src/box/lua/upgrade.lua | 11 + src/box/replication.cc | 2 + src/box/replication.h | 2 + test/box-luatest/downgrade_test.lua | 19 + test/box/error.result | 1 + .../replication-luatest/cluster_name_test.lua | 443 ++++++++++++++++++ 14 files changed, 675 insertions(+), 9 deletions(-) create mode 100644 changelogs/unreleased/global-names.md create mode 100644 test/replication-luatest/cluster_name_test.lua diff --git a/changelogs/unreleased/global-names.md b/changelogs/unreleased/global-names.md new file mode 100644 index 0000000000..2a48b6f980 --- /dev/null +++ b/changelogs/unreleased/global-names.md @@ -0,0 +1,5 @@ +## feature/replication + +* A new option `box.cfg.cluster_name` allows assigning a human-readable name to + the entire cluster. It has to match in all instances and is displayed in + `box.info.cluster.name` (gh-5029). diff --git a/src/box/alter.cc b/src/box/alter.cc index 933d6af302..628e647088 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -58,6 +58,7 @@ #include "space_upgrade.h" #include "box.h" #include "authentication.h" +#include "node_name.h" /* {{{ Auxiliary functions and methods. */ @@ -248,6 +249,35 @@ index_opts_decode(struct index_opts *opts, const char *map, return 0; } +/** Decode an optional node name field from the tuple. 
*/ +static int +tuple_field_node_name(char *out, struct tuple *tuple, uint32_t fieldno, + const char *field_name) +{ + const char *name, *field; + uint32_t len; + if (tuple == NULL) + goto nil; + field = tuple_field(tuple, fieldno); + if (field == NULL || mp_typeof(*field) == MP_NIL) + goto nil; + if (mp_typeof(*field) != MP_STR) + goto error; + name = mp_decode_str(&field, &len); + if (!node_name_is_valid_n(name, len)) + goto error; + memcpy(out, name, len); + out[len] = 0; + return 0; +nil: + *out = 0; + return 0; +error: + diag_set(ClientError, ER_FIELD_TYPE, field_name, "a valid name", + "a bad name"); + return -1; +} + /** * Helper routine for functional index function verification: * only a deterministic persistent Lua function may be used in @@ -3898,6 +3928,19 @@ on_commit_dd_version(struct trigger *trigger, void * /* event */) return 0; } +/** Set cluster name on _schema commit. */ +static int +on_commit_cluster_name(struct trigger *trigger, void * /* event */) +{ + const char *name = (typeof(name))trigger->data; + if (strcmp(CLUSTER_NAME, name) == 0) + return 0; + strlcpy(CLUSTER_NAME, name, NODE_NAME_SIZE_MAX); + box_broadcast_id(); + say_info("cluster name: %s", node_name_str(name)); + return 0; +} + /** * This trigger implements the "last write wins" strategy for * "bootstrap_leader_uuid" tuple of space _schema. Comparison is performed by @@ -4073,6 +4116,31 @@ on_replace_dd_schema(struct trigger * /* trigger */, void *event) if (on_commit == NULL) return -1; txn_on_commit(txn, on_commit); + } else if (strcmp(key, "cluster_name") == 0) { + char name[NODE_NAME_SIZE_MAX]; + const char *field_name = "_schema['cluster_name'].value"; + if (tuple_field_node_name(name, new_tuple, + BOX_SCHEMA_FIELD_VALUE, + field_name) != 0) + return -1; + if (box_is_configured() && *CLUSTER_NAME != 0 && + strcmp(name, CLUSTER_NAME) != 0) { + if (!box_is_force_recovery) { + diag_set(ClientError, ER_UNSUPPORTED, + "Tarantool", "cluster name change " + "(without 'force_recovery')"); + return -1; + } + say_info("cluster rename allowed by 'force_recovery'"); + } + size_t size = strlen(name) + 1; + char *name_copy = (char *)xregion_alloc(&txn->region, size); + memcpy(name_copy, name, size); + struct trigger *on_commit = txn_alter_trigger_new( + on_commit_cluster_name, name_copy); + if (on_commit == NULL) + return -1; + txn_stmt_on_commit(stmt, on_commit); } return 0; } diff --git a/src/box/box.cc b/src/box/box.cc index 81b947a846..ef094dc291 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -95,6 +95,7 @@ #include "wal_ext.h" #include "mp_util.h" #include "small/static.h" +#include "node_name.h" static char status[64] = "unconfigured"; @@ -1323,6 +1324,25 @@ box_check_instance_uuid(struct tt_uuid *uuid) return box_check_uuid(uuid, "instance_uuid", true); } +/** Fetch an optional node name from the config. */ +static int +box_check_node_name(const char *cfg_name, char *out) +{ + const char *name = cfg_gets(cfg_name); + if (name == NULL) { + *out = 0; + return 0; + } + /* Nil name is allowed as Lua box.NULL or nil. Not as "". 
*/ + if (!node_name_is_valid(name)) { + diag_set(ClientError, ER_CFG, cfg_name, + "expected a valid name"); + return -1; + } + strlcpy(out, name, NODE_NAME_SIZE_MAX); + return 0; +} + static int box_check_replicaset_uuid(struct tt_uuid *uuid) { @@ -1362,6 +1382,12 @@ box_check_bootstrap_leader(struct uri *uri, struct tt_uuid *uuid) return -1; } +static int +box_check_cluster_name(char *out) +{ + return box_check_node_name("cluster_name", out); +} + static enum wal_mode box_check_wal_mode(const char *mode_name) { @@ -2022,6 +2048,40 @@ box_set_replication_anon(void) guard.is_active = false; } +/** + * Set the cluster name record in _schema, bypassing all checks like whether the + * instance is writable. It makes the function usable by bootstrap master when + * it is read-only but has to make the first registration. + */ +static void +box_set_cluster_name_record(const char *name) +{ + int rc; + if (*name == 0) { + rc = boxk(IPROTO_DELETE, BOX_SCHEMA_ID, "[%s]", "cluster_name"); + } else { + rc = boxk(IPROTO_REPLACE, BOX_SCHEMA_ID, "[%s%s]", + "cluster_name", name); + } + if (rc != 0) + diag_raise(); +} + +void +box_set_cluster_name(void) +{ + char name[NODE_NAME_SIZE_MAX]; + if (box_check_cluster_name(name) != 0) + diag_raise(); + /* Nil means the config doesn't care, allows to use any name. */ + if (*name == 0) + return; + if (strcmp(CLUSTER_NAME, name) == 0) + return; + box_check_writable_xc(); + box_set_cluster_name_record(name); +} + /** Trigger to catch ACKs from all nodes when need to wait for quorum. */ struct box_quorum_trigger { /** Inherit trigger. */ @@ -4209,17 +4269,25 @@ box_process_vote(struct ballot *ballot) ballot->registered_replica_uuids_size = i; } +/** Fill _schema space with initial data on bootstrap. */ static void -box_set_replicaset_uuid(void) +box_populate_schema_space(void) { struct tt_uuid replicaset_uuid; if (box_check_replicaset_uuid(&replicaset_uuid) != 0) diag_raise(); + char cluster_name[NODE_NAME_SIZE_MAX]; + if (box_check_cluster_name(cluster_name) != 0) + diag_raise(); + if (tt_uuid_is_nil(&replicaset_uuid)) tt_uuid_create(&replicaset_uuid); if (boxk(IPROTO_INSERT, BOX_SCHEMA_ID, "[%s%s]", "replicaset_uuid", tt_uuid_str(&replicaset_uuid))) diag_raise(); + box_set_cluster_name_record(cluster_name); + if (bootstrap_strategy == BOOTSTRAP_STRATEGY_SUPERVISED) + box_set_bootstrap_leader_record(); } static void @@ -4319,10 +4387,18 @@ check_bootstrap_unanimity(void) static int check_global_ids_integrity(void) { + char cluster_name[NODE_NAME_SIZE_MAX]; struct tt_uuid replicaset_uuid; - if (box_check_replicaset_uuid(&replicaset_uuid) != 0) + if (box_check_cluster_name(cluster_name) != 0 || + box_check_replicaset_uuid(&replicaset_uuid) != 0) return -1; + if (*cluster_name != 0 && strcmp(cluster_name, CLUSTER_NAME) != 0) { + diag_set(ClientError, ER_CLUSTER_NAME_MISMATCH, + node_name_str(cluster_name), + node_name_str(CLUSTER_NAME)); + return -1; + } if (!tt_uuid_is_nil(&replicaset_uuid) && !tt_uuid_is_equal(&replicaset_uuid, &REPLICASET_UUID)) { diag_set(ClientError, ER_REPLICASET_UUID_MISMATCH, @@ -4354,11 +4430,7 @@ bootstrap_master(void) uint32_t replica_id = 1; box_insert_replica_record(replica_id, &INSTANCE_UUID); assert(replica_by_uuid(&INSTANCE_UUID)->id == 1); - - /* Set UUID of a new replica set */ - box_set_replicaset_uuid(); - if (bootstrap_strategy == BOOTSTRAP_STRATEGY_SUPERVISED) - box_set_bootstrap_leader_record(); + box_populate_schema_space(); /* Enable WAL subsystem. 
*/ if (wal_enable() != 0) @@ -5090,13 +5162,18 @@ box_broadcast_id(void) { char buf[1024]; char *w = buf; - w = mp_encode_map(w, 3); + w = mp_encode_map(w, 4); w = mp_encode_str0(w, "id"); w = mp_encode_uint(w, instance_id); w = mp_encode_str0(w, "instance_uuid"); w = mp_encode_uuid(w, &INSTANCE_UUID); w = mp_encode_str0(w, "replicaset_uuid"); w = mp_encode_uuid(w, &REPLICASET_UUID); + w = mp_encode_str0(w, "cluster_name"); + if (*CLUSTER_NAME == 0) + w = mp_encode_nil(w); + else + w = mp_encode_str0(w, CLUSTER_NAME); box_broadcast("box.id", strlen("box.id"), buf, w); diff --git a/src/box/box.h b/src/box/box.h index fdeb89bbbb..b0c46c0b24 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -320,6 +320,7 @@ int box_set_replication_synchro_timeout(void); void box_set_replication_sync_timeout(void); void box_set_replication_skip_conflict(void); void box_set_replication_anon(void); +void box_set_cluster_name(void); void box_set_net_msg_max(void); int box_set_prepared_stmt_cache_size(void); int box_set_feedback(void); diff --git a/src/box/errcode.h b/src/box/errcode.h index bd909fae3f..4c851b686d 100644 --- a/src/box/errcode.h +++ b/src/box/errcode.h @@ -319,6 +319,7 @@ struct errcode_record { /*264 */_(ER_NIL_UUID, "Nil UUID is reserved and can't be used in replication") \ /*265 */_(ER_WRONG_FUNCTION_OPTIONS, "Wrong function options: %s") \ /*266 */_(ER_MISSING_SYSTEM_SPACES, "Snapshot has no system spaces") \ + /*267 */_(ER_CLUSTER_NAME_MISMATCH, "Cluster name mismatch: expected %s, got %s") \ /* * !IMPORTANT! Please follow instructions at start of the file diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc index 5812374529..a831db863b 100644 --- a/src/box/lua/cfg.cc +++ b/src/box/lua/cfg.cc @@ -404,6 +404,18 @@ lbox_cfg_set_replication_anon(struct lua_State *L) return 0; } +/** box.cfg.cluster_name. 
*/ +static int +lbox_cfg_set_cluster_name(struct lua_State *L) +{ + try { + box_set_cluster_name(); + } catch (Exception *) { + luaT_error(L); + } + return 0; +} + static int lbox_cfg_set_replication_skip_conflict(struct lua_State *L) { @@ -483,6 +495,7 @@ box_lua_cfg_init(struct lua_State *L) {"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout}, {"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict}, {"cfg_set_replication_anon", lbox_cfg_set_replication_anon}, + {"cfg_set_cluster_name", lbox_cfg_set_cluster_name}, {"cfg_set_net_msg_max", lbox_cfg_set_net_msg_max}, {"cfg_set_sql_cache_size", lbox_set_prepared_stmt_cache_size}, {"cfg_set_feedback", lbox_cfg_set_feedback}, diff --git a/src/box/lua/info.c b/src/box/lua/info.c index 0db8f947a4..86aa355d12 100644 --- a/src/box/lua/info.c +++ b/src/box/lua/info.c @@ -52,6 +52,7 @@ #include "box/raft.h" #include "box/txn_limbo.h" #include "box/schema.h" +#include "box/node_name.h" #include "lua/utils.h" #include "lua/serializer.h" /* luaL_setmaphint */ #include "fiber.h" @@ -396,7 +397,12 @@ lbox_info_cluster(struct lua_State *L) { if (!box_info_cluster_new_meaning) return lbox_info_replicaset(L); - lua_createtable(L, 0, 0); + lua_createtable(L, 0, 1); + if (*CLUSTER_NAME == 0) + luaL_pushnull(L); + else + lua_pushstring(L, CLUSTER_NAME); + lua_setfield(L, -2, "name"); return 1; } diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index f3a5e2f1ba..1096c1979b 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -152,6 +152,7 @@ local default_cfg = { replication = nil, instance_uuid = nil, replicaset_uuid = nil, + cluster_name = nil, custom_proc_title = nil, pid_file = nil, background = false, @@ -342,6 +343,7 @@ local template_cfg = { replication = 'string, number, table', instance_uuid = 'string', replicaset_uuid = 'string', + cluster_name = 'string', custom_proc_title = 'string', pid_file = 'string', background = 'boolean', @@ -392,9 +394,21 @@ local function normalize_uri_list_for_replication(port_list) return {port_list} end +local function normalize_node_name(name) + if name == nil then + return nil + end + -- Node names are DNS-friendly. Those are case-insensitive. Here instead of + -- storing the names as is they are converted to the lowercase. It makes + -- possible to use normal comparison functions and display also always + -- lowercase. + return string.lower(name) +end + -- options that require special handling local modify_cfg = { replication = normalize_uri_list_for_replication, + cluster_name = normalize_node_name, } local function purge_password_from_uri(uri) @@ -484,6 +498,7 @@ local dynamic_cfg = { bootstrap_strategy = private.cfg_set_bootstrap_strategy, instance_uuid = check_instance_uuid, replicaset_uuid = check_replicaset_uuid, + cluster_name = private.cfg_set_cluster_name, net_msg_max = private.cfg_set_net_msg_max, sql_cache_size = private.cfg_set_sql_cache_size, txn_timeout = private.cfg_set_txn_timeout, @@ -581,6 +596,7 @@ local dynamic_cfg_modules = { -- changed. -- local dynamic_cfg_order = { + force_recovery = 50, listen = 100, -- Order of replication_* options does not matter. The only -- rule - apply before replication itself. 
@@ -644,6 +660,7 @@ local dynamic_cfg_skip_at_load = { force_recovery = true, instance_uuid = true, replicaset_uuid = true, + cluster_name = true, net_msg_max = true, readahead = true, auth_type = true, diff --git a/src/box/lua/upgrade.lua b/src/box/lua/upgrade.lua index 8e36a50598..6ebdc066c0 100644 --- a/src/box/lua/upgrade.lua +++ b/src/box/lua/upgrade.lua @@ -1951,8 +1951,19 @@ local function store_replicaset_uuid_in_old_way(issue_handler) change_replicaset_uuid_key('replicaset_uuid', 'cluster') end +-- Global names are stored in spaces. Can't silently delete them. It might break +-- the cluster. The user has to do it manually and carefully. +local function check_names_are_not_set(issue_handler) + local _schema = box.space._schema + local msg_suffix = 'name is set. It is supported from version 3.0.0' + if _schema:get{'cluster_name'} ~= nil then + issue_handler('Cluster %s', msg_suffix) + end +end + local function downgrade_from_3_0_0(issue_handler) store_replicaset_uuid_in_old_way(issue_handler) + check_names_are_not_set(issue_handler) end -- Versions should be ordered from newer to older. diff --git a/src/box/replication.cc b/src/box/replication.cc index 9baee0dca1..fbf43b630f 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -39,6 +39,7 @@ #include "box.h" #include "gc.h" #include "error.h" +#include "node_name.h" #include "raft.h" #include "relay.h" #include "sio.h" @@ -46,6 +47,7 @@ uint32_t instance_id = REPLICA_ID_NIL; struct tt_uuid INSTANCE_UUID; struct tt_uuid REPLICASET_UUID; +char CLUSTER_NAME[NODE_NAME_SIZE_MAX]; struct uri_set replication_uris; double replication_timeout = 1.0; /* seconds */ diff --git a/src/box/replication.h b/src/box/replication.h index 591ed6d1b7..b2863fefc9 100644 --- a/src/box/replication.h +++ b/src/box/replication.h @@ -239,6 +239,8 @@ extern uint32_t instance_id; extern struct tt_uuid INSTANCE_UUID; /** UUID of the replica set. */ extern struct tt_uuid REPLICASET_UUID; +/** Name of the entire cluster with all its replicasets. */ +extern char CLUSTER_NAME[]; typedef rb_tree(struct replica) replica_hash_t; diff --git a/test/box-luatest/downgrade_test.lua b/test/box-luatest/downgrade_test.lua index 340c7c1738..78c3eba3a4 100644 --- a/test/box-luatest/downgrade_test.lua +++ b/test/box-luatest/downgrade_test.lua @@ -909,3 +909,22 @@ g.test_downgrade_replicaset_uuid_key = function(cg) end end) end + +g.test_downgrade_global_names = function(cg) + cg.server:exec(function() + local helper = require('test.box-luatest.downgrade_helper') + box.cfg{ + force_recovery = true, + cluster_name = 'test' + } + local prev_version = helper.prev_version(helper.app_version('3.0.0')) + local issues = box.schema.downgrade_issues(prev_version) + t.assert_str_contains(issues[1], 'Cluster name is set') + box.space._schema:delete{'cluster_name'} + box.cfg{ + cluster_name = box.NULL, + force_recovery = false, + } + box.schema.downgrade(prev_version) + end) +end diff --git a/test/box/error.result b/test/box/error.result index 68c20db041..2d037a02f3 100644 --- a/test/box/error.result +++ b/test/box/error.result @@ -484,6 +484,7 @@ t; | 264: box.error.NIL_UUID | 265: box.error.WRONG_FUNCTION_OPTIONS | 266: box.error.MISSING_SYSTEM_SPACES + | 267: box.error.CLUSTER_NAME_MISMATCH | ... 
test_run:cmd("setopt delimiter ''"); diff --git a/test/replication-luatest/cluster_name_test.lua b/test/replication-luatest/cluster_name_test.lua new file mode 100644 index 0000000000..87591c460a --- /dev/null +++ b/test/replication-luatest/cluster_name_test.lua @@ -0,0 +1,443 @@ +local fio = require('fio') +local replica_set = require('luatest.replica_set') +local server = require('luatest.server') +local t = require('luatest') +local g = t.group() + +local function wait_for_death(instance) + t.helpers.retrying({}, function() + assert(not instance.process:is_alive()) + end) + -- Nullify already dead process or server:drop() fails. + instance.process = nil +end + +g.before_all = function(lg) + lg.replica_set = replica_set:new({}) + local box_cfg = { + replication = { + server.build_listen_uri('master', lg.replica_set.id), + server.build_listen_uri('replica', lg.replica_set.id), + }, + replication_timeout = 0.1, + cluster_name = 'test-name', + } + lg.master = lg.replica_set:build_and_add_server({ + alias = 'master', + box_cfg = box_cfg, + }) + box_cfg.read_only = true + lg.replica = lg.replica_set:build_and_add_server({ + alias = 'replica', + box_cfg = box_cfg, + }) + lg.replica_set:start() +end + +g.after_all = function(lg) + lg.replica_set:drop() +end + +g.test_local_errors = function(lg) + lg.master:exec(function() + local msg = 'expected a valid name' + t.assert_error_msg_contains(msg, box.cfg, {cluster_name = '123'}) + t.assert_error_msg_contains(msg, box.cfg, {cluster_name = '-abc'}) + t.assert_error_msg_contains(msg, box.cfg, {cluster_name = 'a~b'}) + + msg = 'cluster name change' + t.assert_error_msg_contains(msg, box.cfg, {cluster_name = 'test'}) + + msg = 'type does not match' + local _schema = box.space._schema + t.assert_error_msg_contains(msg, _schema.replace, _schema, + {'cluster_name', 'bad name'}) + t.assert_error_msg_contains(msg, _schema.replace, _schema, + {'cluster_name', 100}) + end) + lg.replica:exec(function() + local msg = 'read-only instance' + t.assert_error_msg_contains(msg, box.cfg, {cluster_name = 'test'}) + end) +end + +g.test_cluster_name_basic = function(lg) + local check_name_f = function() + local _schema = box.space._schema + t.assert_equals(box.info.cluster.name, 'test-name') + t.assert_equals(_schema:get{'cluster_name'}.value, 'test-name') + end + lg.master:exec(check_name_f) + lg.replica:exec(check_name_f) +end + +g.test_cluster_rename = function(lg) + -- + -- Drop the name. + -- + lg.replica:exec(function() + box.cfg{force_recovery = true} + end) + lg.master:exec(function() + rawset(_G, 'last_event', {}) + rawset(_G, 'watcher', box.watch('box.id', function(_, event) + _G.last_event = event + end)) + box.cfg{ + force_recovery = true, + cluster_name = box.NULL, + } + box.space._schema:delete{'cluster_name'} + t.assert_equals(box.info.cluster.name, nil) + t.assert_equals(_G.last_event.cluster_name, nil) + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + -- Box.info uses applied names, not cfg. + t.assert_not_equals(box.cfg.cluster_name, nil) + t.assert_equals(box.info.cluster.name, nil) + box.cfg{cluster_name = box.NULL} + end) + -- + -- Replace with the same nil name. + -- + lg.master:exec(function() + local _schema = box.space._schema + -- No tuple at all -> box.NULL. + _schema:replace{'cluster_name', box.NULL} + t.assert_equals(_G.last_event.cluster_name, nil) + t.assert_equals(box.info.cluster.name, nil) + + -- Box.NULL -> nil. 
+ _schema:replace{'cluster_name'} + t.assert_equals(_G.last_event.cluster_name, nil) + t.assert_equals(box.info.cluster.name, nil) + end) + -- + -- Change nil -> not nil. + -- + lg.replica:exec(function() + box.cfg{force_recovery = false} + end) + lg.master:exec(function() + -- Don't need force_recovery for that. + box.cfg{ + force_recovery = false, + cluster_name = 'test' + } + t.assert_equals(_G.last_event.cluster_name, 'test') + t.assert_equals(box.info.cluster.name, box.cfg.cluster_name) + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + box.cfg{cluster_name = 'test'} + t.assert_equals(box.info.cluster.name, box.cfg.cluster_name) + end) + -- + -- Change not nil -> same not nil. + -- + lg.master:exec(function() + box.space._schema:replace(box.space._schema:get{'cluster_name'}) + t.assert_equals(box.info.cluster.name, 'test') + end) + -- + -- Change not nil -> new not nil. + -- + lg.replica:exec(function() + box.cfg{force_recovery = true} + end) + lg.master:exec(function() + -- The name is converted to the proper form automatically. + box.cfg{ + force_recovery = true, + cluster_name = 'TEST2' + } + t.assert_equals(box.cfg.cluster_name, 'test2') + t.assert_equals(_G.last_event.cluster_name, 'test2') + t.assert_equals(box.info.cluster.name, box.cfg.cluster_name) + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + t.assert_equals(box.cfg.cluster_name, 'test') + box.cfg{cluster_name = 'TeSt2'} + t.assert_equals(box.cfg.cluster_name, 'test2') + t.assert_equals(box.info.cluster.name, box.cfg.cluster_name) + end) + -- + -- Cleanup. + -- + lg.master:exec(function() + _G.watcher:unregister() + _G.watcher = nil + _G.last_event = nil + box.cfg{cluster_name = 'test-name'} + box.cfg{force_recovery = false} + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + box.cfg{cluster_name = 'test-name'} + box.cfg{force_recovery = false} + end) +end + +g.test_cluster_name_transactional = function(lg) + lg.master:exec(function() + t.assert_equals(box.info.cluster.name, 'test-name') + box.cfg{force_recovery = true} + box.begin() + box.space._schema:replace{'cluster_name', 'new-name'} + box.rollback() + t.assert_equals(box.info.cluster.name, 'test-name') + box.cfg{force_recovery = false} + end) +end + +g.test_cluster_name_bootstrap_mismatch = function(lg) + -- + -- New replica has no cluster name, the master does. Then the replica uses + -- the master's name, no conflict. + -- + local box_cfg = table.copy(lg.replica.box_cfg) + box_cfg.cluster_name = nil + -- Don't bother with fullmesh. Master won't replicate from the new instance. + box_cfg.bootstrap_strategy = 'legacy' + local new_replica = server:new({ + alias = 'new_replica', + box_cfg = box_cfg, + }) + new_replica:start() + new_replica:exec(function() + t.assert_equals(box.cfg.cluster_name, nil) + t.assert_equals(box.info.cluster.name, 'test-name') + box.cfg{cluster_name = 'test-name'} + end) + new_replica:drop() + -- + -- New replica has cluster name, the master doesn't. 
+ -- + lg.replica:exec(function() + box.cfg{force_recovery = true} + end) + lg.master:exec(function() + box.cfg{ + force_recovery = true, + cluster_name = box.NULL + } + t.assert_equals(box.info.cluster.name, 'test-name') + box.space._schema:delete{'cluster_name'} + t.assert_equals(box.info.cluster.name, nil) + end) + box_cfg.cluster_name = 'test-name' + new_replica = server:new({ + alias = 'new_replica', + box_cfg = box_cfg, + }) + new_replica:start({wait_until_ready = false}) + local logfile = fio.pathjoin(new_replica.workdir, + new_replica.alias .. '.log') + wait_for_death(new_replica) + t.assert(new_replica:grep_log( + 'Cluster name mismatch: expected test%-name, got <no%-name>', 1024, + {filename = logfile})) + new_replica:drop() + -- + -- Both master and replica have cluster names. But different ones. + -- + lg.master:exec(function() + box.cfg{cluster_name = 'test-name'} + box.cfg{force_recovery = false} + t.assert_equals(box.info.cluster.name, 'test-name') + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + box.cfg{force_recovery = false} + end) + box_cfg.cluster_name = 'new-name' + new_replica = server:new({ + alias = 'new_replica', + box_cfg = box_cfg, + }) + new_replica:start({wait_until_ready = false}) + logfile = fio.pathjoin(new_replica.workdir, new_replica.alias .. '.log') + wait_for_death(new_replica) + t.assert(new_replica:grep_log( + 'Cluster name mismatch: expected new%-name, got test%-name', 1024, + {filename = logfile})) + new_replica:drop() + lg.master:exec(function(replica_id) + local own_id = box.info.id + local _cluster = box.space._cluster + for _, v in _cluster:pairs() do + local id = v.id + if id ~= own_id and id ~= replica_id then + _cluster:delete{id} + end + end + end, {lg.replica:get_instance_id()}) +end + +g.test_cluster_name_recovery_mismatch = function(lg) + -- + -- Has name in WAL, other name in cfg. + -- + local box_cfg = table.copy(lg.replica.box_cfg) + box_cfg.cluster_name = 'new-name' + -- Force recovery won't help. + box_cfg.force_recovery = true + lg.replica:restart({ + box_cfg = box_cfg, + }, {wait_until_ready = false}) + local logfile = fio.pathjoin(lg.replica.workdir, lg.replica.alias .. '.log') + wait_for_death(lg.replica) + t.assert(lg.replica:grep_log( + 'Cluster name mismatch: expected new%-name, got test%-name', 1024, + {filename = logfile})) + -- + -- Has name in WAL, no name in cfg. Then the replica uses the saved name, no + -- conflict. + -- + box_cfg.cluster_name = nil + -- Don't need force_recovery for this. + box_cfg.force_recovery = nil + lg.replica:restart({ + box_cfg = box_cfg, + }) + lg.replica:exec(function() + t.assert_equals(box.cfg.cluster_name, nil) + t.assert_equals(box.info.cluster.name, 'test-name') + end) + -- + -- No name in WAL, has name in cfg. + -- + lg.replica:exec(function() + box.cfg{force_recovery = true} + end) + lg.master:exec(function() + box.cfg{ + force_recovery = true, + cluster_name = box.NULL, + } + box.space._schema:delete{'cluster_name'} + end) + lg.replica:wait_for_vclock_of(lg.master) + box_cfg.cluster_name = 'new-name' + -- Force recovery won't help. + box_cfg.force_recovery = true + lg.replica:restart({ + box_cfg = box_cfg, + }, {wait_until_ready = false}) + wait_for_death(lg.replica) + t.assert(lg.replica:grep_log( + 'Cluster name mismatch: expected new%-name, got <no%-name>', 1024, + {filename = logfile})) + box_cfg.cluster_name = nil + box_cfg.force_recovery = nil + lg.replica:restart({ + box_cfg = box_cfg, + }) + -- + -- Restore the names. 
+ -- + lg.replica:exec(function() + box.cfg{force_recovery = true} + end) + lg.master:exec(function() + box.cfg{cluster_name = 'test-name'} + end) + -- + -- Master can't change the name on recovery either. + -- + -- Has name in WAL, other name in cfg. + -- + box_cfg = table.copy(lg.master.box_cfg) + box_cfg.cluster_name = 'new-name' + -- Force recovery won't help. + box_cfg.force_recovery = true + lg.master:restart({ + box_cfg = box_cfg, + }, {wait_until_ready = false}) + logfile = fio.pathjoin(lg.master.workdir, lg.master.alias .. '.log') + wait_for_death(lg.master) + t.assert(lg.master:grep_log( + 'Cluster name mismatch: expected new%-name, got test%-name', 1024, + {filename = logfile})) + -- + -- No name in WAL, has name in cfg. + -- + box_cfg.cluster_name = nil + -- Don't need force_recovery for this. + box_cfg.force_recovery = nil + lg.master:restart({ + box_cfg = box_cfg, + }) + lg.master:exec(function() + box.cfg{ + force_recovery = true, + cluster_name = box.NULL, + } + box.space._schema:delete{'cluster_name'} + end) + box_cfg.cluster_name = 'new-name' + -- Force recovery won't help. + box_cfg.force_recovery = true + lg.master:restart({ + box_cfg = box_cfg, + }, {wait_until_ready = false}) + wait_for_death(lg.master) + t.assert(lg.master:grep_log( + 'Cluster name mismatch: expected new%-name, got <no%-name>', 1024, + {filename = logfile})) + box_cfg.cluster_name = nil + box_cfg.force_recovery = nil + lg.master:restart({ + box_cfg = box_cfg, + }) + -- + -- Cleanup. + -- + lg.master:exec(function() + box.cfg{ + cluster_name = 'test-name', + force_recovery = true, + } + box.cfg{force_recovery = false} + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + box.cfg{cluster_name = 'test-name'} + box.cfg{force_recovery = false} + end) + lg.replica.box_cfg.cluster_name = 'test-name' + lg.master.box_cfg.cluster_name = 'test-name' +end + +-- +-- See what happens when multiple cluster name updates arrive in one applier +-- batch and are applied without yields in parallel txns. +-- +g.test_cluster_name_change_batch = function(lg) + lg.replica:exec(function() + box.cfg{ + force_recovery = true, + replication = {}, + } + end) + lg.master:exec(function() + box.cfg{force_recovery = true} + t.assert_equals(box.cfg.cluster_name, 'test-name') + for _ = 1, 3 do + box.cfg{cluster_name = 'test-name-new'} + box.cfg{cluster_name = 'test-name'} + end + box.cfg{force_recovery = false} + t.assert_equals(box.info.cluster.name, 'test-name') + end) + lg.replica:exec(function(replication) + box.cfg{replication = replication} + end, {lg.replica.box_cfg.replication}) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + box.cfg{force_recovery = false} + t.assert_equals(box.info.cluster.name, 'test-name') + end) +end -- GitLab
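Below is a minimal usage sketch of the behaviour described in the docbot request above. It is illustrative only and not part of the patch: it assumes a writable instance running a build with this change, and the cluster names 'mycluster' and 'othercluster' are arbitrary examples.

local log = require('log')

-- Assign the cluster name on (re)configuration. Upper-case letters are
-- folded to lower-case, so 'MyCluster' ends up stored as 'mycluster'.
box.cfg{cluster_name = 'mycluster'}

-- The applied name is visible in box.info and is broadcast in the
-- 'box.id' built-in event under the 'cluster_name' key (nil when unset).
assert(box.info.cluster.name == 'mycluster')
box.watch('box.id', function(_, event)
    log.info('cluster_name: %s', tostring(event.cluster_name))
end)

-- Changing an already installed name requires force_recovery on every
-- instance of the cluster; it can be switched back off once all the
-- instances have synced.
box.cfg{force_recovery = true}
box.cfg{cluster_name = 'othercluster'}
box.cfg{force_recovery = false}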