diff --git a/changelogs/unreleased/global-names.md b/changelogs/unreleased/global-names.md index 2a48b6f980dfa93dc0a0686ff497652c79fa31ee..9962038d0bead981d5c28eadeba59fd3f44c5747 100644 --- a/changelogs/unreleased/global-names.md +++ b/changelogs/unreleased/global-names.md @@ -3,3 +3,8 @@ * A new option `box.cfg.cluster_name` allows assigning a human-readable name to the entire cluster. It has to match in all instances and is displayed in `box.info.cluster.name` (gh-5029). + +* A new option `box.cfg.replicaset_name` allows assigning a human-readable name + to the replicaset. It works the same as `box.cfg.replicaset_uuid`. Its value + must be the same across all instances of one replicaset. The replicaset name + is displayed in `box.info.replicaset.name` (gh-5029). diff --git a/src/box/alter.cc b/src/box/alter.cc index 628e647088ee4f2538663ea636f0e42f3b4a4715..ef4b42f6ac56187d50141660153e68d0a5ee3715 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -3921,6 +3921,19 @@ on_commit_replicaset_uuid(struct trigger *trigger, void * /* event */) return 0; } +/** Set replicaset name on _schema commit. 
*/ +static int +on_commit_replicaset_name(struct trigger *trigger, void * /* event */) +{ + const char *name = (typeof(name))trigger->data; + if (strcmp(REPLICASET_NAME, name) == 0) + return 0; + strlcpy(REPLICASET_NAME, name, NODE_NAME_SIZE_MAX); + box_broadcast_id(); + say_info("replicaset name: %s", node_name_str(name)); + return 0; +} + static int on_commit_dd_version(struct trigger *trigger, void * /* event */) { @@ -4141,6 +4154,32 @@ on_replace_dd_schema(struct trigger * /* trigger */, void *event) if (on_commit == NULL) return -1; txn_stmt_on_commit(stmt, on_commit); + } else if (strcmp(key, "replicaset_name") == 0) { + char name[NODE_NAME_SIZE_MAX]; + const char *field_name = "_schema['replicaset_name'].value"; + if (tuple_field_node_name(name, new_tuple, + BOX_SCHEMA_FIELD_VALUE, + field_name) != 0) + return -1; + if (box_is_configured() && *REPLICASET_NAME != 0 && + strcmp(name, REPLICASET_NAME) != 0) { + if (!box_is_force_recovery) { + diag_set(ClientError, ER_UNSUPPORTED, + "Tarantool", "replicaset name change " + "(without 'force_recovery')"); + return -1; + } + say_info("replicaset name mismatch, " + "ignore due to 'force_recovery'"); + } + size_t size = strlen(name) + 1; + char *name_copy = (char *)xregion_alloc(&txn->region, size); + memcpy(name_copy, name, size); + struct trigger *on_commit = txn_alter_trigger_new( + on_commit_replicaset_name, name_copy); + if (on_commit == NULL) + return -1; + txn_stmt_on_commit(stmt, on_commit); } return 0; } diff --git a/src/box/applier.cc b/src/box/applier.cc index 7a8f3e33a10b467d54bcae23dfd4b62b8c0ccb8f..bcb3ae594e282eab9e66a1a64137478b28a5a1e7 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -2313,6 +2313,7 @@ applier_subscribe(struct applier *applier) vclock_create(&req.vclock); }); req.replicaset_uuid = REPLICASET_UUID; + strlcpy(req.replicaset_name, REPLICASET_NAME, NODE_NAME_SIZE_MAX); req.instance_uuid = INSTANCE_UUID; req.version_id = tarantool_version_id(); req.is_anon = box_is_anon(); @@ 
-2357,6 +2358,20 @@ applier_subscribe(struct applier *applier) tt_uuid_str(&rsp.replicaset_uuid), tt_uuid_str(&REPLICASET_UUID)); } + if (*REPLICASET_NAME != 0 && + strcmp(rsp.replicaset_name, REPLICASET_NAME) != 0) { + if (!box_is_force_recovery) { + const char *expected = node_name_str( + REPLICASET_NAME); + const char *got = node_name_str( + rsp.replicaset_name); + tnt_raise(ClientError, + ER_REPLICASET_NAME_MISMATCH, expected, + got); + } + say_info("replicaset name mismatch allowed by " + "'force_recovery'"); + } say_info("subscribed"); say_info("remote vclock %s local vclock %s", vclock_to_string(&rsp.vclock), diff --git a/src/box/box.cc b/src/box/box.cc index ef094dc291c60057427bccda91f9c7141852f9cb..31fff3352a4fc4583b14f917f338b5adfd4dc6a5 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1382,6 +1382,12 @@ box_check_bootstrap_leader(struct uri *uri, struct tt_uuid *uuid) return -1; } +static int +box_check_replicaset_name(char *out) +{ + return box_check_node_name("replicaset_name", out); +} + static int box_check_cluster_name(char *out) { @@ -2082,6 +2088,41 @@ box_set_cluster_name(void) box_set_cluster_name_record(name); } +/** + * Set the new replicaset name record in _schema, bypassing all checks like + * whether the instance is writable. It makes the function usable by bootstrap + * master when it is read-only but has to make the first registration. + */ +static void +box_set_replicaset_name_record(const char *name) +{ + int rc; + if (*name == 0) { + rc = boxk(IPROTO_DELETE, BOX_SCHEMA_ID, "[%s]", + "replicaset_name"); + } else { + rc = boxk(IPROTO_REPLACE, BOX_SCHEMA_ID, "[%s%s]", + "replicaset_name", name); + } + if (rc != 0) + diag_raise(); +} + +void +box_set_replicaset_name(void) +{ + char name[NODE_NAME_SIZE_MAX]; + if (box_check_replicaset_name(name) != 0) + diag_raise(); + /* Nil means the config doesn't care, allows to use any name. 
*/ + if (*name == 0) + return; + if (strcmp(REPLICASET_NAME, name) == 0) + return; + box_check_writable_xc(); + box_set_replicaset_name_record(name); +} + /** Trigger to catch ACKs from all nodes when need to wait for quorum. */ struct box_quorum_trigger { /** Inherit trigger. */ @@ -4113,7 +4154,16 @@ box_process_subscribe(struct iostream *io, const struct xrow_header *header) tt_uuid_str(&REPLICASET_UUID), tt_uuid_str(&req.replicaset_uuid)); } - + /* + * Replicaset name mismatch is not considered a critical error. It can + * happen if rename happened and then some replicas reconnected. They + * won't ever be able to fetch the new name if the master rejects them. + */ + if (strcmp(req.replicaset_name, REPLICASET_NAME) != 0) { + say_warn("Replicaset name mismatch on subscribe. Peer name - %s, " + "local name - %s", node_name_str(req.replicaset_name), + node_name_str(REPLICASET_NAME)); + } /* * Do not allow non-anonymous followers for anonymous * instances. @@ -4180,6 +4230,7 @@ box_process_subscribe(struct iostream *io, const struct xrow_header *header) memset(&rsp, 0, sizeof(rsp)); vclock_copy(&rsp.vclock, &replicaset.vclock); rsp.replicaset_uuid = REPLICASET_UUID; + strlcpy(rsp.replicaset_name, REPLICASET_NAME, NODE_NAME_SIZE_MAX); struct xrow_header row; RegionGuard region_guard(&fiber()->gc); xrow_encode_subscribe_response(&row, &rsp); @@ -4276,6 +4327,9 @@ box_populate_schema_space(void) struct tt_uuid replicaset_uuid; if (box_check_replicaset_uuid(&replicaset_uuid) != 0) diag_raise(); + char replicaset_name[NODE_NAME_SIZE_MAX]; + if (box_check_replicaset_name(replicaset_name) != 0) + diag_raise(); char cluster_name[NODE_NAME_SIZE_MAX]; if (box_check_cluster_name(cluster_name) != 0) diag_raise(); @@ -4286,6 +4340,7 @@ box_populate_schema_space(void) tt_uuid_str(&replicaset_uuid))) diag_raise(); box_set_cluster_name_record(cluster_name); + box_set_replicaset_name_record(replicaset_name); if (bootstrap_strategy == BOOTSTRAP_STRATEGY_SUPERVISED) 
box_set_bootstrap_leader_record(); } @@ -4388,8 +4443,10 @@ static int check_global_ids_integrity(void) { char cluster_name[NODE_NAME_SIZE_MAX]; + char replicaset_name[NODE_NAME_SIZE_MAX]; struct tt_uuid replicaset_uuid; if (box_check_cluster_name(cluster_name) != 0 || + box_check_replicaset_name(replicaset_name) != 0 || box_check_replicaset_uuid(&replicaset_uuid) != 0) return -1; @@ -4399,6 +4456,13 @@ check_global_ids_integrity(void) node_name_str(CLUSTER_NAME)); return -1; } + if (*replicaset_name != 0 && + strcmp(replicaset_name, REPLICASET_NAME) != 0) { + diag_set(ClientError, ER_REPLICASET_NAME_MISMATCH, + node_name_str(replicaset_name), + node_name_str(REPLICASET_NAME)); + return -1; + } if (!tt_uuid_is_nil(&replicaset_uuid) && !tt_uuid_is_equal(&replicaset_uuid, &REPLICASET_UUID)) { diag_set(ClientError, ER_REPLICASET_UUID_MISMATCH, @@ -5162,13 +5226,18 @@ box_broadcast_id(void) { char buf[1024]; char *w = buf; - w = mp_encode_map(w, 4); + w = mp_encode_map(w, 5); w = mp_encode_str0(w, "id"); w = mp_encode_uint(w, instance_id); w = mp_encode_str0(w, "instance_uuid"); w = mp_encode_uuid(w, &INSTANCE_UUID); w = mp_encode_str0(w, "replicaset_uuid"); w = mp_encode_uuid(w, &REPLICASET_UUID); + w = mp_encode_str0(w, "replicaset_name"); + if (*REPLICASET_NAME == 0) + w = mp_encode_nil(w); + else + w = mp_encode_str0(w, REPLICASET_NAME); w = mp_encode_str0(w, "cluster_name"); if (*CLUSTER_NAME == 0) w = mp_encode_nil(w); diff --git a/src/box/box.h b/src/box/box.h index b0c46c0b247f7fbf9d65371cc9b7a150bbcd6f1e..53a8fe95544374304aff89c3c0edd8c4fe8305a6 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -320,6 +320,7 @@ int box_set_replication_synchro_timeout(void); void box_set_replication_sync_timeout(void); void box_set_replication_skip_conflict(void); void box_set_replication_anon(void); +void box_set_replicaset_name(void); void box_set_cluster_name(void); void box_set_net_msg_max(void); int box_set_prepared_stmt_cache_size(void); diff --git a/src/box/errcode.h 
b/src/box/errcode.h index 4c851b686d93bf3f379e61c942eca2591198dc2f..d31ba6355ce3b6aec68ef038516ec73d0aebabef 100644 --- a/src/box/errcode.h +++ b/src/box/errcode.h @@ -320,6 +320,7 @@ struct errcode_record { /*265 */_(ER_WRONG_FUNCTION_OPTIONS, "Wrong function options: %s") \ /*266 */_(ER_MISSING_SYSTEM_SPACES, "Snapshot has no system spaces") \ /*267 */_(ER_CLUSTER_NAME_MISMATCH, "Cluster name mismatch: expected %s, got %s") \ + /*268 */_(ER_REPLICASET_NAME_MISMATCH, "Replicaset name mismatch: expected %s, got %s") \ /* * !IMPORTANT! Please follow instructions at start of the file diff --git a/src/box/iproto_constants.c b/src/box/iproto_constants.c index 1fc05f3a66d715ccbbf8aebc509f24e3dff96eb0..b792af07d2ad9da049ceee807c5540b86eac08da 100644 --- a/src/box/iproto_constants.c +++ b/src/box/iproto_constants.c @@ -152,6 +152,7 @@ const unsigned char iproto_key_type[iproto_key_MAX] = /* 0x59 */ MP_UINT, /* IPROTO_TXN_ISOLATION */ /* 0x5a */ MP_UINT, /* IPROTO_VCLOCK_SYNC */ /* 0x5b */ MP_STR, /* IPROTO_AUTH_TYPE */ + /* 0x5c */ MP_STR, /* IPROTO_REPLICASET_NAME */ /* }}} */ }; diff --git a/src/box/iproto_constants.h b/src/box/iproto_constants.h index 38c2e20976f6e9162a7da40b6da7c87f4dd0f951..ccbe166e28bc75ad2090a2eb0d6c1995c08add71 100644 --- a/src/box/iproto_constants.h +++ b/src/box/iproto_constants.h @@ -193,6 +193,7 @@ extern const size_t iproto_flag_constants_size; * authentication method. */ \ _(IPROTO_AUTH_TYPE, 0x5b) \ + _(IPROTO_REPLICASET_NAME, 0x5c) \ ENUM(iproto_key, IPROTO_KEYS); /** diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc index a831db863bcc54e91c5696d80ec4001f8c5ece3c..d70958eada2e5c309266f393441ed898e590ee1e 100644 --- a/src/box/lua/cfg.cc +++ b/src/box/lua/cfg.cc @@ -404,6 +404,18 @@ lbox_cfg_set_replication_anon(struct lua_State *L) return 0; } +/** box.cfg.replicaset_name. 
*/ +static int +lbox_cfg_set_replicaset_name(struct lua_State *L) +{ + try { + box_set_replicaset_name(); + } catch (Exception *) { + luaT_error(L); + } + return 0; +} + /** box.cfg.cluster_name. */ static int lbox_cfg_set_cluster_name(struct lua_State *L) @@ -495,6 +507,7 @@ box_lua_cfg_init(struct lua_State *L) {"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout}, {"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict}, {"cfg_set_replication_anon", lbox_cfg_set_replication_anon}, + {"cfg_set_replicaset_name", lbox_cfg_set_replicaset_name}, {"cfg_set_cluster_name", lbox_cfg_set_cluster_name}, {"cfg_set_net_msg_max", lbox_cfg_set_net_msg_max}, {"cfg_set_sql_cache_size", lbox_set_prepared_stmt_cache_size}, diff --git a/src/box/lua/info.c b/src/box/lua/info.c index 86aa355d12b633fbb682394a9ee4e09d12084ed0..650cfe3c22de569cd00eba17fd8d258796be9641 100644 --- a/src/box/lua/info.c +++ b/src/box/lua/info.c @@ -388,6 +388,11 @@ lbox_info_replicaset(struct lua_State *L) lua_pushliteral(L, "uuid"); luaT_pushuuidstr(L, &REPLICASET_UUID); lua_settable(L, -3); + if (*REPLICASET_NAME == 0) + luaL_pushnull(L); + else + lua_pushstring(L, REPLICASET_NAME); + lua_setfield(L, -2, "name"); return 1; } diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index 1096c1979bcd7cfbffc77c242522ca491fb3c210..74358f145400c574d25c82407113c4df4ddbf45d 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -152,6 +152,7 @@ local default_cfg = { replication = nil, instance_uuid = nil, replicaset_uuid = nil, + replicaset_name = nil, cluster_name = nil, custom_proc_title = nil, pid_file = nil, @@ -343,6 +344,7 @@ local template_cfg = { replication = 'string, number, table', instance_uuid = 'string', replicaset_uuid = 'string', + replicaset_name = 'string', cluster_name = 'string', custom_proc_title = 'string', pid_file = 'string', @@ -408,6 +410,7 @@ end -- options that require special handling local modify_cfg = { 
replication = normalize_uri_list_for_replication, + replicaset_name = normalize_node_name, cluster_name = normalize_node_name, } @@ -498,6 +501,7 @@ local dynamic_cfg = { bootstrap_strategy = private.cfg_set_bootstrap_strategy, instance_uuid = check_instance_uuid, replicaset_uuid = check_replicaset_uuid, + replicaset_name = private.cfg_set_replicaset_name, cluster_name = private.cfg_set_cluster_name, net_msg_max = private.cfg_set_net_msg_max, sql_cache_size = private.cfg_set_sql_cache_size, @@ -660,6 +664,7 @@ local dynamic_cfg_skip_at_load = { force_recovery = true, instance_uuid = true, replicaset_uuid = true, + replicaset_name = true, cluster_name = true, net_msg_max = true, readahead = true, diff --git a/src/box/lua/upgrade.lua b/src/box/lua/upgrade.lua index 6ebdc066c011014aa5f40abc63f401de78e3b1d4..7894a8296fcfe43294dcc7ddaa4c49862ac9837c 100644 --- a/src/box/lua/upgrade.lua +++ b/src/box/lua/upgrade.lua @@ -1959,6 +1959,9 @@ local function check_names_are_not_set(issue_handler) if _schema:get{'cluster_name'} ~= nil then issue_handler('Cluster %s', msg_suffix) end + if _schema:get{'replicaset_name'} ~= nil then + issue_handler('Replicaset %s', msg_suffix) + end end local function downgrade_from_3_0_0(issue_handler) diff --git a/src/box/replication.cc b/src/box/replication.cc index fbf43b630fdb2792c75a821c99d380a9c220a3c6..af03dc28ba31708889be047c19293fc032186beb 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -47,6 +47,7 @@ uint32_t instance_id = REPLICA_ID_NIL; struct tt_uuid INSTANCE_UUID; struct tt_uuid REPLICASET_UUID; +char REPLICASET_NAME[NODE_NAME_SIZE_MAX]; char CLUSTER_NAME[NODE_NAME_SIZE_MAX]; struct uri_set replication_uris; diff --git a/src/box/replication.h b/src/box/replication.h index b2863fefc99ec50a55efbe244ad9b792baf96a5f..7d72c494f09fbb67597d39116a030fda3c99e9d2 100644 --- a/src/box/replication.h +++ b/src/box/replication.h @@ -237,8 +237,10 @@ replication_free(void); extern uint32_t instance_id; /** UUID of the 
instance. */ extern struct tt_uuid INSTANCE_UUID; -/** UUID of the replica set. */ +/** UUID of the replicaset. */ extern struct tt_uuid REPLICASET_UUID; +/** Name of the replicaset. */ +extern char REPLICASET_NAME[]; /** Name of the entire cluster with all its replicasets. */ extern char CLUSTER_NAME[]; diff --git a/src/box/xrow.c b/src/box/xrow.c index 586c1ad831585aeb97547c9b0537fa90a0c510e6..028f58a5c733b593e3bc557f4ee2fe6a1382bfe7 100644 --- a/src/box/xrow.c +++ b/src/box/xrow.c @@ -252,6 +252,22 @@ xrow_decode_uuid(const char **pos, struct tt_uuid *out) return 0; } +/** Decode an optional node name. */ +static inline int +xrow_decode_node_name(const char **pos, char *out) +{ + enum mp_type type = mp_typeof(**pos); + if (type != MP_STR) + return -1; + uint32_t len; + const char *str = mp_decode_str(pos, &len); + if (!node_name_is_valid_n(str, len)) + return -1; + memcpy(out, str, len); + out[len] = 0; + return 0; +} + void xrow_header_encode(const struct xrow_header *header, uint64_t sync, size_t fixheader_len, struct iovec *out, int *iovcnt) @@ -1914,6 +1930,8 @@ xrow_decode_ballot_event(const struct watch_request *req, struct replication_request { /** IPROTO_REPLICASET_UUID. */ struct tt_uuid *replicaset_uuid; + /** IPROTO_REPLICASET_NAME. */ + char *replicaset_name; /** IPROTO_INSTANCE_UUID. */ struct tt_uuid *instance_uuid; /** IPROTO_VCLOCK. 
*/ @@ -1945,6 +1963,11 @@ xrow_encode_replication_request(struct xrow_header *row, data = mp_encode_uint(data, IPROTO_REPLICASET_UUID); data = xrow_encode_uuid(data, req->replicaset_uuid); } + if (req->replicaset_name != NULL && *req->replicaset_name != 0) { + ++map_size; + data = mp_encode_uint(data, IPROTO_REPLICASET_NAME); + data = mp_encode_str0(data, req->replicaset_name); + } if (req->instance_uuid != NULL) { ++map_size; data = mp_encode_uint(data, IPROTO_INSTANCE_UUID); @@ -2021,6 +2044,16 @@ xrow_decode_replication_request(const struct xrow_header *row, return -1; } break; + case IPROTO_REPLICASET_NAME: + if (req->replicaset_name == NULL) + goto skip; + if (xrow_decode_node_name( + &d, req->replicaset_name) != 0) { + xrow_on_decode_err(row, ER_INVALID_MSGPACK, + "invalid REPLICASET_NAME"); + return -1; + } + break; case IPROTO_INSTANCE_UUID: if (req->instance_uuid == NULL) goto skip; @@ -2115,6 +2148,7 @@ xrow_encode_subscribe(struct xrow_header *row, struct subscribe_request *cast = (struct subscribe_request *)req; const struct replication_request base_req = { .replicaset_uuid = &cast->replicaset_uuid, + .replicaset_name = cast->replicaset_name, .instance_uuid = &cast->instance_uuid, .vclock = &cast->vclock, .is_anon = &cast->is_anon, @@ -2131,6 +2165,7 @@ xrow_decode_subscribe(const struct xrow_header *row, memset(req, 0, sizeof(*req)); struct replication_request base_req = { .replicaset_uuid = &req->replicaset_uuid, + .replicaset_name = req->replicaset_name, .instance_uuid = &req->instance_uuid, .vclock = &req->vclock, .version_id = &req->version_id, @@ -2407,6 +2442,7 @@ xrow_encode_subscribe_response(struct xrow_header *row, struct subscribe_response *cast = (struct subscribe_response *)rsp; const struct replication_request base_req = { .replicaset_uuid = &cast->replicaset_uuid, + .replicaset_name = cast->replicaset_name, .vclock = &cast->vclock, }; xrow_encode_replication_request(row, &base_req, IPROTO_OK); @@ -2419,6 +2455,7 @@ 
xrow_decode_subscribe_response(const struct xrow_header *row, memset(rsp, 0, sizeof(*rsp)); struct replication_request base_req = { .replicaset_uuid = &rsp->replicaset_uuid, + .replicaset_name = rsp->replicaset_name, .vclock = &rsp->vclock, }; return xrow_decode_replication_request(row, &base_req); diff --git a/src/box/xrow.h b/src/box/xrow.h index 0cf0020124959e8eb9fd9804c29633be3e71b275..e170ba50a28d9dd717e1845461df100219e5772d 100644 --- a/src/box/xrow.h +++ b/src/box/xrow.h @@ -37,6 +37,7 @@ #include "diag.h" #include "iproto_features.h" +#include "node_name.h" #include "tt_uuid.h" #include "vclock/vclock.h" @@ -49,7 +50,7 @@ enum { XROW_BODY_IOVMAX = 2, XROW_IOVMAX = XROW_HEADER_IOVMAX + XROW_BODY_IOVMAX, XROW_HEADER_LEN_MAX = 52, - XROW_BODY_LEN_MAX = 256, + XROW_BODY_LEN_MAX = 512, XROW_SYNCHRO_BODY_LEN_MAX = 32, IPROTO_HEADER_LEN = 32, /** 7 = sizeof(iproto_body_bin). */ @@ -549,6 +550,8 @@ xrow_decode_register(const struct xrow_header *row, struct subscribe_request { /** Replica's replicaset UUID. */ struct tt_uuid replicaset_uuid; + /** Replica's replicaset name. */ + char replicaset_name[NODE_NAME_SIZE_MAX]; /** Replica's instance UUID. */ struct tt_uuid instance_uuid; /** Replica's vclock. */ @@ -575,6 +578,8 @@ xrow_decode_subscribe(const struct xrow_header *row, struct subscribe_response { /** Master's replicaset UUID. */ struct tt_uuid replicaset_uuid; + /** Master's replicaset name. */ + char replicaset_name[NODE_NAME_SIZE_MAX]; /** Master's vclock. 
*/ struct vclock vclock; }; diff --git a/test/box-luatest/downgrade_test.lua b/test/box-luatest/downgrade_test.lua index 78c3eba3a45da0252629f2685cb8a866a07d3bf7..00f0eae593308206c1accc015fb5be2e8c3f4294 100644 --- a/test/box-luatest/downgrade_test.lua +++ b/test/box-luatest/downgrade_test.lua @@ -923,6 +923,13 @@ g.test_downgrade_global_names = function(cg) box.space._schema:delete{'cluster_name'} box.cfg{ cluster_name = box.NULL, + replicaset_name = 'test' + } + issues = box.schema.downgrade_issues(prev_version) + t.assert_str_contains(issues[1], 'Replicaset name is set') + box.space._schema:delete{'replicaset_name'} + box.cfg{ + replicaset_name = box.NULL, force_recovery = false, } box.schema.downgrade(prev_version) diff --git a/test/box-luatest/gh_7894_export_iproto_constants_and_features_test.lua b/test/box-luatest/gh_7894_export_iproto_constants_and_features_test.lua index a300fc05e5b0cd02d05d101d473d6b7d5ec777d1..5f37042d023d7c1a645e7041b85dac3879b92745 100644 --- a/test/box-luatest/gh_7894_export_iproto_constants_and_features_test.lua +++ b/test/box-luatest/gh_7894_export_iproto_constants_and_features_test.lua @@ -80,6 +80,7 @@ local reference_table = { TXN_ISOLATION = 0x59, VCLOCK_SYNC = 0x5a, AUTH_TYPE = 0x5b, + REPLICASET_NAME = 0x5c, }, -- `iproto_metadata_key` enumeration. diff --git a/test/box/error.result b/test/box/error.result index 2d037a02f3e42b103f75c2810314fbc196391f43..ff2b657cc2ff708ccc9a64779e77ee0c15b859d5 100644 --- a/test/box/error.result +++ b/test/box/error.result @@ -485,6 +485,7 @@ t; | 265: box.error.WRONG_FUNCTION_OPTIONS | 266: box.error.MISSING_SYSTEM_SPACES | 267: box.error.CLUSTER_NAME_MISMATCH + | 268: box.error.REPLICASET_NAME_MISMATCH | ... 
test_run:cmd("setopt delimiter ''"); diff --git a/test/replication-luatest/replicaset_name_test.lua b/test/replication-luatest/replicaset_name_test.lua new file mode 100644 index 0000000000000000000000000000000000000000..ece44051c78c77457ba12620e5a503a156a56ea7 --- /dev/null +++ b/test/replication-luatest/replicaset_name_test.lua @@ -0,0 +1,494 @@ +local fio = require('fio') +local replica_set = require('luatest.replica_set') +local server = require('luatest.server') +local t = require('luatest') +local g = t.group() + +local function wait_for_death(instance) + t.helpers.retrying({}, function() + assert(not instance.process:is_alive()) + end) + -- Nullify already dead process or server:drop() fails. + instance.process = nil +end + +g.before_all = function(lg) + lg.replica_set = replica_set:new({}) + local box_cfg = { + replication = { + server.build_listen_uri('master', lg.replica_set.id), + server.build_listen_uri('replica', lg.replica_set.id), + }, + replication_timeout = 0.1, + replicaset_name = 'test-name', + } + lg.master = lg.replica_set:build_and_add_server({ + alias = 'master', + box_cfg = box_cfg, + }) + box_cfg.read_only = true + lg.replica = lg.replica_set:build_and_add_server({ + alias = 'replica', + box_cfg = box_cfg, + }) + lg.replica_set:start() +end + +g.after_all = function(lg) + lg.replica_set:drop() +end + +g.test_local_errors = function(lg) + lg.master:exec(function() + local msg = 'expected a valid name' + t.assert_error_msg_contains(msg, box.cfg, {replicaset_name = '123'}) + t.assert_error_msg_contains(msg, box.cfg, {replicaset_name = '-abc'}) + t.assert_error_msg_contains(msg, box.cfg, {replicaset_name = 'a~b'}) + + msg = 'replicaset name change' + t.assert_error_msg_contains(msg, box.cfg, {replicaset_name = 'test'}) + + msg = 'type does not match' + local _schema = box.space._schema + t.assert_error_msg_contains(msg, _schema.replace, _schema, + {'replicaset_name', 'bad name'}) + t.assert_error_msg_contains(msg, _schema.replace, _schema, + 
{'replicaset_name', 100}) + end) + lg.replica:exec(function() + local msg = 'read-only instance' + t.assert_error_msg_contains(msg, box.cfg, {replicaset_name = 'test'}) + end) +end + +g.test_replicaset_name_basic = function(lg) + local check_name_f = function() + local _schema = box.space._schema + t.assert_equals(box.info.replicaset.name, 'test-name') + t.assert_equals(_schema:get{'replicaset_name'}.value, 'test-name') + end + lg.master:exec(check_name_f) + lg.replica:exec(check_name_f) +end + +g.test_replicaset_rename = function(lg) + -- + -- Drop the name. + -- + lg.replica:exec(function() + box.cfg{force_recovery = true} + end) + lg.master:exec(function() + rawset(_G, 'last_event', {}) + rawset(_G, 'watcher', box.watch('box.id', function(_, event) + _G.last_event = event + end)) + box.cfg{ + force_recovery = true, + replicaset_name = box.NULL + } + box.space._schema:delete{'replicaset_name'} + t.assert_equals(box.info.replicaset.name, nil) + t.assert_equals(_G.last_event.replicaset_name, nil) + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + -- Box.info uses applied names, not cfg. + t.assert_not_equals(box.cfg.replicaset_name, nil) + t.assert_equals(box.info.replicaset.name, nil) + box.cfg{replicaset_name = box.NULL} + end) + -- + -- Replace with the same nil name. + -- + lg.master:exec(function() + local _schema = box.space._schema + -- No tuple at all -> box.NULL. + _schema:replace{'replicaset_name', box.NULL} + t.assert_equals(_G.last_event.replicaset_name, nil) + t.assert_equals(box.info.replicaset.name, nil) + + -- Box.NULL -> nil. + _schema:replace{'replicaset_name'} + t.assert_equals(_G.last_event.replicaset_name, nil) + t.assert_equals(box.info.replicaset.name, nil) + end) + -- + -- Change nil -> not nil. 
+ -- + lg.replica:exec(function() + box.cfg{force_recovery = false} + end) + lg.master:exec(function() + box.cfg{ + force_recovery = false, + replicaset_name = 'test' + } + t.assert_equals(_G.last_event.replicaset_name, 'test') + t.assert_equals(box.info.replicaset.name, box.cfg.replicaset_name) + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + box.cfg{replicaset_name = 'test'} + t.assert_equals(box.info.replicaset.name, box.cfg.replicaset_name) + end) + -- + -- Change not nil -> same not nil. + -- + lg.master:exec(function() + box.space._schema:replace(box.space._schema:get{'replicaset_name'}) + t.assert_equals(box.info.replicaset.name, 'test') + end) + -- + -- Change not nil -> new not nil. + -- + lg.replica:exec(function() + box.cfg{force_recovery = true} + end) + lg.master:exec(function() + -- The name is converted to a proper form automatically. + box.cfg{ + force_recovery = true, + replicaset_name = 'TEST2' + } + t.assert_equals(box.cfg.replicaset_name, 'test2') + t.assert_equals(_G.last_event.replicaset_name, 'test2') + t.assert_equals(box.info.replicaset.name, box.cfg.replicaset_name) + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + t.assert_equals(box.cfg.replicaset_name, 'test') + box.cfg{replicaset_name = 'TeSt2'} + t.assert_equals(box.cfg.replicaset_name, 'test2') + t.assert_equals(box.info.replicaset.name, box.cfg.replicaset_name) + end) + -- + -- Cleanup. 
+ -- + lg.master:exec(function() + _G.watcher:unregister() + _G.watcher = nil + _G.last_event = nil + box.cfg{replicaset_name = 'test-name'} + box.cfg{force_recovery = false} + end) + lg.replica:wait_for_vclock_of(lg.master) + lg.replica:exec(function() + box.cfg{replicaset_name = 'test-name'} + box.cfg{force_recovery = false} + end) +end + +g.test_replicaset_name_transactional = function(lg) + lg.master:exec(function() + t.assert_equals(box.info.replicaset.name, 'test-name') + box.cfg{force_recovery = true} + box.begin() + box.space._schema:replace{'replicaset_name', 'new-name'} + box.rollback() + t.assert_equals(box.info.replicaset.name, 'test-name') + box.cfg{force_recovery = false} + end) +end + +g.test_replicaset_name_bootstrap_mismatch = function(lg) + -- + -- New replica has no replicaset name, the master does. Then the replica + -- uses the master's name, no conflict. + -- + local box_cfg = table.copy(lg.replica.box_cfg) + box_cfg.replicaset_name = nil + local new_replica = server:new({ + alias = 'new_replica', + box_cfg = box_cfg, + }) + new_replica:start() + new_replica:exec(function() + t.assert_equals(box.cfg.replicaset_name, nil) + t.assert_equals(box.info.replicaset.name, 'test-name') + box.cfg{replicaset_name = 'test-name'} + end) + new_replica:drop() + -- + -- New replica has replicaset name, the master doesn't. + -- + lg.replica:exec(function() + box.cfg{force_recovery = true} + end) + lg.master:exec(function() + box.cfg{ + force_recovery = true, + replicaset_name = box.NULL + } + t.assert_equals(box.info.replicaset.name, 'test-name') + box.space._schema:delete{'replicaset_name'} + t.assert_equals(box.info.replicaset.name, nil) + end) + box_cfg.replicaset_name = 'test-name' + box_cfg.bootstrap_strategy = 'legacy' + new_replica = server:new({ + alias = 'new_replica', + box_cfg = box_cfg, + }) + new_replica:start({wait_until_ready = false}) + local logfile = fio.pathjoin(new_replica.workdir, + new_replica.alias .. 
'.log')
+    wait_for_death(new_replica)
+    t.assert(new_replica:grep_log(
+        'Replicaset name mismatch: expected test%-name, got <no%-name>', 1024,
+        {filename = logfile}))
+    new_replica:drop()
+    --
+    -- Both master and replica have replicaset names. But different ones.
+    --
+    lg.master:exec(function()
+        box.cfg{replicaset_name = 'test-name'}
+        box.cfg{force_recovery = false}
+        t.assert_equals(box.info.replicaset.name, 'test-name')
+    end)
+    lg.replica:wait_for_vclock_of(lg.master)
+    lg.replica:exec(function()
+        box.cfg{force_recovery = false}
+    end)
+    box_cfg.replicaset_name = 'new-name'
+    new_replica = server:new({
+        alias = 'new_replica',
+        box_cfg = box_cfg,
+    })
+    new_replica:start({wait_until_ready = false})
+    logfile = fio.pathjoin(new_replica.workdir, new_replica.alias .. '.log')
+    wait_for_death(new_replica)
+    t.assert(new_replica:grep_log(
+        'Replicaset name mismatch: expected new%-name, got test%-name', 1024,
+        {filename = logfile}))
+    new_replica:drop()
+    lg.master:exec(function(replica_id)
+        local own_id = box.info.id
+        local _cluster = box.space._cluster
+        for _, v in _cluster:pairs() do
+            local id = v.id
+            if id ~= own_id and id ~= replica_id then
+                _cluster:delete{id}
+            end
+        end
+    end, {lg.replica:get_instance_id()})
+end
+
+--
+-- Check that an instance does not start when the replicaset name given in its
+-- box_cfg disagrees with what it has saved ("in WAL" in the section comments
+-- below): either another name or no name at all. Every failing restart is
+-- done with force_recovery = true to show that it does not bypass the check.
+-- The replica is exercised first, then the master. The final section puts
+-- 'test-name' back on both instances and into their stored box_cfg tables.
+--
+g.test_replicaset_name_recovery_mismatch = function(lg)
+    --
+    -- Has name in WAL, other name in cfg.
+    --
+    local box_cfg = table.copy(lg.replica.box_cfg)
+    box_cfg.replicaset_name = 'new-name'
+    -- Force recovery won't help.
+    box_cfg.force_recovery = true
+    -- The start is expected to fail, so do not wait for readiness here.
+    lg.replica:restart({
+        box_cfg = box_cfg,
+    }, {wait_until_ready = false})
+    local logfile = fio.pathjoin(lg.replica.workdir, lg.replica.alias .. '.log')
+    -- NOTE(review): wait_for_death() is defined outside this part of the
+    -- file; presumably it blocks until the server process has exited.
+    wait_for_death(lg.replica)
+    -- NOTE(review): '%-' looks like a Lua-pattern escape for a literal '-',
+    -- and 1024 is presumably how much of the log tail is scanned - confirm
+    -- against grep_log().
+    t.assert(lg.replica:grep_log(
+        'Replicaset name mismatch: expected new%-name, got test%-name', 1024,
+        {filename = logfile}))
+    --
+    -- Has name in WAL, no name in cfg. Then the replica uses the saved name, no
+    -- conflict.
+    --
+    box_cfg.replicaset_name = nil
+    -- Don't need force_recovery for this.
+    box_cfg.force_recovery = nil
+    lg.replica:restart({
+        box_cfg = box_cfg,
+    })
+    lg.replica:exec(function()
+        -- The option stays unset while box.info reports the saved name.
+        t.assert_equals(box.cfg.replicaset_name, nil)
+        t.assert_equals(box.info.replicaset.name, 'test-name')
+    end)
+    --
+    -- No name in WAL, has name in cfg.
+    --
+    lg.replica:exec(function()
+        box.cfg{force_recovery = true}
+    end)
+    -- Remove the name on the master: unset the option and delete the
+    -- 'replicaset_name' record from _schema. The replica is then synced up to
+    -- the master's vclock before it is restarted.
+    lg.master:exec(function()
+        box.cfg{
+            force_recovery = true,
+            replicaset_name = box.NULL,
+        }
+        box.space._schema:delete{'replicaset_name'}
+    end)
+    lg.replica:wait_for_vclock_of(lg.master)
+    box_cfg.replicaset_name = 'new-name'
+    -- Force recovery won't help.
+    box_cfg.force_recovery = true
+    lg.replica:restart({
+        box_cfg = box_cfg,
+    }, {wait_until_ready = false})
+    wait_for_death(lg.replica)
+    -- logfile still points at the replica's log computed above.
+    t.assert(lg.replica:grep_log(
+        'Replicaset name mismatch: expected new%-name, got <no%-name>', 1024,
+        {filename = logfile}))
+    -- Bring the replica back up without a name in cfg.
+    box_cfg.replicaset_name = nil
+    box_cfg.force_recovery = nil
+    lg.replica:restart({
+        box_cfg = box_cfg,
+    })
+    --
+    -- Restore the names.
+    --
+    lg.replica:exec(function()
+        box.cfg{force_recovery = true}
+    end)
+    lg.master:exec(function()
+        box.cfg{replicaset_name = 'test-name'}
+    end)
+    --
+    -- Master can't change the name on recovery either.
+    --
+    -- Has name in WAL, other name in cfg.
+    --
+    -- From here on box_cfg is a copy of the master's config and logfile is
+    -- re-pointed at the master's log.
+    box_cfg = table.copy(lg.master.box_cfg)
+    box_cfg.replicaset_name = 'new-name'
+    -- Force recovery won't help.
+    box_cfg.force_recovery = true
+    lg.master:restart({
+        box_cfg = box_cfg,
+    }, {wait_until_ready = false})
+    logfile = fio.pathjoin(lg.master.workdir, lg.master.alias .. '.log')
+    wait_for_death(lg.master)
+    t.assert(lg.master:grep_log(
+        'Replicaset name mismatch: expected new%-name, got test%-name', 1024,
+        {filename = logfile}))
+    --
+    -- No name in WAL, has name in cfg.
+    --
+    -- First start the master normally (no name in cfg) so that the saved name
+    -- can be deleted from _schema; only then restart it with a name in cfg.
+    box_cfg.replicaset_name = nil
+    -- Don't need force_recovery for this.
+    box_cfg.force_recovery = nil
+    lg.master:restart({
+        box_cfg = box_cfg,
+    })
+    lg.master:exec(function()
+        box.cfg{
+            force_recovery = true,
+            replicaset_name = box.NULL,
+        }
+        box.space._schema:delete{'replicaset_name'}
+    end)
+    box_cfg.replicaset_name = 'new-name'
+    -- Force recovery won't help.
+    box_cfg.force_recovery = true
+    lg.master:restart({
+        box_cfg = box_cfg,
+    }, {wait_until_ready = false})
+    wait_for_death(lg.master)
+    t.assert(lg.master:grep_log(
+        'Replicaset name mismatch: expected new%-name, got <no%-name>', 1024,
+        {filename = logfile}))
+    box_cfg.replicaset_name = nil
+    -- Has to be forced or it won't be able to sync with the replica because of
+    -- their replicaset name mismatch.
+    box_cfg.force_recovery = true
+    lg.master:restart({
+        box_cfg = box_cfg,
+    })
+    --
+    -- Cleanup.
+    --
+    lg.master:exec(function()
+        box.cfg{replicaset_name = 'test-name'}
+        box.cfg{force_recovery = false}
+    end)
+    lg.replica:wait_for_vclock_of(lg.master)
+    lg.replica:exec(function()
+        box.cfg{replicaset_name = 'test-name'}
+        box.cfg{force_recovery = false}
+    end)
+    -- Keep the stored configs in line with the restored name as well.
+    lg.replica.box_cfg.replicaset_name = 'test-name'
+    lg.master.box_cfg.replicaset_name = 'test-name'
+end
+
+--
+-- See what happens when multiple replicaset name updates arrive in one applier
+-- batch and are applied without yields in parallel txns.
+--
+g.test_replicaset_name_change_batch = function(lg)
+    -- Give the replica an empty replication list while the master renames
+    -- itself; presumably this is what makes all the renames reach the replica
+    -- together once replication is configured again.
+    lg.replica:exec(function()
+        box.cfg{
+            force_recovery = true,
+            replication = {},
+        }
+    end)
+    lg.master:exec(function()
+        -- force_recovery is enabled for the duration of the renames and
+        -- switched off again right after them.
+        box.cfg{force_recovery = true}
+        t.assert_equals(box.cfg.replicaset_name, 'test-name')
+        -- Flip the name back and forth three times; the last value written is
+        -- the original 'test-name'.
+        for _ = 1, 3 do
+            box.cfg{replicaset_name = 'test-name-new'}
+            box.cfg{replicaset_name = 'test-name'}
+        end
+        box.cfg{force_recovery = false}
+        t.assert_equals(box.info.replicaset.name, 'test-name')
+    end)
+    -- Reconnect using the replication list kept in the replica's box_cfg.
+    lg.replica:exec(function(replication)
+        box.cfg{replication = replication}
+    end, {lg.replica.box_cfg.replication})
+    lg.replica:wait_for_vclock_of(lg.master)
+    lg.replica:exec(function()
+        box.cfg{force_recovery = false}
+        -- Having caught up with the master, the replica reports the same
+        -- final name.
+        t.assert_equals(box.info.replicaset.name, 'test-name')
+    end)
+end
+
+--
+-- Check replication between instances whose replicaset names differ. First
+-- the master is renamed while the replica has no upstreams configured: after
+-- replication is configured again, the upstream status message has to
+-- contain 'Replicaset name mismatch', and the replica has to follow the
+-- upstream once it has enabled force_recovery and reconfigured replication.
+-- Then the name is removed on both sides and set again only on the master:
+-- the replica is expected to pick it up without force_recovery.
+--
+g.test_replicaset_name_subscribe_request_mismatch = function(lg)
+    lg.replica:exec(function()
+        box.cfg{
+            replication = {},
+        }
+    end)
+    lg.master:exec(function()
+        box.cfg{
+            force_recovery = true,
+            replicaset_name = 'test-name-new',
+        }
+        t.assert_equals(box.info.replicaset.name, 'test-name-new')
+    end)
+    lg.replica:exec(function(replication)
+        box.cfg{replication = replication}
+        -- NOTE(review): entry 1 of box.info.replication is presumably the
+        -- master - confirm against the cluster setup.
+        local msg = box.info.replication[1].upstream.message
+        t.assert_str_contains(msg, 'Replicaset name mismatch')
+        -- Retry with force_recovery enabled: replication is dropped and then
+        -- configured again from scratch.
+        box.cfg{
+            force_recovery = true,
+            replication = {},
+        }
+        box.cfg{replication = replication}
+    end, {lg.replica.box_cfg.replication})
+    lg.replica:wait_for_vclock_of(lg.master)
+    lg.replica:assert_follows_upstream(1)
+    --
+    -- No mismatch when local name is empty and the remote one is not.
+    --
+    -- The master still has force_recovery = true from the step above when it
+    -- deletes the name record and unsets the option.
+    lg.master:exec(function()
+        box.space._schema:delete{'replicaset_name'}
+        box.cfg{replicaset_name = box.NULL}
+    end)
+    lg.replica:wait_for_vclock_of(lg.master)
+    lg.replica:exec(function()
+        -- The name removal has been replicated.
+        t.assert_equals(box.info.replicaset.name, nil)
+        -- Turn force_recovery off and drop the upstreams before the master
+        -- gets its name back.
+        box.cfg{
+            force_recovery = false,
+            replication = {},
+        }
+    end)
+    lg.master:exec(function()
+        box.cfg{replicaset_name = 'test-name'}
+        box.cfg{force_recovery = false}
+        t.assert_equals(box.info.replicaset.name, 'test-name')
+    end)
+    -- The replica has no name and no force_recovery at this point.
+    lg.replica:exec(function(replication)
+        box.cfg{replication = replication}
+    end, {lg.replica.box_cfg.replication})
+    lg.replica:wait_for_vclock_of(lg.master)
+    lg.replica:exec(function()
+        t.assert_equals(box.info.replicaset.name, 'test-name')
+    end)
+end