From 39bec27b9b40925b5d3663e564e251bc43fd37a5 Mon Sep 17 00:00:00 2001 From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Date: Tue, 28 Feb 2023 23:06:40 +0100 Subject: [PATCH] replication: split anon cfg and actual 'is anon' To tell whether the instance is anon there used to be just one flag in C code: replication_anon. Having one flag both for cfg and for the actual state is bad because if cfg is updated, then there is a moment when that flag can't be safely used to check the actual state. For example, when replication_anon had been true and was set to false, it took time to register the instance. In the meantime the C flag replication_anon was already false, although the instance is still anon (not present in _cluster). In the existing code it could lead to insignificant errors like when an anon instance was being registered, it could already accept IPROTO_REGISTER requests. It would fail on ER_READONLY instead of ER_UNSUPPORTED. It wasn't a critical problem, but still it wasn't correct to use cfg flag for checking the actual state. Now there is a separate cfg flag and a function for checking the real state. This patch is done because soon there will be a new option which also takes time to change: instance name. This commit sets a pattern how to deal with such options. In scope of #5029 NO_DOC=refactoring NO_CHANGELOG=refactoring NO_TEST=already covered --- src/box/applier.cc | 12 ++++----- src/box/box.cc | 55 ++++++++++++++++++++++++------------------ src/box/box.h | 4 +++ src/box/lua/info.c | 6 +++-- src/box/replication.cc | 4 +-- src/box/replication.h | 12 ++++----- 6 files changed, 52 insertions(+), 41 deletions(-) diff --git a/src/box/applier.cc b/src/box/applier.cc index 9e32d5bbc6..7a8f3e33a1 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -2315,7 +2315,7 @@ applier_subscribe(struct applier *applier) req.replicaset_uuid = REPLICASET_UUID; req.instance_uuid = INSTANCE_UUID; req.version_id = tarantool_version_id(); - req.is_anon = replication_anon; + req.is_anon = box_is_anon(); /* * Stop accepting local rows coming from a remote * instance as soon as local WAL starts accepting writes. @@ -2464,9 +2464,8 @@ applier_f(va_list ap) */ struct session *session = session_new_on_demand(); session_set_type(session, SESSION_TYPE_APPLIER); - /* - * The instance saves replication_anon value on bootstrap. + * The instance saves replicaset anon cfg-value on bootstrap. * If a freshly started instance sees it has received * REPLICASET_UUID but hasn't yet registered, it must be an * anonymous replica, hence the default value 'true'. @@ -2487,14 +2486,13 @@ applier_f(va_list ap) * The join will pause the applier * until WAL is created. */ - was_anon = replication_anon; - if (replication_anon) + was_anon = cfg_replication_anon; + if (was_anon) applier_fetch_snapshot(applier); else applier_join(applier); } - if (instance_id == REPLICA_ID_NIL && - !replication_anon) { + if (box_is_anon() && !cfg_replication_anon) { /* * The instance transitioned from anonymous or * is retrying final join. diff --git a/src/box/box.cc b/src/box/box.cc index a489dd5614..8b576a7ace 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -510,6 +510,12 @@ box_is_orphan(void) return is_orphan; } +bool +box_is_anon(void) +{ + return instance_id == REPLICA_ID_NIL; +} + int box_wait_ro(bool ro, double timeout) { @@ -1963,18 +1969,18 @@ box_set_replication_skip_conflict(void) void box_set_replication_anon(void) { - bool anon = box_check_replication_anon(); - if (anon == replication_anon) + assert(is_box_configured); + assert(cfg_replication_anon == box_is_anon()); + bool new_anon = box_check_replication_anon(); + if (new_anon == cfg_replication_anon) return; - - if (!anon) { - auto guard = make_scoped_guard([&]{ - replication_anon = !anon; - box_broadcast_ballot(); - }); - /* Turn anonymous instance into a normal one. */ - replication_anon = anon; + auto guard = make_scoped_guard([&]{ + cfg_replication_anon = !new_anon; box_broadcast_ballot(); + }); + cfg_replication_anon = new_anon; + box_broadcast_ballot(); + if (!new_anon) { /* * Reset all appliers. This will interrupt * anonymous follow they're in so that one of @@ -2003,10 +2009,7 @@ box_set_replication_anon(void) */ replicaset_follow(); replicaset_sync(); - guard.is_active = false; - } else if (!is_box_configured) { - replication_anon = anon; - box_broadcast_ballot(); + assert(!box_is_anon()); } else { /* * It is forbidden to turn a normal replica into @@ -2016,6 +2019,7 @@ box_set_replication_anon(void) "cannot be turned on after bootstrap" " has finished"); } + guard.is_active = false; } /** Trigger to catch ACKs from all nodes when need to wait for quorum. */ @@ -2759,7 +2763,7 @@ box_set_wal_cleanup_delay(void) * delay since they can't be a source * of replication. */ - if (replication_anon) + if (box_is_anon()) delay = 0; gc_set_wal_cleanup_delay(delay); return 0; @@ -3811,7 +3815,7 @@ box_process_register(struct iostream *io, const struct xrow_header *header) tt_uuid_str(&req.instance_uuid)); } - if (replication_anon) { + if (box_is_anon()) { tnt_raise(ClientError, ER_UNSUPPORTED, "Anonymous replica", "registration of non-anonymous nodes."); } @@ -3941,7 +3945,7 @@ box_process_join(struct iostream *io, const struct xrow_header *header) /* Check permissions */ access_check_universe_xc(PRIV_R); - if (replication_anon) { + if (box_is_anon()) { tnt_raise(ClientError, ER_UNSUPPORTED, "Anonymous replica", "registration of non-anonymous nodes."); } @@ -4063,7 +4067,7 @@ box_process_subscribe(struct iostream *io, const struct xrow_header *header) * Do not allow non-anonymous followers for anonymous * instances. */ - if (replication_anon && !req.is_anon) { + if (box_is_anon() && !req.is_anon) { tnt_raise(ClientError, ER_UNSUPPORTED, "Anonymous replica", "non-anonymous followers."); } @@ -4198,7 +4202,7 @@ box_process_vote(struct ballot *ballot) enum election_mode mode = box_check_election_mode(); ballot->can_lead = mode == ELECTION_MODE_CANDIDATE || mode == ELECTION_MODE_MANUAL; - ballot->is_anon = replication_anon; + ballot->is_anon = cfg_replication_anon; ballot->is_ro = is_ro_summary; ballot->is_booted = is_box_configured; vclock_copy(&ballot->vclock, &replicaset.vclock); @@ -4406,7 +4410,7 @@ bootstrap_from_master(struct replica *master) * Process initial data (snapshot or dirty disk data). */ engine_begin_initial_recovery_xc(NULL); - enum applier_state wait_state = replication_anon ? + enum applier_state wait_state = cfg_replication_anon ? APPLIER_FETCHED_SNAPSHOT : APPLIER_FINAL_JOIN; applier_resume_to_state(applier, wait_state, TIMEOUT_INFINITY); @@ -4419,7 +4423,7 @@ bootstrap_from_master(struct replica *master) engine_begin_final_recovery_xc(); recovery_journal_create(&replicaset.vclock); - if (!replication_anon) { + if (!cfg_replication_anon) { applier_resume_to_state(applier, APPLIER_JOINED, TIMEOUT_INFINITY); } @@ -4833,7 +4837,8 @@ box_cfg_xc(void) diag_raise(); box_set_replication_sync_timeout(); box_set_replication_skip_conflict(); - box_set_replication_anon(); + cfg_replication_anon = box_check_replication_anon(); + box_broadcast_ballot(); /* * Must be set before opening the server port, because it may be * requested by a client before the configuration is completed. @@ -4898,8 +4903,10 @@ box_cfg_xc(void) * The instance won't exist in _cluster space if it is an * anonymous replica, add it manually. */ + if (cfg_replication_anon != box_is_anon()) + panic("'replication_anon' cfg didn't work"); struct replica *self = replica_by_uuid(&INSTANCE_UUID); - if (!replication_anon) { + if (!cfg_replication_anon) { if (self == NULL || self->id == REPLICA_ID_NIL) { tnt_raise(ClientError, ER_UNKNOWN_REPLICA, tt_uuid_str(&INSTANCE_UUID), @@ -4924,7 +4931,7 @@ box_cfg_xc(void) * instance_id is not known, so Raft simply can't work. */ struct raft *raft = box_raft(); - if (!replication_anon) + if (!cfg_replication_anon) raft_cfg_instance_id(raft, instance_id); raft_cfg_vclock(raft, &replicaset.vclock); diff --git a/src/box/box.h b/src/box/box.h index e582f9c9bc..fdeb89bbbb 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -155,6 +155,10 @@ box_is_ro(void); bool box_is_orphan(void); +/** Check if the instance is not registered in the replicaset. */ +bool +box_is_anon(void); + /** * Wait until the instance switches to a desired mode. * \param ro wait read-only if set or read-write if unset diff --git a/src/box/lua/info.c b/src/box/lua/info.c index 03ab27e86d..e04d06a02d 100644 --- a/src/box/lua/info.c +++ b/src/box/lua/info.c @@ -267,7 +267,8 @@ lbox_info_id(struct lua_State *L) * at box.info.status. */ struct replica *self = replica_by_uuid(&INSTANCE_UUID); - if (self != NULL && (self->id != REPLICA_ID_NIL || replication_anon)) { + if (self != NULL && + (self->id != REPLICA_ID_NIL || cfg_replication_anon)) { lua_pushinteger(L, self->id); } else { luaL_pushnull(L); @@ -287,7 +288,8 @@ lbox_info_lsn(struct lua_State *L) { /* See comments in lbox_info_id */ struct replica *self = replica_by_uuid(&INSTANCE_UUID); - if (self != NULL && (self->id != REPLICA_ID_NIL || replication_anon)) { + if (self != NULL && + (self->id != REPLICA_ID_NIL || cfg_replication_anon)) { luaL_pushint64(L, vclock_get(box_vclock, self->id)); } else { luaL_pushint64(L, -1); diff --git a/src/box/replication.cc b/src/box/replication.cc index 7f834bab00..9baee0dca1 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -56,9 +56,9 @@ int replication_synchro_quorum = 1; double replication_synchro_timeout = 5.0; /* seconds */ double replication_sync_timeout = 300.0; /* seconds */ bool replication_skip_conflict = false; -bool replication_anon = false; int replication_threads = 1; +bool cfg_replication_anon = true; struct tt_uuid cfg_bootstrap_leader_uuid; struct uri cfg_bootstrap_leader_uri; @@ -763,7 +763,7 @@ replicaset_update(struct applier **appliers, int count, bool keep_connect) say_warn("No connection to %s", tt_uuid_str(&key.uuid)); } if (bootstrap_strategy == BOOTSTRAP_STRATEGY_AUTO && - !replication_anon) { + !cfg_replication_anon) { tnt_raise(ClientError, ER_BOOTSTRAP_CONNECTION_NOT_TO_ALL); } diff --git a/src/box/replication.h b/src/box/replication.h index 333b34381b..591ed6d1b7 100644 --- a/src/box/replication.h +++ b/src/box/replication.h @@ -118,6 +118,12 @@ enum replicaset_state { REPLICASET_READY, }; +/** + * Whether this replica should be anonymous or not, e.g. be present in _cluster + * table and have a non-zero id. + */ +extern bool cfg_replication_anon; + /** * The uuid of the bootstrap leader configured via the bootstrap_leader * configuration option. @@ -189,12 +195,6 @@ extern double replication_sync_timeout; */ extern bool replication_skip_conflict; -/** - * Whether this replica will be anonymous or not, e.g. be preset - * in _cluster table and have a non-zero id. - */ -extern bool replication_anon; - /** How many threads to use for decoding incoming replication stream. */ extern int replication_threads; -- GitLab