From ce0a83eb46cb0666e41d010e24e595874a4c2017 Mon Sep 17 00:00:00 2001 From: Serge Petrenko <sergepetrenko@tarantool.org> Date: Wed, 15 Jun 2022 15:21:50 +0300 Subject: [PATCH] txn_limbo: rework limbo->frozen flag Soon there will be more reasons for a transaction limbo to be frozen. Let's make the limbo->frozen flag a bitmap and rename it to limbo->frozen_reasons. The first bit, named is_frozen_due_to_fencing, represents the only current reason for the limbo to be frozen. While we are at it, rename txn_limbo_(un)freeze to txn_limbo_(un)fence to better reflect the situation. Part-of #5295 NO_DOC=refactoring NO_TEST=refactoring NO_CHANGELOG=refactoring --- src/box/raft.c | 4 ++-- src/box/txn_limbo.c | 26 ++++++++++++++++---------- src/box/txn_limbo.h | 32 ++++++++++++++++++++++---------- 3 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/box/raft.c b/src/box/raft.c index fb518538ea..25da3f5df9 100644 --- a/src/box/raft.c +++ b/src/box/raft.c @@ -287,7 +287,7 @@ box_raft_fence(void) !election_fencing_enabled || box_raft_election_fencing_paused) return; - txn_limbo_freeze(&txn_limbo); + txn_limbo_fence(&txn_limbo); raft_resign(raft); } @@ -563,7 +563,7 @@ box_raft_set_election_fencing_enabled(bool enabled) election_fencing_enabled = enabled; say_info("RAFT: fencing %s", enabled ? 
"enabled" : "disabled"); if (!enabled) - txn_limbo_unfreeze(&txn_limbo); + txn_limbo_unfence(&txn_limbo); replicaset_on_health_change(); } diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index 8fc953e250..791c68f566 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -52,13 +52,19 @@ txn_limbo_create(struct txn_limbo *limbo) limbo->rollback_count = 0; limbo->is_in_rollback = false; limbo->svp_confirmed_lsn = -1; - limbo->frozen = false; + limbo->frozen_reasons = 0; +} + +static inline bool +txn_limbo_is_frozen(const struct txn_limbo *limbo) +{ + return limbo->frozen_reasons != 0; } bool txn_limbo_is_ro(struct txn_limbo *limbo) { - return limbo->frozen || + return txn_limbo_is_frozen(limbo) || (limbo->owner_id != REPLICA_ID_NIL && limbo->owner_id != instance_id); } @@ -237,7 +243,7 @@ txn_limbo_wait_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry) double deadline = start_time + replication_synchro_timeout; double timeout = deadline - fiber_clock(); int rc = fiber_cond_wait_timeout(&limbo->wait_cond, timeout); - if (limbo->frozen) + if (txn_limbo_is_frozen(limbo)) goto wait; if (txn_limbo_entry_is_complete(entry)) goto complete; @@ -586,7 +592,7 @@ txn_limbo_ack(struct txn_limbo *limbo, uint32_t replica_id, int64_t lsn) { if (rlist_empty(&limbo->queue)) return; - if (limbo->frozen) + if (txn_limbo_is_frozen(limbo)) return; assert(!txn_limbo_is_ro(limbo)); /* @@ -849,7 +855,7 @@ txn_limbo_req_commit(struct txn_limbo *limbo, const struct synchro_request *req) if (term > limbo->promote_greatest_term) { limbo->promote_greatest_term = term; if (iproto_type_is_promote_request(req->type)) - txn_limbo_unfreeze(&txn_limbo); + txn_limbo_unfence(&txn_limbo); } } else if (iproto_type_is_promote_request(req->type) && limbo->promote_greatest_term > 1) { @@ -918,7 +924,7 @@ txn_limbo_process(struct txn_limbo *limbo, const struct synchro_request *req) void txn_limbo_on_parameters_change(struct txn_limbo *limbo) { - if (rlist_empty(&limbo->queue) || 
limbo->frozen) + if (rlist_empty(&limbo->queue) || txn_limbo_is_frozen(limbo)) return; struct txn_limbo_entry *e; int64_t confirm_lsn = -1; @@ -948,16 +954,16 @@ txn_limbo_on_parameters_change(struct txn_limbo *limbo) } void -txn_limbo_freeze(struct txn_limbo *limbo) +txn_limbo_fence(struct txn_limbo *limbo) { - limbo->frozen = true; + limbo->is_frozen_due_to_fencing = true; box_update_ro_summary(); } void -txn_limbo_unfreeze(struct txn_limbo *limbo) +txn_limbo_unfence(struct txn_limbo *limbo) { - limbo->frozen = false; + limbo->is_frozen_due_to_fencing = false; box_update_ro_summary(); } diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h index 7db69563d1..095c965a0c 100644 --- a/src/box/txn_limbo.h +++ b/src/box/txn_limbo.h @@ -189,14 +189,26 @@ struct txn_limbo { * synchro command (promote/demote/...) fails. */ int64_t svp_confirmed_lsn; - /** - * Whether the limbo is frozen. This mode prevents CONFIRMs and - * ROLLBACKs being written by this instance. This mode is turned on when - * quorum is lost if this instance is the current RAFT leader and - * fencing is enabled. Instance leaves this mode when it becomes leader - * again or PROMOTE/DEMOTE arrives from some remote instance. - */ - bool frozen; + union { + /** + * Whether the limbo is frozen. This mode prevents CONFIRMs and + * ROLLBACKs being written by this instance. This, in turn, + * helps to prevent split-brain situations, when a node + * finalizes some transaction before knowing that the + * transaction was already finalized by someone else. + */ + uint8_t frozen_reasons; + struct { + /* + * This mode is turned on when quorum is lost if this + * instance is the current RAFT leader and fencing is + * enabled. Instance leaves this mode when it becomes + * leader again or PROMOTE/DEMOTE arrives from some + * remote instance. + */ + bool is_frozen_due_to_fencing : 1; + }; + }; }; /** @@ -413,13 +425,13 @@ txn_limbo_on_parameters_change(struct txn_limbo *limbo); * Freeze limbo. 
Prevent CONFIRMs and ROLLBACKs until limbo is unfrozen. */ void -txn_limbo_freeze(struct txn_limbo *limbo); +txn_limbo_fence(struct txn_limbo *limbo); /** * Unfreeze limbo. Continue limbo processing as usual. */ void -txn_limbo_unfreeze(struct txn_limbo *limbo); +txn_limbo_unfence(struct txn_limbo *limbo); /** * Initialize qsync engine. -- GitLab