diff --git a/src/box/box.cc b/src/box/box.cc
index cc0b8ed739faed6829aeeccceda25bb129b639f7..ed2627771dce06cb68eb2a7ad51351ae1dcfb950 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -271,6 +271,15 @@ box_check_writable(void)
 				error_set_uuid(e, "queue_owner_uuid", &r->uuid);
 				error_append_msg(e, " (%s)", tt_uuid_str(&r->uuid));
 			}
+			if (txn_limbo.owner_id == instance_id) {
+				if (txn_limbo.is_frozen_due_to_fencing) {
+					error_append_msg(e, " and is frozen due to "
+							    "fencing");
+				} else if (txn_limbo.is_frozen_until_promotion) {
+					error_append_msg(e, " and is frozen until "
+							    "promotion");
+				}
+			}
 		} else {
 			if (is_ro)
 				error_append_msg(e, "box.cfg.read_only is true");
@@ -2005,8 +2014,10 @@ box_promote(void)
 	 * Currently active leader (the instance that is seen as leader by both
 	 * raft and txn_limbo) can't issue another PROMOTE.
 	 */
-	bool is_leader = txn_limbo_replica_term(&txn_limbo, instance_id) ==
-			 raft->term && txn_limbo.owner_id == instance_id;
+	bool is_leader =
+		txn_limbo_replica_term(&txn_limbo, instance_id) == raft->term &&
+		txn_limbo.owner_id == instance_id &&
+		!txn_limbo.is_frozen_until_promotion;
 	if (box_election_mode != ELECTION_MODE_OFF)
 		is_leader = is_leader && raft->state == RAFT_STATE_LEADER;
 
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index d85e33bb2ca3db73848c56177de099c30607c9f6..f8382243cb63be7820c7f0e31087955c1dd325e8 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -54,6 +54,7 @@ txn_limbo_create(struct txn_limbo *limbo)
 	limbo->is_in_rollback = false;
 	limbo->svp_confirmed_lsn = -1;
 	limbo->frozen_reasons = 0;
+	limbo->is_frozen_until_promotion = true;
 }
 
 static inline bool
@@ -830,6 +831,23 @@ txn_limbo_req_rollback(struct txn_limbo *limbo,
 	}
 }
 
+/**
+ * Clear the limbo's is_frozen_until_promotion flag, which txn_limbo_create()
+ * sets, and call box_update_ro_summary() afterwards. txn_limbo_req_commit()
+ * invokes this when a promote-type request carries a term greater than
+ * limbo->promote_greatest_term. Nothing is done while box is not configured.
+ * NOTE(review): presumably this keeps the limbo frozen while old PROMOTEs are
+ * replayed during recovery - confirm.
+ */
+static inline void
+txn_limbo_unfreeze_on_first_promote(struct txn_limbo *limbo)
+{
+	if (box_is_configured()) {
+		limbo->is_frozen_until_promotion = false;
+		box_update_ro_summary();
+	}
+}
+
 void
 txn_limbo_req_commit(struct txn_limbo *limbo, const struct synchro_request *req)
 {
@@ -854,9 +872,11 @@ txn_limbo_req_commit(struct txn_limbo *limbo, const struct synchro_request *req)
 		vclock_follow(&limbo->promote_term_map, origin, term);
 		if (term > limbo->promote_greatest_term) {
 			limbo->promote_greatest_term = term;
-			if (iproto_type_is_promote_request(req->type) &&
-			    term >= box_raft()->volatile_term)
-				txn_limbo_unfence(&txn_limbo);
+			if (iproto_type_is_promote_request(req->type)) {
+				if (term >= box_raft()->volatile_term)
+					txn_limbo_unfence(limbo);
+				txn_limbo_unfreeze_on_first_promote(limbo);
+			}
 		}
 	} else if (iproto_type_is_promote_request(req->type) &&
 		   limbo->promote_greatest_term > 1) {
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index 095c965a0c53c9ec075682ae3df5ef6644668b66..7167065c221c664bc96a13f366948324a3d284df 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -207,6 +207,12 @@ struct txn_limbo {
 			 * remote instance.
 			 */
 			bool is_frozen_due_to_fencing : 1;
+			/*
+			 * This mode is always on upon node start and is turned
+			 * off by any new PROMOTE arriving either via
+			 * replication or issued by the node.
+			 */
+			bool is_frozen_until_promotion : 1;
 		};
 	};
 };
diff --git a/test/replication/gh-5140-qsync-casc-rollback.result b/test/replication/gh-5140-qsync-casc-rollback.result
index d3208e1a4f210b87ddbf221422e6e512d6bb6c4a..a71d8f9818cd40f8a9a61ed42c9a782b87899840 100644
--- a/test/replication/gh-5140-qsync-casc-rollback.result
+++ b/test/replication/gh-5140-qsync-casc-rollback.result
@@ -206,6 +206,10 @@ box.space.sync:select{}
  | - [4]
  | ...
 
+box.ctl.promote()
+ | ---
+ | ...
+
 box.space.sync:drop()
  | ---
  | ...
diff --git a/test/replication/gh-5140-qsync-casc-rollback.test.lua b/test/replication/gh-5140-qsync-casc-rollback.test.lua index 96ddfd2609dbff795443a2d155bb03c0a7f54436..15732e332414da390dd7707513b52b33f2b0bf11 100644 --- a/test/replication/gh-5140-qsync-casc-rollback.test.lua +++ b/test/replication/gh-5140-qsync-casc-rollback.test.lua @@ -97,6 +97,8 @@ test_run:switch('default') box.space.async:select{} box.space.sync:select{} +box.ctl.promote() + box.space.sync:drop() box.space.async:drop() diff --git a/test/replication/gh-5163-qsync-restart-crash.result b/test/replication/gh-5163-qsync-restart-crash.result index 1b4d3d9b5cc1baaad43c3ae6547d93671d99929b..79bf96e474ecc2d8350af7e12d4d5b94e332ae5d 100644 --- a/test/replication/gh-5163-qsync-restart-crash.result +++ b/test/replication/gh-5163-qsync-restart-crash.result @@ -30,6 +30,9 @@ box.space.sync:select{} | --- | - - [1] | ... +box.ctl.promote() + | --- + | ... box.space.sync:drop() | --- | ... diff --git a/test/replication/gh-5163-qsync-restart-crash.test.lua b/test/replication/gh-5163-qsync-restart-crash.test.lua index c8d54aad2397e37d30df72a693e88f20def9deda..0298d8ce6e30e232bfca2eb0c79a5dc63fcf5d56 100644 --- a/test/replication/gh-5163-qsync-restart-crash.test.lua +++ b/test/replication/gh-5163-qsync-restart-crash.test.lua @@ -12,5 +12,6 @@ box.ctl.promote() box.space.sync:replace{1} test_run:cmd('restart server default') box.space.sync:select{} +box.ctl.promote() box.space.sync:drop() box.ctl.demote() diff --git a/test/replication/gh-5288-qsync-recovery.result b/test/replication/gh-5288-qsync-recovery.result index 704b71d930255f3eb5e5e3116096a71adcd3a27e..dc796181d57e2ebf1efe82453ec4c3b8c707568b 100644 --- a/test/replication/gh-5288-qsync-recovery.result +++ b/test/replication/gh-5288-qsync-recovery.result @@ -25,6 +25,9 @@ box.snapshot() | ... test_run:cmd('restart server default') | +box.ctl.promote() + | --- + | ... box.space.sync:drop() | --- | ... 
diff --git a/test/replication/gh-5288-qsync-recovery.test.lua b/test/replication/gh-5288-qsync-recovery.test.lua index 2455f7278abe28b9e84d14c21140ac098ead084c..095bc71f9b95166ef75c7ce848ad47ba6a163d31 100644 --- a/test/replication/gh-5288-qsync-recovery.test.lua +++ b/test/replication/gh-5288-qsync-recovery.test.lua @@ -9,5 +9,6 @@ box.ctl.promote() s:insert{1} box.snapshot() test_run:cmd('restart server default') +box.ctl.promote() box.space.sync:drop() box.ctl.demote() diff --git a/test/replication/gh-5298-qsync-recovery-snap.result b/test/replication/gh-5298-qsync-recovery-snap.result index 0883fe5f5e1a5b66c46afa4aa05a8b6945f00c2d..52e13d75d07457ce9bc269d2d37c4018de16b283 100644 --- a/test/replication/gh-5298-qsync-recovery-snap.result +++ b/test/replication/gh-5298-qsync-recovery-snap.result @@ -43,58 +43,16 @@ box.snapshot() test_run:cmd("restart server default") | --- Could hang if the limbo would incorrectly handle the snapshot end. -box.space.sync:replace{11} +-- Would be non-empty if limbo would incorrectly handle the snapshot end. +box.info.synchro.queue.len | --- - | - [11] + | - 0 | ... -old_synchro_quorum = box.cfg.replication_synchro_quorum - | --- - | ... -old_synchro_timeout = box.cfg.replication_synchro_timeout - | --- - | ... - -box.cfg{ \ - replication_synchro_timeout = 0.001, \ - replication_synchro_quorum = 2, \ -} - | --- - | ... -box.space.sync:replace{12} - | --- - | - error: Quorum collection for a synchronous transaction is timed out - | ... - -box.cfg{ \ - replication_synchro_timeout = 1000, \ - replication_synchro_quorum = 1, \ -} - | --- - | ... -box.space.sync:replace{13} - | --- - | - [13] - | ... -box.space.sync:get({11}) - | --- - | - [11] - | ... -box.space.sync:get({12}) - | --- - | ... -box.space.sync:get({13}) +box.ctl.promote() | --- - | - [13] | ... -box.cfg{ \ - replication_synchro_timeout = old_synchro_timeout, \ - replication_synchro_quorum = old_synchro_quorum, \ -} - | --- - | ... box.space.sync:drop() | --- | ... 
diff --git a/test/replication/gh-5298-qsync-recovery-snap.test.lua b/test/replication/gh-5298-qsync-recovery-snap.test.lua index 084cde963d8ef087cddf69d855e51248c06f8aea..55d9501647edce87aaef039ee65eb8eadb3d760b 100644 --- a/test/replication/gh-5298-qsync-recovery-snap.test.lua +++ b/test/replication/gh-5298-qsync-recovery-snap.test.lua @@ -20,31 +20,11 @@ box.snapshot() test_run:cmd("restart server default") --- Could hang if the limbo would incorrectly handle the snapshot end. -box.space.sync:replace{11} +-- Would be non-empty if limbo would incorrectly handle the snapshot end. +box.info.synchro.queue.len -old_synchro_quorum = box.cfg.replication_synchro_quorum -old_synchro_timeout = box.cfg.replication_synchro_timeout - -box.cfg{ \ - replication_synchro_timeout = 0.001, \ - replication_synchro_quorum = 2, \ -} -box.space.sync:replace{12} - -box.cfg{ \ - replication_synchro_timeout = 1000, \ - replication_synchro_quorum = 1, \ -} -box.space.sync:replace{13} -box.space.sync:get({11}) -box.space.sync:get({12}) -box.space.sync:get({13}) +box.ctl.promote() -box.cfg{ \ - replication_synchro_timeout = old_synchro_timeout, \ - replication_synchro_quorum = old_synchro_quorum, \ -} box.space.sync:drop() box.space.loc:drop() box.ctl.demote() diff --git a/test/replication/gh-5874-qsync-txn-recovery.result b/test/replication/gh-5874-qsync-txn-recovery.result index 01328a9e32cfda88bba221cbe6525e23426397a5..11f2fc9c79358b447c459619c16d8981c26175e5 100644 --- a/test/replication/gh-5874-qsync-txn-recovery.result +++ b/test/replication/gh-5874-qsync-txn-recovery.result @@ -154,6 +154,10 @@ loc:select() | - [2] | - [3] | ... + +box.ctl.promote() + | --- + | ... async:drop() | --- | ... 
diff --git a/test/replication/gh-5874-qsync-txn-recovery.test.lua b/test/replication/gh-5874-qsync-txn-recovery.test.lua index 6ddf164ace1af74cc286c48f8c6dd5c7c945cc62..5bf3ba206ef7d9345033ca22774105af46f502f4 100644 --- a/test/replication/gh-5874-qsync-txn-recovery.test.lua +++ b/test/replication/gh-5874-qsync-txn-recovery.test.lua @@ -80,6 +80,8 @@ loc = box.space.loc async:select() sync:select() loc:select() + +box.ctl.promote() async:drop() sync:drop() loc:drop() diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg index 3eee0803c5dae9c06fd27a6692ba31c554e56b19..8ed9eeb0343b8c91457df1fd1fcacc2b93bd2295 100644 --- a/test/replication/suite.cfg +++ b/test/replication/suite.cfg @@ -18,6 +18,7 @@ "gh-4424-misc-orphan-on-reconfiguration-error.test.lua": {}, "gh-5213-qsync-applier-order.test.lua": {}, "gh-5213-qsync-applier-order-3.test.lua": {}, + "gh-5288-qsync-recovery.test.lua": {}, "gh-5426-election-on-off.test.lua": {}, "gh-5430-cluster-mvcc.test.lua": {}, "gh-5433-election-restart-recovery.test.lua": {},