From 26a8317f351c2fbedaf7a4528c8aba2c98f44362 Mon Sep 17 00:00:00 2001 From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Date: Fri, 11 Jun 2021 00:19:05 +0200 Subject: [PATCH] txn: introduce TXN_SIGNATURE_ABORT Sometimes a transaction can fail before it goes to WAL. Then the signature didn't have any sign of it, as well as the journal_entry result (which might be not even created yet). Still if txn_commit/try_async() are called, they invoke on_rollback triggers. The triggers only can see TXN_SIGNATURE_ROLLBACK and can't distinguish it from a real rollback like box.rollback(). Due to that some important errors like a transaction manager conflict or OOM are lost. The patch introduces a new error signature TXN_SIGNATURE_ABORT which says the transaction didn't manage to try going to WAL and for an error need to look at the global diag. The next patch is going to stop overriding it with TXN_SIGNATURE_ROLLBACK. Part of #6027 --- src/box/applier.cc | 4 ++-- src/box/box.cc | 8 ++++---- src/box/memtx_engine.c | 2 +- src/box/txn.c | 24 ++++++++++++++++++++---- src/box/txn.h | 14 ++++++++++++++ src/box/vy_scheduler.c | 2 +- 6 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/box/applier.cc b/src/box/applier.cc index 3fd71393db..10cea26a77 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -261,7 +261,7 @@ apply_snapshot_row(struct xrow_header *row) rollback_stmt: txn_rollback_stmt(txn); rollback: - txn_rollback(txn); + txn_abort(txn); fiber_gc(); return -1; } @@ -942,7 +942,7 @@ apply_plain_tx(struct stailq *rows, bool skip_conflict, bool use_triggers) return txn_commit_try_async(txn); fail: - txn_rollback(txn); + txn_abort(txn); return -1; } diff --git a/src/box/box.cc b/src/box/box.cc index 8e601a268c..ab7d983c9f 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -255,7 +255,7 @@ box_process_rw(struct request *request, struct space *space, rollback: if (is_autocommit) { - txn_rollback(txn); + txn_abort(txn); fiber_gc(); } error: @@ -395,7 +395,7 @@ wal_stream_abort(struct wal_stream *stream) { struct txn *tx = in_txn(); if (tx != NULL) - txn_rollback(tx); + txn_abort(tx); stream->tsn = 0; fiber_gc(); } @@ -3247,9 +3247,9 @@ local_recovery(const struct tt_uuid *instance_uuid, engine_begin_final_recovery_xc(); recover_remaining_wals(recovery, &wal_stream.base, NULL, false); if (wal_stream_has_tx(&wal_stream)) { - wal_stream_abort(&wal_stream); diag_set(XlogError, "found a not finished transaction " "in the log"); + wal_stream_abort(&wal_stream); if (!is_force_recovery) diag_raise(); diag_log(); @@ -3274,9 +3274,9 @@ local_recovery(const struct tt_uuid *instance_uuid, recovery_stop_local(recovery); recover_remaining_wals(recovery, &wal_stream.base, NULL, true); if (wal_stream_has_tx(&wal_stream)) { - wal_stream_abort(&wal_stream); diag_set(XlogError, "found a not finished transaction " "in the log in hot standby mode"); + wal_stream_abort(&wal_stream); if (!is_force_recovery) diag_raise(); diag_log(); diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index 6c4982b9ff..c662a3c8cc 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -277,7 +277,7 @@ memtx_engine_recover_snapshot_row(struct memtx_engine *memtx, rollback_stmt: txn_rollback_stmt(txn); rollback: - txn_rollback(txn); + txn_abort(txn); fiber_gc(); return -1; } diff --git a/src/box/txn.c b/src/box/txn.c index b3819b8f91..596257e415 100644 --- a/src/box/txn.c +++ b/src/box/txn.c @@ -264,6 +264,10 @@ diag_set_txn_sign_detailed(const char *file, unsigned line, int64_t signature) case TXN_SIGNATURE_SYNC_ROLLBACK: diag_set_detailed(file, line, ClientError, ER_SYNC_ROLLBACK); return; + case TXN_SIGNATURE_ABORT: + if (diag_is_empty(diag_get())) + panic("Tried to get an absent transaction error"); + return; } panic("Transaction signature %lld can't be converted to an error " "at %s:%u", (long long)signature, file, line); @@ -870,7 +874,7 @@ txn_commit_try_async(struct txn *txn) rollback: assert(txn->fiber == NULL); - txn_rollback(txn); + txn_abort(txn); return -1; } @@ -883,7 +887,7 @@ txn_commit(struct txn *txn) txn->fiber = fiber(); if (txn_prepare(txn) != 0) - goto rollback; + goto rollback_abort; if (txn_commit_nop(txn)) { txn_free(txn); @@ -892,7 +896,7 @@ txn_commit(struct txn *txn) req = txn_journal_entry_new(txn); if (req == NULL) - goto rollback; + goto rollback_abort; /* * Do not cache the flag value in a variable. The flag might be deleted * during WAL write. This can happen for async transactions created @@ -914,7 +918,7 @@ txn_commit(struct txn *txn) */ limbo_entry = txn_limbo_append(&txn_limbo, origin_id, txn); if (limbo_entry == NULL) - goto rollback; + goto rollback_abort; } fiber_set_txn(fiber(), NULL); @@ -951,7 +955,10 @@ txn_commit(struct txn *txn) diag_log(); if (txn_has_flag(txn, TXN_WAIT_SYNC)) txn_limbo_abort(&txn_limbo, limbo_entry); +rollback_abort: + txn->signature = TXN_SIGNATURE_ABORT; rollback: + assert(txn->signature != TXN_SIGNATURE_UNKNOWN); assert(txn->fiber != NULL); if (!txn_has_flag(txn, TXN_IS_DONE)) { fiber_set_txn(fiber(), txn); @@ -985,6 +992,15 @@ txn_rollback(struct txn *txn) fiber_set_txn(fiber(), NULL); } +void +txn_abort(struct txn *txn) +{ + assert(!diag_is_empty(diag_get())); + assert(txn->signature == TXN_SIGNATURE_UNKNOWN); + txn->signature = TXN_SIGNATURE_ABORT; + txn_rollback(txn); +} + int txn_check_singlestatement(struct txn *txn, const char *where) { diff --git a/src/box/txn.h b/src/box/txn.h index 7638854a7f..8741dc6a15 100644 --- a/src/box/txn.h +++ b/src/box/txn.h @@ -129,6 +129,11 @@ enum { * read. */ TXN_SIGNATURE_SYNC_ROLLBACK = JOURNAL_ENTRY_ERR_MIN - 3, + /** + * Aborted before it could be written due an error which is already + * installed into the global diag. + */ + TXN_SIGNATURE_ABORT = JOURNAL_ENTRY_ERR_MIN - 4, }; /** @@ -491,6 +496,15 @@ txn_commit(struct txn *txn); void txn_rollback(struct txn *txn); +/** + * Rollback a transaction due to an error which is already installed into the + * global diag. This is preferable over the plain rollback when there are + * already triggers installed and they might need to know the exact reason for + * the rollback. + */ +void +txn_abort(struct txn *txn); + /** * Submit a transaction to the journal. * @pre txn == in_txn() diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index 917b75f934..b3e4120cde 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -920,7 +920,7 @@ vy_deferred_delete_batch_process_f(struct cmsg *cmsg) return; fail_rollback: - txn_rollback(txn); + txn_abort(txn); fiber_gc(); fail: batch->is_failed = true; -- GitLab