diff --git a/src/box/memtx_engine.cc b/src/box/memtx_engine.cc index dedf211c46ec983128843037850270abcd39bd55..e24f5c67379fbfb441ef551089da5b3162dbceb5 100644 --- a/src/box/memtx_engine.cc +++ b/src/box/memtx_engine.cc @@ -541,6 +541,7 @@ memtx_engine_prepare(struct engine *engine, struct txn *txn) assert(stmt->space->engine == engine); memtx_tx_history_prepare_stmt(stmt); } + memtx_tx_prepare_finalize(txn); } if (txn->is_schema_changed) memtx_tx_abort_all_for_ddl(txn); diff --git a/src/box/memtx_tx.c b/src/box/memtx_tx.c index c69dc0d2ef16b0ed57e39a24ca760847ef8d3314..ef79c3fcfe3cbf55ccd404d0507347c8efb32e88 100644 --- a/src/box/memtx_tx.c +++ b/src/box/memtx_tx.c @@ -38,6 +38,100 @@ #include "schema_def.h" #include "small/mempool.h" +/** + * Link that connects a memtx_story with older and newer stories of the same + * key in index. + */ +struct memtx_story_link { + /** Story that was happened after that story was ended. */ + struct memtx_story *newer_story; + /** Story that was happened before that story was started. */ + struct memtx_story *older_story; + /** List of interval items @sa gap_item. */ + struct rlist nearby_gaps; + /** + * If the tuple of story is physically in index, here the pointer + * to that index is stored. + */ + struct index *in_index; +}; + +/** + * A part of a history of a value in space. + * It's a story about a tuple, from the point it was added to space to the + * point when it was deleted from a space. + * All stories are linked into a list of stories of the same key of each index. + */ +struct memtx_story { + /** The story is about this tuple. The tuple is referenced. */ + struct tuple *tuple; + /** + * Statement that introduced this story. Is set to NULL when the + * statement's transaction becomes committed. Can also be NULL if we + * don't know who introduced that story, the tuple was added by a + * transaction that was completed and destroyed some time ago. 
+ */ + struct txn_stmt *add_stmt; + /** + * Prepare sequence number of add_stmt's transaction. Is set when + * the transaction is prepared. Can be 0 if the transaction is + * in progress or we don't know who introduced that story. + */ + int64_t add_psn; + /** + * Statement that ended this story. Is set to NULL when the statement's + * transaction becomes committed. Can also be NULL if the tuple has not + * been deleted yet. + */ + struct txn_stmt *del_stmt; + /** + * Prepare sequence number of del_stmt's transaction. Is set when + * the transaction is prepared. Can be 0 if the transaction is + * in progress or if nobody has deleted the tuple. + */ + int64_t del_psn; + /** + * List of trackers - transactions that has read this tuple. + */ + struct rlist reader_list; + /** + * Link in tx_manager::all_stories + */ + struct rlist in_all_stories; + /** + * Link in space::memtx_tx_stories. + */ + struct rlist in_space_stories; + /** + * Number of indexes in this space - and the count of link[]. + */ + uint32_t index_count; + /** + * Status of story, describes the reason why story cannot be deleted. + * It is initialized in memtx_story constructor and is changed only in + * memtx_tx_story_gc. + */ + enum memtx_tx_story_status status; + /** + * Flag is set when @a tuple is not placed in primary key and + * the story is the only reason why @a tuple cannot be deleted. + */ + bool tuple_is_retained; + /* + * Transaction that added this story was rollbacked: this story is + * absolutely invisible — its only purpose is to retain the reader list. + * It is present at the end of some history chains and completely + * unlinked from others, which also implies it is not present in the + * corresponding indexes. + */ + bool rollbacked; + /** + * Link with older and newer stories (and just tuples) for each + * index respectively. 
+ */ + struct memtx_story_link link[]; +}; + static uint32_t memtx_tx_story_key_hash(const struct tuple *a) { @@ -59,6 +153,37 @@ memtx_tx_story_key_hash(const struct tuple *a) #define MH_SOURCE #include "salad/mhash.h" +/** + * Record that links two transactions, breaker and victim. + * See memtx_tx_cause_conflict for details. + */ +struct tx_conflict_tracker { + /** TX that aborts victim on commit. */ + struct txn *breaker; + /** TX that will be aborted on breaker's commit. */ + struct txn *victim; + /** Link in breaker->conflict_list. */ + struct rlist in_conflict_list; + /** Link in victim->conflicted_by_list. */ + struct rlist in_conflicted_by_list; +}; + +/** + * Record that links transaction and a story that the transaction has read. + */ +struct tx_read_tracker { + /** The TX that read story. */ + struct txn *reader; + /** The story that was read by reader. */ + struct memtx_story *story; + /** Link in story->reader_list. */ + struct rlist in_reader_list; + /** Link in reader->read_set. */ + struct rlist in_read_set; + /** Bit field of indexes in which the data was read by reader. */ + uint64_t index_mask; +}; + /** * An element that stores the fact that some transaction have read * a full key and found nothing. @@ -540,7 +665,21 @@ memtx_tx_abort_all_for_ddl(struct txn *ddl_owner) } } -int +/** + * Notify TX manager that if transaction @a breaker is committed then the + * transaction @a victim must be aborted due to conflict. It is achieved + * by adding corresponding entry (of tx_conflict_tracker type) to @a breaker + * conflict list. In case there's already such entry, then move it to the head + * of the list in order to optimize next invocations of this function. + * For example: there are two rw transactions in progress, one has read + * some value while the second is about to overwrite it. If the second + * is committed first, the first must be aborted. + * + * NB: can trigger story garbage collection. + * + * @return 0 on success, -1 on memory error. 
+ */ +static int memtx_tx_cause_conflict(struct txn *breaker, struct txn *victim) { assert(breaker != victim); @@ -618,7 +757,22 @@ memtx_tx_adjust_position_in_read_view_list(struct txn *txn) rlist_add_tail(&prev_txn->in_read_view_txs, &txn->in_read_view_txs); } -void +/** + * Handle conflict when @a victim has read and @a breaker has written the same + * key, and @a breaker is prepared. The function must be called in two cases: + * 1. @a breaker becomes prepared for every victim with non-empty intersection + * of victim read set / breaker write set. + * 2. @a victim has to read confirmed value and skips the value that prepared + * @a breaker wrote. + * If @a victim is read-only or hasn't made any changes, it should be sent + * to read view, in which it will not see @a breaker's changes. If @a victim + * is already in a read view - a read view that does not see every breaker + * changes is chosen. + * Otherwise @a victim must be marked as conflicted and aborted on occasion. + * + * NB: can trigger story garbage collection. + */ +static void memtx_tx_handle_conflict(struct txn *breaker, struct txn *victim) { assert(breaker != victim); @@ -1703,6 +1857,16 @@ check_hole(struct space *space, uint32_t index, return 0; } +/** + * Record in TX manager that a transaction @txn has read a @tuple in @space. + * + * NB: can trigger story garbage collection. + * + * @return 0 on success, -1 on memory error. + */ +static int +memtx_tx_track_read(struct txn *txn, struct space *space, struct tuple *tuple); + /** * Check that replaced tuples in space's indexes does not violate common * replace rules. See memtx_space_replace_all_keys comment. @@ -2605,6 +2769,29 @@ memtx_tx_history_prepare_stmt(struct txn_stmt *stmt) memtx_tx_story_gc(); } +void +memtx_tx_prepare_finalize(struct txn *txn) +{ + struct tx_conflict_tracker *entry, *next; + /* Handle conflicts. 
*/ + rlist_foreach_entry_safe(entry, &txn->conflict_list, + in_conflict_list, next) { + assert(entry->breaker == txn); + memtx_tx_handle_conflict(txn, entry->victim); + rlist_del(&entry->in_conflict_list); + rlist_del(&entry->in_conflicted_by_list); + } + /* Just free conflict list - we don't need it anymore. */ + rlist_foreach_entry_safe(entry, &txn->conflicted_by_list, + in_conflicted_by_list, next) { + assert(entry->victim == txn); + rlist_del(&entry->in_conflict_list); + rlist_del(&entry->in_conflicted_by_list); + } + assert(rlist_empty(&txn->conflict_list)); + assert(rlist_empty(&txn->conflicted_by_list)); +} + void memtx_tx_history_commit_stmt(struct txn_stmt *stmt, size_t *bsize) { @@ -2713,6 +2900,10 @@ memtx_tx_tuple_clarify_slow(struct txn *txn, struct space *space, struct tuple *tuple, struct index *index, uint32_t mk_index) { + if (!tuple_has_flag(tuple, TUPLE_IS_DIRTY)) { + memtx_tx_track_read(txn, space, tuple); + return tuple; + } bool is_prepared_ok = detect_whether_prepared_ok(txn); struct tuple *res = memtx_tx_tuple_clarify_impl(txn, space, tuple, index, mk_index, @@ -2891,9 +3082,11 @@ memtx_tx_track_read_story(struct txn *txn, struct space *space, /** * Record in TX manager that a transaction @txn have read a @tuple in @space. * + * NB: can trigger story garbage collection. + * * @return 0 on success, -1 on memory error. 
*/ -int +static int memtx_tx_track_read(struct txn *txn, struct space *space, struct tuple *tuple) { if (tuple == NULL) @@ -3211,6 +3404,32 @@ memtx_tx_clean_txn(struct txn *txn) in_full_scan_list); memtx_tx_full_scan_item_delete(item); } + + struct tx_read_tracker *tracker, *tmp; + rlist_foreach_entry_safe(tracker, &txn->read_set, + in_read_set, tmp) { + rlist_del(&tracker->in_reader_list); + rlist_del(&tracker->in_read_set); + } + assert(rlist_empty(&txn->read_set)); + + struct tx_conflict_tracker *entry, *next; + rlist_foreach_entry_safe(entry, &txn->conflict_list, + in_conflict_list, next) { + rlist_del(&entry->in_conflict_list); + rlist_del(&entry->in_conflicted_by_list); + } + rlist_foreach_entry_safe(entry, &txn->conflicted_by_list, + in_conflicted_by_list, next) { + rlist_del(&entry->in_conflict_list); + rlist_del(&entry->in_conflicted_by_list); + } + assert(rlist_empty(&txn->conflict_list)); + assert(rlist_empty(&txn->conflicted_by_list)); + + rlist_del(&txn->in_read_view_txs); + rlist_del(&txn->in_all_txs); + memtx_tx_story_gc(); } diff --git a/src/box/memtx_tx.h b/src/box/memtx_tx.h index ae71fe5bf7f3f8673a39b9d4e0ee9462021f9090..6a1067f0acb4bacef6615d31508d7e7ad6d10ea3 100644 --- a/src/box/memtx_tx.h +++ b/src/box/memtx_tx.h @@ -97,131 +97,6 @@ enum memtx_tx_story_status { extern const char *memtx_tx_story_status_strs[]; -/** - * Record that links two transactions, breaker and victim. - * See memtx_tx_cause_conflict for details. - */ -struct tx_conflict_tracker { - /** TX that aborts victim on commit. */ - struct txn *breaker; - /** TX that will be aborted on breaker's commit. */ - struct txn *victim; - /** Link in breaker->conflict_list. */ - struct rlist in_conflict_list; - /** Link in victim->conflicted_by_list. */ - struct rlist in_conflicted_by_list; -}; - -/** - * Record that links transaction and a story that the transaction have read. - */ -struct tx_read_tracker { - /** The TX that read story. 
*/ - struct txn *reader; - /** The story that was read by reader. */ - struct memtx_story *story; - /** Link in story->reader_list. */ - struct rlist in_reader_list; - /** Link in reader->read_set. */ - struct rlist in_read_set; - /** Bit field of indexes in which the data was tread by reader. */ - uint64_t index_mask; -}; - -/** - * Link that connects a memtx_story with older and newer stories of the same - * key in index. - */ -struct memtx_story_link { - /** Story that was happened after that story was ended. */ - struct memtx_story *newer_story; - /** Story that was happened before that story was started. */ - struct memtx_story *older_story; - /** List of interval items @sa gap_item. */ - struct rlist nearby_gaps; - /** - * If the tuple of story is physically in index, here the pointer - * to that index is stored. - */ - struct index *in_index; -}; - -/** - * A part of a history of a value in space. - * It's a story about a tuple, from the point it was added to space to the - * point when it was deleted from a space. - * All stories are linked into a list of stories of the same key of each index. - */ -struct memtx_story { - /** The story is about this tuple. The tuple is referenced. */ - struct tuple *tuple; - /** - * Statement that introduced this story. Is set to NULL when the - * statement's transaction becomes committed. Can also be NULL if we - * don't know who introduced that story, the tuple was added by a - * transaction that was completed and destroyed some time ago. - */ - struct txn_stmt *add_stmt; - /** - * Prepare sequence number of add_stmt's transaction. Is set when - * the transaction is prepared. Can be 0 if the transaction is - * in progress or we don't know who introduced that story. - */ - int64_t add_psn; - /** - * Statement that ended this story. Is set to NULL when the statement's - * transaction becomes committed. Can also be NULL if the tuple has not - * been deleted yet. 
- */ - struct txn_stmt *del_stmt; - /** - * Prepare sequence number of del_stmt's transaction. Is set when - * the transaction is prepared. Can be 0 if the transaction is - * in progress or if nobody has deleted the tuple. - */ - int64_t del_psn; - /** - * List of trackers - transactions that has read this tuple. - */ - struct rlist reader_list; - /** - * Link in tx_manager::all_stories - */ - struct rlist in_all_stories; - /** - * Link in space::memtx_tx_stories. - */ - struct rlist in_space_stories; - /** - * Number of indexes in this space - and the count of link[]. - */ - uint32_t index_count; - /** - * Status of story, describes the reason why story cannot be deleted. - * It is initialized in memtx_story constructor and is changed only in - * memtx_tx_story_gc. - */ - enum memtx_tx_story_status status; - /** - * Flag is set when @a tuple is not placed in primary key and - * the story is the only reason why @a tuple cannot be deleted. - */ - bool tuple_is_retained; - /* - * Transaction that added this story was rollbacked: this story is - * absolutely invisible — its only purpose is to retain the reader list. - * It is present at the end of some history chains and completely - * unlinked from others, which also implies it is not present in the - * corresponding indexes. - */ - bool rollbacked; - /** - * Link with older and newer stories (and just tuples) for each - * index respectively. - */ - struct memtx_story_link link[]; -}; - /** * Snapshot cleaner is a short part of history that is supposed to clarify * tuples in a index snapshot. It's also supposed to be used in another @@ -299,41 +174,6 @@ memtx_tx_acquire_ddl(struct txn *tx); void memtx_tx_abort_all_for_ddl(struct txn *ddl_owner); -/** - * Notify TX manager that if transaction @a breaker is committed then the - * transaction @a victim must be aborted due to conflict. It is achieved - * by adding corresponding entry (of tx_conflict_tracker type) to @a breaker - * conflict list. 
In case there's already such entry, then move it to the head - * of the list in order to optimize next invocations of this function. - * For example: there's two rw transaction in progress, one have read - * some value while the second is about to overwrite it. If the second - * is committed first, the first must be aborted. - * - * NB: can trigger story garbage collection. - * - * @return 0 on success, -1 on memory error. - */ -int -memtx_tx_cause_conflict(struct txn *breaker, struct txn *victim); - -/** - * Handle conflict when @a victim has read and @a breaker has written the same - * key, and @a breaker is prepared. The functions must be called in two cases: - * 1. @a breaker becomes prepared for every victim with non-empty intersection - * of victim read set / breaker write set. - * 2. @a victim has to read confirmed value and skips the value that prepared - * @a breaker wrote. - * If @a victim is read-only or hasn't made any changes, it should be sent - * to read view, in which is will not see @a breaker's changes. If @a victim - * is already in a read view - a read view that does not see every breaker - * changes is chosen. - * Otherwise @a victim must be marked as conflicted and aborted on occasion. - * - * NB: can trigger story garbage collection. - */ -void -memtx_tx_handle_conflict(struct txn *breaker, struct txn *victim); - /** * @brief Add a statement to transaction manager's history. * Until unlinking or releasing the space could internally contain @@ -390,6 +230,16 @@ memtx_tx_history_rollback_stmt(struct txn_stmt *stmt); void memtx_tx_history_prepare_stmt(struct txn_stmt *stmt); +/** + * Finish preparing of a transaction. + * Must be called for entire transaction after `memtx_tx_history_prepare_stmt` + * was called for each transaction statement. + * + * NB: can trigger story garbage collection. + */ +void +memtx_tx_prepare_finalize(struct txn *txn); + /** * @brief Commit statement in history. * Make the statement's changes permanent. 
It becomes visible to all. @@ -408,17 +258,6 @@ memtx_tx_tuple_clarify_slow(struct txn *txn, struct space *space, struct tuple *tuples, struct index *index, uint32_t mk_index); -/** - * Record in TX manager that a transaction @txn have read a @tuple in @space. - * - * NB: can trigger story garbage collection. - * - * @return 0 on success, -1 on memory error. - */ -int -memtx_tx_track_read(struct txn *txn, struct space *space, struct tuple *tuple); - - /** Helper of memtx_tx_track_point */ int memtx_tx_track_point_slow(struct txn *txn, struct index *index, @@ -538,10 +377,6 @@ memtx_tx_tuple_clarify(struct txn *txn, struct space *space, { if (!memtx_tx_manager_use_mvcc_engine) return tuple; - if (!tuple_has_flag(tuple, TUPLE_IS_DIRTY)) { - memtx_tx_track_read(txn, space, tuple); - return tuple; - } return memtx_tx_tuple_clarify_slow(txn, space, tuple, index, mk_index); } diff --git a/src/box/txn.c b/src/box/txn.c index 823c7f6628ad267b3046395d4df3424bb0c643d5..2ba83505891729612894b80217cba30b30caefcd 100644 --- a/src/box/txn.c +++ b/src/box/txn.c @@ -451,31 +451,6 @@ txn_free(struct txn *txn) if (txn->rollback_timer != NULL) ev_timer_stop(loop(), txn->rollback_timer); memtx_tx_clean_txn(txn); - struct tx_read_tracker *tracker, *tmp; - rlist_foreach_entry_safe(tracker, &txn->read_set, - in_read_set, tmp) { - rlist_del(&tracker->in_reader_list); - rlist_del(&tracker->in_read_set); - } - assert(rlist_empty(&txn->read_set)); - - struct tx_conflict_tracker *entry, *next; - rlist_foreach_entry_safe(entry, &txn->conflict_list, - in_conflict_list, next) { - rlist_del(&entry->in_conflict_list); - rlist_del(&entry->in_conflicted_by_list); - } - rlist_foreach_entry_safe(entry, &txn->conflicted_by_list, - in_conflicted_by_list, next) { - rlist_del(&entry->in_conflict_list); - rlist_del(&entry->in_conflicted_by_list); - } - assert(rlist_empty(&txn->conflict_list)); - assert(rlist_empty(&txn->conflicted_by_list)); - - rlist_del(&txn->in_read_view_txs); - 
rlist_del(&txn->in_all_txs); - struct txn_stmt *stmt; stailq_foreach_entry(stmt, &txn->stmts, next) txn_stmt_destroy(stmt); @@ -1017,25 +992,6 @@ txn_prepare(struct txn *txn) return -1; } - struct tx_conflict_tracker *entry, *next; - /* Handle conflicts. */ - rlist_foreach_entry_safe(entry, &txn->conflict_list, - in_conflict_list, next) { - assert(entry->breaker == txn); - memtx_tx_handle_conflict(txn, entry->victim); - rlist_del(&entry->in_conflict_list); - rlist_del(&entry->in_conflicted_by_list); - } - /* Just free conflict list - we don't need it anymore. */ - rlist_foreach_entry_safe(entry, &txn->conflicted_by_list, - in_conflicted_by_list, next) { - assert(entry->victim == txn); - rlist_del(&entry->in_conflict_list); - rlist_del(&entry->in_conflicted_by_list); - } - assert(rlist_empty(&txn->conflict_list)); - assert(rlist_empty(&txn->conflicted_by_list)); - trigger_clear(&txn->fiber_on_stop); trigger_clear(&txn->fiber_on_yield);