diff --git a/src/box/memtx_allocator.cc b/src/box/memtx_allocator.cc
index 2528066c5afac905817af5ff722f13bc4de52919..b2004cb9329c7973f5906a521c4fd3abd61e08c4 100644
--- a/src/box/memtx_allocator.cc
+++ b/src/box/memtx_allocator.cc
@@ -31,6 +31,112 @@
 #include "memtx_allocator.h"
 #include "trivia/tuple.h"
 
+struct memtx_tuple_rv *
+memtx_tuple_rv_new(uint32_t version, struct rlist *list)
+{
+	assert(version > 0);
+	int count = 1;
+	struct memtx_tuple_rv *rv;
+	rlist_foreach_entry(rv, list, link)
+		count++;
+	struct memtx_tuple_rv *new_rv = (struct memtx_tuple_rv *)xmalloc(
+		sizeof(*new_rv) + count * sizeof(*new_rv->lists));
+	new_rv->count = count;
+	/* Create one list for each open read view. */
+	struct memtx_tuple_rv_list *l = &new_rv->lists[0];
+	uint32_t prev_version = 0;
+	rlist_foreach_entry(rv, list, link) {
+		l->version = memtx_tuple_rv_version(rv);
+		/* List must be sorted by read view version. */
+		assert(l->version > prev_version);
+		stailq_create(&l->tuples);
+		prev_version = l->version;
+		l++;
+	}
+	/* And one more list for self. */
+	assert(l == &new_rv->lists[count - 1]);
+	l->version = version;
+	assert(l->version > prev_version);
+	(void)prev_version;
+	stailq_create(&l->tuples);
+	rlist_add_tail_entry(list, new_rv, link);
+	return new_rv;
+}
+
+void
+memtx_tuple_rv_delete(struct memtx_tuple_rv *rv, struct rlist *list,
+		      struct stailq *tuples_to_free)
+{
+	struct memtx_tuple_rv *prev_rv = rlist_prev_entry_safe(rv, list, link);
+	uint32_t prev_version = prev_rv == nullptr ? 0 :
+				memtx_tuple_rv_version(prev_rv);
+	/*
+	 * Move tuples from lists with version <= prev_version to the lists of
+	 * the previous read view and free all other tuples.
+	 */
+	int i = 0;
+	int j = 0;
+	while (i < rv->count) {
+		struct memtx_tuple_rv_list *src = &rv->lists[i];
+		if (src->version <= prev_version) {
+			/*
+			 * The tuples were allocated before the previous read
+			 * view was opened. Move them to the previous read
+			 * view's list.
+			 */
+			assert(prev_rv != nullptr);
+			assert(j < prev_rv->count);
+			struct memtx_tuple_rv_list *dst = &prev_rv->lists[j];
+			/*
+			 * The previous read view may have more lists, because
+			 * some read views could have been closed by the time
+			 * this read view was opened. Skip them.
+			 */
+			while (dst->version != src->version) {
+				j++;
+				assert(j < prev_rv->count);
+				dst = &prev_rv->lists[j];
+			}
+			stailq_concat(&dst->tuples, &src->tuples);
+			j++;
+		} else {
+			/*
+			 * The tuples were allocated after the previous read
+			 * view was opened and freed before the next read view
+			 * was opened. Free them immediately.
+			 */
+			stailq_concat(tuples_to_free, &src->tuples);
+		}
+		i++;
+	}
+	rlist_del_entry(rv, link);
+	free(rv);
+}
+
+void
+memtx_tuple_rv_add(struct memtx_tuple_rv *rv, struct memtx_tuple *tuple)
+{
+	/*
+	 * Binary-search for the list with the minimal version such that
+	 * list->version > tuple->version.
+	 */
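
For illustration (not part of the patch): the binary search in memtx_tuple_rv_add computes the same thing as std::upper_bound over the sorted list versions, namely the first list whose version is strictly greater than the tuple's version, i.e. the oldest read view that can still access the tuple. A minimal standalone sketch, using hypothetical versions {1, 3, 4, 7}:

```cpp
// Illustration only: mirrors the binary search in memtx_tuple_rv_add using
// std::upper_bound over a plain vector instead of the lists[] array.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
	/* Sorted versions of the lists in one memtx_tuple_rv. */
	std::vector<uint32_t> versions = {1, 3, 4, 7};
	/*
	 * A freed tuple goes to the first list with version > tuple version:
	 * the oldest read view that can still access the tuple.
	 */
	auto pick = [&](uint32_t tuple_version) {
		auto it = std::upper_bound(versions.begin(), versions.end(),
					   tuple_version);
		assert(it != versions.end()); /* caller checked visibility */
		return *it;
	};
	assert(pick(0) == 1); /* visible to all four read views */
	assert(pick(3) == 4); /* allocated after rv 3 opened, so rv 4 is
				 the oldest reader */
	assert(pick(6) == 7); /* only the newest read view can see it */
	return 0;
}
```
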
+	int begin = 0;
+	int end = rv->count;
+	struct memtx_tuple_rv_list *found = nullptr;
+	while (begin != end) {
+		int middle = begin + (end - begin) / 2;
+		struct memtx_tuple_rv_list *l = &rv->lists[middle];
+		if (l->version <= tuple->version) {
+			begin = middle + 1;
+		} else {
+			found = l;
+			end = middle;
+		}
+	}
+	assert(found != nullptr);
+	stailq_add_entry(&found->tuples, tuple, in_gc);
+}
+
 void
 memtx_allocators_init(struct allocator_settings *settings)
 {
diff --git a/src/box/memtx_allocator.h b/src/box/memtx_allocator.h
index b72b74b21f2ea7c8998779c7da2e55d9c6bfd69d..d239f62fd08f86f4caa4533be7b241addee8867e 100644
--- a/src/box/memtx_allocator.h
+++ b/src/box/memtx_allocator.h
@@ -31,6 +31,7 @@
  */
 #include "allocator.h"
 #include "salad/stailq.h"
+#include "small/rlist.h"
 #include "tuple.h"
 
 /**
@@ -60,6 +61,98 @@ struct memtx_tuple {
 	};
 };
 
+/**
+ * List of tuples owned by a read view.
+ *
+ * See the comment to memtx_tuple_rv for details.
+ */
+struct memtx_tuple_rv_list {
+	/** Read view version. */
+	uint32_t version;
+	/** List of tuples, linked by memtx_tuple::in_gc. */
+	struct stailq tuples;
+};
+
+/**
+ * Tuple list array associated with a read view.
+ *
+ * When a read view is opened:
+ * + We assign a unique incrementally growing version to it.
+ * + We create and associate a list array with it. The array consists of one
+ *   tuple list for each read view created so far, including the new read view.
+ *
+ * When a tuple is allocated, we store the most recent read view version in it.
+ * This lets us check whether it's visible to a read view when it's freed.
+ *
+ * When a tuple is freed:
+ * 1. We look up the most recent open read view.
+ * 2. If there are no open read views or the most recent open read view's
+ *    version is <= the tuple's version, we free the tuple immediately,
+ *    because it was allocated after the most recent open read view was opened.
+ * 3. Otherwise, we add the tuple to the list that has the minimal version
+ *    among all lists in the array such that list->version > tuple->version.
+ *    In other words, we add it to the list corresponding to the oldest read
+ *    view that can access the tuple.
+ *
+ * When a read view is closed:
+ * 1. We look up the most recent read view older than the closed one.
+ * 2. If there's no such read view, we free all tuples from the closed read
+ *    view's lists.
+ * 3. Otherwise,
+ *    + We free all tuples from lists with version > the found read view's
+ *      version, because those tuples were allocated after any older read
+ *      view was opened and freed before any newer read view was opened.
+ *    + We move tuples from all other lists to the corresponding list of
+ *      the found read view.
+ */
+struct memtx_tuple_rv {
+	/** Link in the list of all open read views. */
+	struct rlist link;
+	/** Number of entries in the array. */
+	int count;
+	/**
+	 * Array of tuple lists, one for each read view that was open at the
+	 * time when this read view was created, including this read view.
+	 * Ordered by read view version, ascending (the oldest read view comes
+	 * first).
+	 */
+	struct memtx_tuple_rv_list lists[0];
+};
+
+/** Returns the read view version. */
+static inline uint32_t
+memtx_tuple_rv_version(struct memtx_tuple_rv *rv)
+{
+	/* Last list corresponds to self. */
+	assert(rv->count > 0);
+	return rv->lists[rv->count - 1].version;
+}
+
+/**
+ * Allocates a list array for a read view and initializes it using the list of
+ * all open read views. Adds the new read view to the list.
+ */
+struct memtx_tuple_rv *
+memtx_tuple_rv_new(uint32_t version, struct rlist *list);
+
+/**
+ * Deletes a list array. Tuples that are still visible from older read views
+ * are moved to the previous read view's lists. Tuples that are not visible
+ * from any read view are appended to the tuples_to_free list.
+ */
+void
+memtx_tuple_rv_delete(struct memtx_tuple_rv *rv, struct rlist *list,
+		      struct stailq *tuples_to_free);
+
+/**
+ * Adds a freed tuple to a read view's list.
+ *
+ * The tuple must be visible from some read view, that is, the tuple's version
+ * must be less than the most recent open read view's version.
+ */
+void
+memtx_tuple_rv_add(struct memtx_tuple_rv *rv, struct memtx_tuple *tuple);
+
 /** Memtx read view options. */
 struct memtx_read_view_opts {};
 
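For illustration (not part of the patch): the scheme described in the comment above can be modeled in a few dozen lines with standard containers standing in for the intrusive rlist/stailq structures and the flexible lists[] array. A minimal sketch; all names below are hypothetical:

```cpp
// Toy model of the read-view tuple GC described above. Illustrative only.
#include <cassert>
#include <cstddef>
#include <map>
#include <vector>

/* Version of a list -> number of delayed tuples retained in it. */
using Rv = std::map<unsigned, unsigned>;

static std::vector<Rv> rvs;	/* open read views, oldest first */
static unsigned version = 0;	/* models read_view_version */
static unsigned alive = 0;	/* tuples not yet actually freed */

static unsigned alloc_tuple()	/* returns the version stored in the tuple */
{
	alive++;
	return version;
}

static void open_rv()
{
	Rv rv;
	for (Rv &other : rvs)
		rv[other.rbegin()->first];	/* one list per open rv */
	rv[++version];				/* plus one list for self */
	rvs.push_back(rv);
}

static void free_tuple(unsigned tuple_version)
{
	if (rvs.empty() || tuple_version >= rvs.back().rbegin()->first) {
		alive--;	/* no read view needs it: free immediately */
		return;
	}
	/* upper_bound == the binary search in memtx_tuple_rv_add. */
	rvs.back().upper_bound(tuple_version)->second++;
}

static void close_rv(size_t i)
{
	Rv rv = rvs[i];
	rvs.erase(rvs.begin() + i);
	unsigned prev = i == 0 ? 0 : rvs[i - 1].rbegin()->first;
	for (auto [v, count] : rv) {
		if (v <= prev)
			rvs[i - 1][v] += count;	/* hand over to older rv */
		else
			alive -= count;		/* invisible to all: free */
	}
}

int main()
{
	unsigned t = alloc_tuple();	/* version 0 */
	open_rv();			/* rv1: version 1 */
	free_tuple(t);			/* rv1 still sees it: delayed */
	open_rv();			/* rv2: version 2 */
	close_rv(1);			/* rv2 retained nothing */
	assert(alive == 1);
	close_rv(0);			/* rv1 closed: t finally freed */
	assert(alive == 0);
	return 0;
}
```
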
@@ -72,11 +165,15 @@ class MemtxAllocator {
 	 * Opening a read view pins tuples that were allocated before
 	 * the read view was created. See open_read_view().
 	 */
-	struct ReadView {};
+	struct ReadView {
+		/** Lists of tuples owned by this read view. */
+		struct memtx_tuple_rv *rv;
+	};
 
 	static void create()
 	{
 		stailq_create(&gc);
+		rlist_create(&read_views);
 	}
 
 	static void destroy()
@@ -97,8 +194,9 @@
 	{
 		(void)opts;
 		read_view_version++;
-		delayed_free_mode++;
-		return nullptr;
+		ReadView *rv = (ReadView *)xmalloc(sizeof(*rv));
+		rv->rv = memtx_tuple_rv_new(read_view_version, &read_views);
+		return rv;
 	}
 
 	/**
@@ -106,10 +204,9 @@
 	 */
 	static void close_read_view(ReadView *rv)
 	{
-		assert(rv == nullptr);
-		(void)rv;
-		assert(delayed_free_mode > 0);
-		--delayed_free_mode;
+		memtx_tuple_rv_delete(rv->rv, &read_views, &gc);
+		TRASH(rv);
+		::free(rv);
 	}
 
 	/**
@@ -130,19 +227,19 @@
 	 * Free a tuple allocated with alloc_tuple().
 	 *
 	 * The tuple is freed immediately if there's no read view that may use
-	 * it. Otherwise, it's put in the garbage collection list to be free as
-	 * soon as the last read view using it is destroyed.
+	 * it. Otherwise, it's put in a read view's list to be freed as soon as
+	 * the last read view using it is destroyed.
 	 */
 	static void free_tuple(struct tuple *tuple)
 	{
 		struct memtx_tuple *memtx_tuple = container_of(
 			tuple, struct memtx_tuple, base);
-		if (delayed_free_mode == 0 ||
-		    memtx_tuple->version == read_view_version ||
-		    tuple_has_flag(tuple, TUPLE_IS_TEMPORARY)) {
+		struct memtx_tuple_rv *rv = tuple_rv_last(tuple);
+		if (rv == nullptr ||
+		    memtx_tuple->version >= memtx_tuple_rv_version(rv)) {
 			immediate_free_tuple(memtx_tuple);
 		} else {
-			delayed_free_tuple(memtx_tuple);
+			memtx_tuple_rv_add(rv, memtx_tuple);
 		}
 	}
 
@@ -167,15 +264,8 @@
 		free(memtx_tuple, size);
 	}
 
-	static void delayed_free_tuple(struct memtx_tuple *memtx_tuple)
-	{
-		stailq_add_entry(&gc, memtx_tuple, in_gc);
-	}
-
 	static void collect_garbage()
 	{
-		if (delayed_free_mode > 0)
-			return;
 		for (int i = 0; !stailq_empty(&gc) && i < GC_BATCH_SIZE; i++) {
 			struct memtx_tuple *memtx_tuple = stailq_shift_entry(
 				&gc, struct memtx_tuple, in_gc);
@@ -184,31 +274,48 @@
 	}
 
 	/**
-	 * Tuple garbage collection list. Contains tuples that were not freed
-	 * immediately because they are currently in use by a read view.
+	 * Returns the most recent open read view that needs this tuple or null
+	 * if the tuple may be freed immediately.
 	 */
-	static struct stailq gc;
+	static struct memtx_tuple_rv *
+	tuple_rv_last(struct tuple *tuple)
+	{
+		/* Temporary tuples are freed immediately. */
+		if (tuple_has_flag(tuple, TUPLE_IS_TEMPORARY))
+			return nullptr;
+		if (rlist_empty(&read_views))
+			return nullptr;
+		return rlist_last_entry(&read_views,
+					struct memtx_tuple_rv, link);
+	}
+
 	/**
-	 * Unless zero, freeing of tuples allocated before the last call to
-	 * open_read_view() is delayed until close_read_view() is called.
+	 * List of freed tuples that were not freed immediately, because
+	 * they were in use by a read view, linked by memtx_tuple::in_gc.
+	 * We collect tuples from this list on allocation.
 	 */
-	static uint32_t delayed_free_mode;
+	static struct stailq gc;
 	/**
 	 * Most recent read view's version.
 	 *
 	 * Incremented with each open read view. Not supposed to wrap around.
 	 */
 	static uint32_t read_view_version;
+	/**
+	 * List of memtx_tuple_rv objects, ordered by read view version,
+	 * ascending (the oldest read view comes first).
+	 */
+	static struct rlist read_views;
 };
 
 template<class Allocator>
 struct stailq MemtxAllocator<Allocator>::gc;
 
 template<class Allocator>
-uint32_t MemtxAllocator<Allocator>::delayed_free_mode;
+uint32_t MemtxAllocator<Allocator>::read_view_version;
 
 template<class Allocator>
-uint32_t MemtxAllocator<Allocator>::read_view_version;
+struct rlist MemtxAllocator<Allocator>::read_views;
 
 void
 memtx_allocators_init(struct allocator_settings *settings);
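
For illustration (not part of the patch): the unit test below drives the allocator through the memtx_allocators_* wrappers declared alongside this class. The calling pattern each test case follows is, in sketch form (alloc_tuple() and free_tuple() here stand for the test's own helpers, not a public API):

```cpp
/* Sketch of the calling pattern exercised by the test. */
struct tuple *t = alloc_tuple();	/* stores current read_view_version */
memtx_allocators_read_view rv = memtx_allocators_open_read_view({});
free_tuple(t);				/* delayed: rv can still see t */
memtx_allocators_close_read_view(rv);	/* t becomes reclaimable */
```
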
diff --git a/test/unit/memtx_allocator.cc b/test/unit/memtx_allocator.cc
index d557e8b3ef1c531d010bd52288513962503acc62..b7c6f667799233212df1e6ac4ab9379a241973d9 100644
--- a/test/unit/memtx_allocator.cc
+++ b/test/unit/memtx_allocator.cc
@@ -212,10 +212,73 @@ test_free_not_delayed_if_temporary()
 	check_plan();
 }
 
+/**
+ * Checks that tuples are freed as soon as all read views that can access them
+ * are closed, even if other (newer or older) read views still exist.
+ */
+static void
+test_tuple_gc()
+{
+	plan(11);
+	header();
+
+	is(alloc_tuple_count(), 0, "count before alloc");
+	struct tuple *tuple11 = alloc_tuple();
+	struct tuple *tuple12 = alloc_tuple();
+	struct tuple *tuple13 = alloc_tuple();
+	struct tuple *tuple14 = alloc_tuple();
+	memtx_allocators_read_view rv1 = memtx_allocators_open_read_view({});
+	is(alloc_tuple_count(), 4, "count after rv1 opened");
+	free_tuple(tuple11);
+	struct tuple *tuple22 = alloc_tuple();
+	struct tuple *tuple23 = alloc_tuple();
+	struct tuple *tuple24 = alloc_tuple();
+	memtx_allocators_read_view rv2 = memtx_allocators_open_read_view({});
+	is(alloc_tuple_count(), 7, "count after rv2 opened");
+	free_tuple(tuple12);
+	free_tuple(tuple22);
+	struct tuple *tuple33 = alloc_tuple();
+	struct tuple *tuple34 = alloc_tuple();
+	memtx_allocators_read_view rv3 = memtx_allocators_open_read_view({});
+	is(alloc_tuple_count(), 9, "count after rv3 opened");
+	free_tuple(tuple13);
+	free_tuple(tuple23);
+	free_tuple(tuple33);
+	struct tuple *tuple44 = alloc_tuple();
+
+	is(alloc_tuple_count(), 10, "count before rv2 closed");
+	memtx_allocators_close_read_view(rv2);
+	/* tuple22 is freed */
+	is(alloc_tuple_count(), 9, "count after rv2 closed");
+
+	memtx_allocators_read_view rv4 = memtx_allocators_open_read_view({});
+	is(alloc_tuple_count(), 9, "count after rv4 opened");
+	free_tuple(tuple14);
+	free_tuple(tuple24);
+	free_tuple(tuple34);
+	free_tuple(tuple44);
+
+	is(alloc_tuple_count(), 9, "count before rv4 closed");
+	memtx_allocators_close_read_view(rv4);
+	/* tuple44 is freed */
+	is(alloc_tuple_count(), 8, "count after rv4 closed");
+
+	memtx_allocators_close_read_view(rv1);
+	/* tuple11 and tuple12 are freed */
+	is(alloc_tuple_count(), 6, "count after rv1 closed");
+
+	memtx_allocators_close_read_view(rv3);
+	/* tuple13, tuple14, tuple23, tuple24, tuple33, tuple34 are freed */
+	is(alloc_tuple_count(), 0, "count after rv3 closed");
+
+	footer();
+	check_plan();
+}
+
 static int
 test_main()
 {
-	plan(5);
+	plan(6);
 	header();
 
 	test_alloc_stats();
@@ -223,6 +286,7 @@ test_main()
 	test_free_delayed_until_all_read_views_closed();
 	test_free_not_delayed_if_alloc_after_read_view();
 	test_free_not_delayed_if_temporary();
+	test_tuple_gc();
 
 	footer();
 	return check_plan();
diff --git a/test/unit/memtx_allocator.result b/test/unit/memtx_allocator.result
index 7e815d85675200dbb71dffd0496d3a4651a112a9..f8fe4bff6585fb083238fc02e89e9ffa47851dd5 100644
--- a/test/unit/memtx_allocator.result
+++ b/test/unit/memtx_allocator.result
@@ -1,4 +1,4 @@
-1..5
+1..6
 *** test_main ***
 	1..5
 	*** test_alloc_stats ***
@@ -40,4 +40,19 @@ ok 4 - subtests
 	ok 3 - count after free
 	*** test_free_not_delayed_if_temporary: done ***
 ok 5 - subtests
+	1..11
+	*** test_tuple_gc ***
+	ok 1 - count before alloc
+	ok 2 - count after rv1 opened
+	ok 3 - count after rv2 opened
+	ok 4 - count after rv3 opened
+	ok 5 - count before rv2 closed
+	ok 6 - count after rv2 closed
+	ok 7 - count after rv4 opened
+	ok 8 - count before rv4 closed
+	ok 9 - count after rv4 closed
+	ok 10 - count after rv1 closed
+	ok 11 - count after rv3 closed
+	*** test_tuple_gc: done ***
+ok 6 - subtests
 *** test_main: done ***
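
For illustration (not part of the patch): in test_tuple_gc, the name tupleNM encodes a lifetime. tupleNM is allocated just before read view N opens and freed just after read view M opens, so exactly read views N through M can access it. The per-close counters asserted above can be cross-checked with a small hypothetical model; it opens all four read views up front, which does not change the per-close free counts even though rv2 and rv4 never actually coexist in the test:

```cpp
// Cross-check of test_tuple_gc's counters: a tuple is reclaimed once no
// open read view intersects its visibility range. Illustrative only.
#include <cassert>
#include <set>
#include <utility>
#include <vector>

int main()
{
	/* {first, last} visible read view of each delayed-freed tuple. */
	std::vector<std::pair<int, int>> tuples = {
		{1, 1}, {1, 2}, {1, 3}, {1, 4},	/* tuple11..tuple14 */
		{2, 2}, {2, 3}, {2, 4},		/* tuple22..tuple24 */
		{3, 3}, {3, 4},			/* tuple33, tuple34 */
		{4, 4},				/* tuple44 */
	};
	std::set<int> open = {1, 2, 3, 4};
	/* Close order and expected number of tuples freed by each close. */
	const std::pair<int, int> steps[] = {{2, 1}, {4, 1}, {1, 2}, {3, 6}};
	for (auto [rv, expected] : steps) {
		open.erase(rv);
		int freed = 0;
		for (auto it = tuples.begin(); it != tuples.end();) {
			bool needed = false;
			for (int o : open)
				needed |= it->first <= o && o <= it->second;
			if (needed) {
				++it;
			} else {
				it = tuples.erase(it);
				freed++;
			}
		}
		assert(freed == expected);
	}
	assert(tuples.empty());
	return 0;
}
```
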