From fe102ff7b94920415ba64a6a214a5c426a0aab58 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov <vdavydov@tarantool.org> Date: Thu, 1 Sep 2022 17:45:31 +0300 Subject: [PATCH] memtx: reuse read views to prevent read_view_version wrap around The total number of read views that we can possibly (not necessarily simultaneously) ever create is limited by UINT32_MAX, because we use uint32_t for read view versioning and read view version must never wrap around. If read views were only used for making snapshots or joining replicas, this would be fine, because even if we made a snapshot every second (which is hardly possible), it'd take more than one hundred years for the read view version to wrap around. However, if read views could be created by users (which is our ultimate goal), they could get created as often as every millisecond, which would reduce the wrap around window down to one month, which is unacceptable. Let's fix this issue by reusing the most recent read view in case it was created less than 100 ms ago. The algorithm is described in the comments to the code. Closes #7189 NO_DOC=internal NO_CHANGELOG=internal --- src/box/memtx_allocator.cc | 17 ++++++ src/box/memtx_allocator.h | 73 ++++++++++++++++++++++++- test/unit/memtx_allocator.cc | 94 +++++++++++++++++++++++++++++++- test/unit/memtx_allocator.result | 22 +++++++- 4 files changed, 202 insertions(+), 4 deletions(-) diff --git a/src/box/memtx_allocator.cc b/src/box/memtx_allocator.cc index b2004cb932..a6b91fa757 100644 --- a/src/box/memtx_allocator.cc +++ b/src/box/memtx_allocator.cc @@ -35,6 +35,19 @@ struct memtx_tuple_rv * memtx_tuple_rv_new(uint32_t version, struct rlist *list) { assert(version > 0); + /* Reuse the last read view if its version matches. */ + struct memtx_tuple_rv *last_rv = rlist_empty(list) ? 
nullptr : + rlist_last_entry(list, struct memtx_tuple_rv, link); + if (last_rv != nullptr) { + uint32_t last_version = memtx_tuple_rv_version(last_rv); + assert(last_version <= version); + assert(last_rv->refs > 0); + if (last_version == version) { + last_rv->refs++; + return last_rv; + } + } + /* Proceed to creation of a new read view. */ int count = 1; struct memtx_tuple_rv *rv; rlist_foreach_entry(rv, list, link) @@ -60,6 +73,7 @@ memtx_tuple_rv_new(uint32_t version, struct rlist *list) (void)prev_version; stailq_create(&l->tuples); rlist_add_tail_entry(list, new_rv, link); + new_rv->refs = 1; return new_rv; } @@ -67,6 +81,9 @@ void memtx_tuple_rv_delete(struct memtx_tuple_rv *rv, struct rlist *list, struct stailq *tuples_to_free) { + assert(rv->refs > 0); + if (--rv->refs > 0) + return; struct memtx_tuple_rv *prev_rv = rlist_prev_entry_safe(rv, list, link); uint32_t prev_version = prev_rv == nullptr ? 0 : memtx_tuple_rv_version(prev_rv); diff --git a/src/box/memtx_allocator.h b/src/box/memtx_allocator.h index 5b5ae11eec..f53c6b31bb 100644 --- a/src/box/memtx_allocator.h +++ b/src/box/memtx_allocator.h @@ -30,6 +30,8 @@ * SUCH DAMAGE. */ #include "allocator.h" +#include "clock.h" +#include "clock_lowres.h" #include "salad/stailq.h" #include "small/rlist.h" #include "tuple.h" @@ -108,6 +110,8 @@ struct memtx_tuple_rv_list { struct memtx_tuple_rv { /** Link in the list of all open read views. */ struct rlist link; + /** Reference counter. */ + int refs; /** Number of entries in the array. */ int count; /** @@ -144,6 +148,9 @@ enum memtx_tuple_rv_type { /** * Allocates a list array for a read view and initializes it using the list of * all open read views. Adds the new read view to the list. + * + * If the version of the most recent read view matches the new version, + * the function will reuse it instead of creating a new one. 
*/ struct memtx_tuple_rv * memtx_tuple_rv_new(uint32_t version, struct rlist *list); @@ -202,6 +209,14 @@ class MemtxAllocator { } } + /** + * Sets read_view_reuse_interval. Useful for testing. + */ + static void set_read_view_reuse_interval(double interval) + { + read_view_reuse_interval = interval; + } + /** * Opens a tuple read view: tuples visible from the read view * (allocated before the read view was created) won't be freed @@ -209,7 +224,11 @@ class MemtxAllocator { */ static ReadView *open_read_view(struct memtx_read_view_opts opts) { - read_view_version++; + if (!may_reuse_read_view) { + read_view_version++; + may_reuse_read_view = true; + read_view_timestamp = clock_monotonic(); + } ReadView *rv = (ReadView *)xcalloc(1, sizeof(*rv)); for (int type = 0; type < memtx_tuple_rv_type_MAX; type++) { if (!opts.include_temporary_tuples && @@ -246,7 +265,16 @@ class MemtxAllocator { (struct memtx_tuple *)alloc(total); if (memtx_tuple == NULL) return NULL; - memtx_tuple->version = read_view_version; + /* Use low-resolution clock, because it's hot path. */ + double now = clock_lowres_monotonic(); + if (read_view_version > 0 && read_view_reuse_interval > 0 && + now - read_view_timestamp < read_view_reuse_interval) { + /* See the comment to read_view_reuse_interval. */ + memtx_tuple->version = read_view_version - 1; + } else { + memtx_tuple->version = read_view_version; + may_reuse_read_view = false; + } return &memtx_tuple->base; } @@ -337,6 +365,38 @@ class MemtxAllocator { * ascending (the oldest read view comes first). */ static struct rlist read_views[]; + /** + * If the last read view was created less than read_view_reuse_interval + * seconds ago, reuse it instead of creating a new one. Setting to 0 + * effectively disables read view reusing. + * + * We reuse read views to ensure that read_view_version never wraps + * around. Here's how it works. 
When a tuple is allocated, we compare + * the current time with the time when the most recent read view was + * opened. If the difference is less than the reuse interval, we assign + * the previous read view version to it, read_view_version - 1, instead + * of read_view_version, like it was allocated before the last read + * view was created. + * + * When a read view is opened, we check if there were any tuples + * allocated with the current read_view_version. If such tuples exist, + * we proceed to creation of a new read view, as usual. Otherwise, we + * create a new read view with the previous read view's version + * (read_view_version, without bumping) and reuse its garbage + * collection lists (with reference counting). + */ + static double read_view_reuse_interval; + /** + * Monotonic clock time when the most recent read view was opened. + * See also read_view_reuse_interval. + */ + static double read_view_timestamp; + /** + * Set if the most recent read view may be reused (that is no new + * tuples were allocated with the current value of read_view_version). + * See also read_view_reuse_interval. 
+ */ + static bool may_reuse_read_view; }; template<class Allocator> @@ -348,6 +408,15 @@ uint32_t MemtxAllocator<Allocator>::read_view_version; template<class Allocator> struct rlist MemtxAllocator<Allocator>::read_views[memtx_tuple_rv_type_MAX]; +template<class Allocator> +double MemtxAllocator<Allocator>::read_view_reuse_interval = 0.1; + +template<class Allocator> +double MemtxAllocator<Allocator>::read_view_timestamp; + +template<class Allocator> +bool MemtxAllocator<Allocator>::may_reuse_read_view; + void memtx_allocators_init(struct allocator_settings *settings); diff --git a/test/unit/memtx_allocator.cc b/test/unit/memtx_allocator.cc index 16792a572a..548dfe3197 100644 --- a/test/unit/memtx_allocator.cc +++ b/test/unit/memtx_allocator.cc @@ -2,6 +2,7 @@ #include "box/memtx_allocator.h" #include "box/tuple.h" #include "box/tuple_format.h" +#include "clock_lowres.h" #include "fiber.h" #include "memory.h" #include "say.h" @@ -9,6 +10,7 @@ #include "small/slab_arena.h" #include "small/slab_cache.h" #include "small/quota.h" +#include "trivia/util.h" #include "unit.h" #define ARENA_SIZE (16 * 1024 * 1024) @@ -367,10 +369,96 @@ test_temp_tuple_gc() check_plan(); } +/** + * Checks that read views can be reused. 
+ */ +static void +test_reuse_read_view() +{ + plan(16); + header(); + + MemtxAllocator<SmallAlloc>::set_read_view_reuse_interval(0.1); + struct memtx_read_view_opts opts; + opts.include_temporary_tuples = true; + + is(alloc_tuple_count(), 0, "count before alloc"); + struct tuple *tuple1 = alloc_tuple(); + struct tuple *temp_tuple1 = alloc_temp_tuple(); + memtx_allocators_read_view rv1 = memtx_allocators_open_read_view({}); + is(alloc_tuple_count(), 2, "count after rv1 opened"); + free_tuple(tuple1); + free_tuple(temp_tuple1); + struct tuple *tuple2 = alloc_tuple(); + struct tuple *temp_tuple2 = alloc_temp_tuple(); + memtx_allocators_read_view rv2 = memtx_allocators_open_read_view(opts); + /* temp_tuple1 is freed */ + is(alloc_tuple_count(), 3, "count after rv2 opened"); + free_tuple(tuple2); + free_tuple(temp_tuple2); + struct tuple *tuple3 = alloc_tuple(); + struct tuple *temp_tuple3 = alloc_temp_tuple(); + memtx_allocators_read_view rv3 = memtx_allocators_open_read_view(opts); + is(alloc_tuple_count(), 5, "count after rv3 opened"); + free_tuple(tuple3); + free_tuple(temp_tuple3); + struct tuple *tuple4 = alloc_tuple(); + struct tuple *temp_tuple4 = alloc_temp_tuple(); + memtx_allocators_read_view rv4 = memtx_allocators_open_read_view({}); + is(alloc_tuple_count(), 7, "count after rv4 opened"); + free_tuple(tuple4); + free_tuple(temp_tuple4); + struct tuple *tuple5 = alloc_tuple(); + struct tuple *temp_tuple5 = alloc_temp_tuple(); + memtx_allocators_read_view rv5 = memtx_allocators_open_read_view({}); + is(alloc_tuple_count(), 9, "count after rv5 opened"); + free_tuple(tuple5); + free_tuple(temp_tuple5); + thread_sleep(0.2); + struct tuple *tuple6 = alloc_tuple(); + struct tuple *temp_tuple6 = alloc_temp_tuple(); + memtx_allocators_read_view rv6 = memtx_allocators_open_read_view(opts); + is(alloc_tuple_count(), 11, "count after rv6 opened"); + free_tuple(tuple6); + free_tuple(temp_tuple6); + thread_sleep(0.2); + struct tuple *tuple7 = alloc_tuple(); + struct 
tuple *temp_tuple7 = alloc_temp_tuple(); + memtx_allocators_read_view rv7 = memtx_allocators_open_read_view({}); + is(alloc_tuple_count(), 13, "count after rv7 opened"); + free_tuple(tuple7); + free_tuple(temp_tuple7); + /* temp_tuple7 is freed */ + is(alloc_tuple_count(), 12, "count before rv7 closed"); + memtx_allocators_close_read_view(rv7); + /* tuple7 is freed */ + is(alloc_tuple_count(), 11, "count after rv7 closed"); + memtx_allocators_close_read_view(rv6); + /* tuple6 and temp_tuple6 are freed */ + is(alloc_tuple_count(), 9, "count after rv6 closed"); + memtx_allocators_close_read_view(rv2); + is(alloc_tuple_count(), 9, "count after rv2 closed"); + memtx_allocators_close_read_view(rv1); + is(alloc_tuple_count(), 9, "count after rv1 closed"); + memtx_allocators_close_read_view(rv3); + /* temp_tuple2, temp_tuple3, temp_tuple4, temp_tuple5 are freed */ + is(alloc_tuple_count(), 5, "count after rv3 closed"); + memtx_allocators_close_read_view(rv5); + is(alloc_tuple_count(), 5, "count after rv5 closed"); + memtx_allocators_close_read_view(rv4); + /* tuple1, tuple2, tuple3, tuple4, tuple5 are freed */ + is(alloc_tuple_count(), 0, "count after rv4 closed"); + + MemtxAllocator<SmallAlloc>::set_read_view_reuse_interval(0); + + footer(); + check_plan(); +} + static int test_main() { - plan(7); + plan(8); header(); test_alloc_stats(); @@ -380,6 +468,7 @@ test_main() test_free_not_delayed_if_temporary(); test_tuple_gc(); test_temp_tuple_gc(); + test_reuse_read_view(); footer(); return check_plan(); @@ -390,6 +479,7 @@ main() { say_logger_init("/dev/null", S_INFO, /*nonblock=*/true, "plain", /*background=*/false); + clock_lowres_signal_init(); memory_init(); fiber_init(fiber_c_invoke); tuple_init(NULL); @@ -406,6 +496,7 @@ main() GRANULARITY, ALLOC_FACTOR, &actual_alloc_factor, "a); memtx_allocators_init(&alloc_settings); + MemtxAllocator<SmallAlloc>::set_read_view_reuse_interval(0); test_tuple_format = simple_tuple_format_new( &test_tuple_format_vtab, /*engine=*/NULL, 
/*keys=*/NULL, /*key_count=*/0); @@ -420,6 +511,7 @@ main() tuple_free(); fiber_free(); memory_free(); + clock_lowres_signal_reset(); say_logger_free(); return rc; } diff --git a/test/unit/memtx_allocator.result b/test/unit/memtx_allocator.result index 385a2077de..294ddf2226 100644 --- a/test/unit/memtx_allocator.result +++ b/test/unit/memtx_allocator.result @@ -1,4 +1,4 @@ -1..7 +1..8 *** test_main *** 1..5 *** test_alloc_stats *** @@ -69,4 +69,24 @@ ok 6 - subtests ok 10 - count after rv1 closed *** test_temp_tuple_gc: done *** ok 7 - subtests + 1..16 + *** test_reuse_read_view *** + ok 1 - count before alloc + ok 2 - count after rv1 opened + ok 3 - count after rv2 opened + ok 4 - count after rv3 opened + ok 5 - count after rv4 opened + ok 6 - count after rv5 opened + ok 7 - count after rv6 opened + ok 8 - count after rv7 opened + ok 9 - count before rv7 closed + ok 10 - count after rv7 closed + ok 11 - count after rv6 closed + ok 12 - count after rv2 closed + ok 13 - count after rv1 closed + ok 14 - count after rv3 closed + ok 15 - count after rv5 closed + ok 16 - count after rv4 closed + *** test_reuse_read_view: done *** +ok 8 - subtests *** test_main: done *** -- GitLab