From 6459a2bf47b46b84910b2d837be9e43451f3a918 Mon Sep 17 00:00:00 2001
From: Konstantin Osipov <kostja@tarantool.org>
Date: Sun, 1 Feb 2015 19:41:45 +0300
Subject: [PATCH] gh-695: slab_alloc_maximal and variable slab sizes

Avoid overwriting tuple data with information necessary for
smfree(). Only use a single pointer to store delayed free context.
Add a test case. Minor cleanups/comments/renames.
---
 src/box/tuple.cc           | 41 +++++++++--------
 src/box/tuple.h            |  6 +++
 src/lib/small/lifo.h       |  6 +++
 src/lib/small/mempool.c    |  2 +
 src/lib/small/mempool.h    | 29 +++++++-----
 src/lib/small/slab_cache.c | 12 +++--
 src/lib/small/small.c      | 91 ++++++++++++++++++--------------------
 src/lib/small/small.h      |  2 -
 test/app/snapshot.result   |  1 +
 test/app/snapshot.test.lua | 62 ++++++++++++++++++++++++++
 10 files changed, 169 insertions(+), 83 deletions(-)
 create mode 100644 test/app/snapshot.result
 create mode 100755 test/app/snapshot.test.lua
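---- review note, not part of the patch ----
The single-pointer bookkeeping described above works by pushing a freed
object onto its mempool's `delayed` lifo and, the first time a pool
receives a pending item, pushing the pool itself onto the allocator's
`delayed` list. Below is a minimal, self-contained sketch of the scheme;
the lifo helpers mirror src/lib/small/lifo.h from this patch, while
struct mempool_sketch and main() are hypothetical stand-ins for
struct mempool and the smfree_delayed()/smfree_batch() pair.

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct lifo { void *next; };

    static inline void lifo_init(struct lifo *head) { head->next = NULL; }
    static inline bool lifo_is_empty(struct lifo *head) { return head->next == NULL; }
    static inline void *lifo_peek(struct lifo *head) { return head->next; }

    static inline void
    lifo_push(struct lifo *head, void *elem)
    {
        struct lifo *e = (struct lifo *) elem;
        e->next = head->next;
        head->next = elem;
    }

    static inline void *
    lifo_pop(struct lifo *head)
    {
        struct lifo *elem = (struct lifo *) head->next;
        if (elem != NULL)
            head->next = elem->next;
        return elem;
    }

    /*
     * `link` must be the first member: a pointer to it taken from
     * the allocator's delayed list is also a pointer to the pool.
     */
    struct mempool_sketch {
        struct lifo link;    /* in the allocator's list of pools */
        struct lifo delayed; /* freed objects of this pool */
    };

    int
    main(void)
    {
        struct lifo alloc_delayed; /* stands in for small_alloc::delayed */
        struct mempool_sketch pool;
        lifo_init(&alloc_delayed);
        lifo_init(&pool.link);
        lifo_init(&pool.delayed);

        /*
         * A "freed" object: only its first pointer-sized bytes are
         * reused as the lifo link, the rest stays intact.
         */
        void *object[4] = { NULL, "tuple", "data", "intact" };

        /* What smfree_delayed() does in delayed-free mode. */
        if (lifo_is_empty(&pool.delayed))
            lifo_push(&alloc_delayed, &pool.link);
        lifo_push(&pool.delayed, object);

        /* What smfree_batch() does: drain item lists pool by pool. */
        struct mempool_sketch *p = lifo_peek(&alloc_delayed);
        void *item = lifo_pop(&p->delayed);
        assert(p == &pool && item == (void *) object);
        printf("recycled, payload '%s' intact\n", (char *) object[3]);
        return 0;
    }
---- end of note ----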
diff --git a/src/box/tuple.cc b/src/box/tuple.cc
index 6fc4b2b00f..ab63e89271 100644
--- a/src/box/tuple.cc
+++ b/src/box/tuple.cc
@@ -50,12 +50,14 @@ struct slab_arena memtx_arena;
 static struct slab_cache memtx_slab_cache;
 struct small_alloc memtx_alloc;
 
-/* Lowest allowed objsize_min setting of tuple arena */
-const uint32_t MIN_TUPLE_SIZE_NOT_LESS_THAN = 16;
-/* Lowest allowed objsize_max setting of tuple arena */
-const uint32_t MAX_TUPLE_SIZE_NOT_LESS_THAN = 16 * 1024;
-/* Lowest allowed slab size for tuple arena */
-const size_t SLAB_SIZE_NOT_LESS_THAN = 1024 * 1024;
+enum {
+        /** Lowest allowed slab_alloc_minimal */
+        OBJSIZE_MIN = 16,
+        /** Lowest allowed slab_alloc_maximal */
+        OBJSIZE_MAX_MIN = 16 * 1024,
+        /** Lowest allowed slab size, for mmapped slabs */
+        SLAB_SIZE_MIN = 1024 * 1024
+};
 
 /** Extract all available type info from keys. */
 void
@@ -534,18 +536,19 @@ tuple_init(float tuple_arena_max_size, uint32_t objsize_min,
         tuple_format_ref(tuple_format_ber, 1);
 
         /* Apply lowest allowed objsize bounds */
-        if (objsize_min < MIN_TUPLE_SIZE_NOT_LESS_THAN)
-                objsize_min = MIN_TUPLE_SIZE_NOT_LESS_THAN;
-        if (objsize_max < MAX_TUPLE_SIZE_NOT_LESS_THAN)
-                objsize_max = MAX_TUPLE_SIZE_NOT_LESS_THAN;
+        if (objsize_min < OBJSIZE_MIN)
+                objsize_min = OBJSIZE_MIN;
+        if (objsize_max < OBJSIZE_MAX_MIN)
+                objsize_max = OBJSIZE_MAX_MIN;
 
         /* Calculate slab size for tuple arena */
-        size_t max_slab_size = small_round(objsize_max * 4);
-        if (max_slab_size < SLAB_SIZE_NOT_LESS_THAN)
-                max_slab_size = SLAB_SIZE_NOT_LESS_THAN;
+        size_t slab_size = small_round(objsize_max * 4);
+        if (slab_size < SLAB_SIZE_MIN)
+                slab_size = SLAB_SIZE_MIN;
 
-        size_t max_size = tuple_arena_max_size * 1024 * 1024 * 1024;
-        quota_init(&memtx_quota, max_size);
+        /** Preallocate entire quota. */
+        size_t prealloc = tuple_arena_max_size * 1024 * 1024 * 1024;
+        quota_init(&memtx_quota, prealloc);
 
         int flags;
         if (access("/proc/user_beancounters", F_OK) == 0) {
@@ -554,20 +557,20 @@ tuple_init(float tuple_arena_max_size, uint32_t objsize_min,
                 flags = MAP_PRIVATE;
         } else {
                 say_info("mapping %zu bytes for a shared arena...",
-                         max_size);
+                         prealloc);
                 flags = MAP_SHARED;
         }
 
         if (slab_arena_create(&memtx_arena, &memtx_quota,
-                              max_size, max_slab_size, flags)) {
+                              prealloc, slab_size, flags)) {
                 if (ENOMEM == errno) {
                         panic("failed to preallocate %zu bytes: "
                               "Cannot allocate memory, check option "
                               "'slab_alloc_arena' in box.cfg(..)",
-                              max_size);
+                              prealloc);
                 } else {
                         panic_syserror("failed to preallocate %zu bytes",
-                                       max_size);
+                                       prealloc);
                 }
         }
         slab_cache_create(&memtx_slab_cache, &memtx_arena);
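---- review note, not part of the patch ----
With this change the arena slab size follows slab_alloc_maximal instead of
being fixed. A worked example of the calculation above, assuming
small_round() rounds its argument up to the next power of two (the helper
name below is illustrative):

    #include <stddef.h>
    #include <stdio.h>

    /* Assumed stand-in for small_round(). */
    static size_t
    round_up_pow2(size_t size)
    {
        size_t r = 1;
        while (r < size)
            r <<= 1;
        return r;
    }

    int
    main(void)
    {
        enum { SLAB_SIZE_MIN = 1024 * 1024 };
        size_t objsize_max = 5 * 1024 * 1024; /* slab_alloc_maximal */
        /* At least 4 largest objects must fit in a slab. */
        size_t slab_size = round_up_pow2(objsize_max * 4);
        if (slab_size < SLAB_SIZE_MIN)
            slab_size = SLAB_SIZE_MIN;
        /* 5 MiB max objects => 32 MiB slabs; prints "32". */
        printf("%zu\n", slab_size / (1024 * 1024));
        return 0;
    }

With the default 1 MiB objsize_max the lower clamp keeps the old 1 MiB
mmapped slabs, so the default configuration is unaffected.
---- end of note ----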
diff --git a/src/box/tuple.h b/src/box/tuple.h
index 815e191874..666f190a70 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -139,6 +139,12 @@ tuple_format_ref(struct tuple_format *format, int count)
  */
 struct tuple
 {
+        /*
+         * sic: the header of the tuple is used
+         * to store a free list pointer in smfree_delayed.
+         * Please don't change it without understanding
+         * how smfree_delayed and snapshotting COW work.
+         */
         /** snapshot generation version */
         uint32_t version;
         /** reference counter */
diff --git a/src/lib/small/lifo.h b/src/lib/small/lifo.h
index 9c322b6c47..6641e396ec 100644
--- a/src/lib/small/lifo.h
+++ b/src/lib/small/lifo.h
@@ -65,6 +65,12 @@ lifo_pop(struct lifo *head)
         return elem;
 }
 
+static inline void *
+lifo_peek(struct lifo *head)
+{
+        return head->next;
+}
+
 static inline bool
 lifo_is_empty(struct lifo *head)
 {
diff --git a/src/lib/small/mempool.c b/src/lib/small/mempool.c
index 83a1dc3cd7..5570c674f2 100644
--- a/src/lib/small/mempool.c
+++ b/src/lib/small/mempool.c
@@ -125,6 +125,8 @@ mempool_create_with_order(struct mempool *pool, struct slab_cache *cache,
                           uint32_t objsize, uint8_t order)
 {
         assert(order <= cache->order_max);
+        lifo_init(&pool->link);
+        lifo_init(&pool->delayed);
         pool->cache = cache;
         slab_list_create(&pool->slabs);
         mslab_tree_new(&pool->free_slabs);
diff --git a/src/lib/small/mempool.h b/src/lib/small/mempool.h
index dab88d42fd..5f2737957e 100644
--- a/src/lib/small/mempool.h
+++ b/src/lib/small/mempool.h
@@ -33,6 +33,7 @@
 #include <inttypes.h>
 #include <string.h>
 #include "small/slab_cache.h"
+#include "small/lifo.h"
 #define RB_COMPACT 1
 #include "third_party/rb.h"
 
@@ -86,10 +87,11 @@ struct mslab {
 };
 
 /**
- * Mempool will try to allocate blocks large enough to have overhead
- * less than specified below
+ * Mempool will try to allocate blocks large enough to ensure
+ * the overhead from internal fragmentation is less than the
+ * ratio specified below.
  */
-static const double expected_overhead_max = 0.01;
+static const double OVERHEAD_RATIO = 0.01;
 
 static inline uint32_t
 mslab_sizeof()
@@ -114,6 +116,14 @@ typedef rb_tree(struct mslab) mslab_tree_t;
 
 /** A memory pool. */
 struct mempool {
+        /**
+         * A link in the delayed free list of pools. Must be
+         * the first member in the struct.
+         * @sa smfree_delayed().
+         */
+        struct lifo link;
+        /** List of pointers for delayed free. */
+        struct lifo delayed;
         /** The source of empty slabs. */
         struct slab_cache *cache;
         /** All slabs. */
@@ -191,18 +201,17 @@ static inline void
 mempool_create(struct mempool *pool, struct slab_cache *cache,
                uint32_t objsize)
 {
-        size_t expected_loss = objsize > sizeof(struct mslab) ?
-                objsize : sizeof(struct mslab);
-        size_t slab_size_min = (size_t)(expected_loss / expected_overhead_max);
-        if (slab_size_min > cache->arena->slab_size)
-                slab_size_min = cache->arena->slab_size;
-
+        size_t overhead = (objsize > sizeof(struct mslab) ?
+                           objsize : sizeof(struct mslab));
+        size_t slab_size = (size_t) (overhead / OVERHEAD_RATIO);
+        if (slab_size > cache->arena->slab_size)
+                slab_size = cache->arena->slab_size;
         /*
          * Calculate the amount of usable space in a slab.
-         * @note: this asserts that slab_size_min is less than
-         * SLAB_ORDER_MAX.
+         * @note: this asserts that slab_size yields an order
+         * no greater than cache->order_max.
          */
-        uint8_t order = slab_order(cache, slab_size_min);
+        uint8_t order = slab_order(cache, slab_size);
         assert(order <= cache->order_max);
         return mempool_create_with_order(pool, cache, objsize, order);
 }
diff --git a/src/lib/small/slab_cache.c b/src/lib/small/slab_cache.c
index d837fe462d..c64eb99e5d 100644
--- a/src/lib/small/slab_cache.c
+++ b/src/lib/small/slab_cache.c
@@ -162,10 +162,14 @@ void
 slab_cache_create(struct slab_cache *cache, struct slab_arena *arena)
 {
         cache->arena = arena;
-
-        long lowest_order0_size = small_round(sysconf(_SC_PAGESIZE));
-        assert(arena->slab_size >= lowest_order0_size);
-        cache->order_max = small_lb(arena->slab_size / lowest_order0_size);
+        /*
+         * We have a fixed number of orders (ORDER_MAX); calculate
+         * the size of buddies in the smallest order, given the
+         * slab size of the slab arena.
+         */
+        long min_order0_size = sysconf(_SC_PAGESIZE);
+        assert(arena->slab_size >= min_order0_size);
+        cache->order_max = small_lb(arena->slab_size / min_order0_size);
         if (cache->order_max > ORDER_MAX - 1)
                 cache->order_max = ORDER_MAX - 1;
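---- review note, not part of the patch ----
After this change each mempool derives its own slab order from
OVERHEAD_RATIO instead of sharing one global slab order. A worked example
with hypothetical numbers (an objsize larger than sizeof(struct mslab),
a 4 MiB arena slab):

    #include <stddef.h>
    #include <stdio.h>

    int
    main(void)
    {
        const double OVERHEAD_RATIO = 0.01;
        size_t objsize = 5000;        /* assume > sizeof(struct mslab) */
        size_t arena_slab_size = 4 * 1024 * 1024;
        /* Pick a slab roughly 100x the worst-case waste... */
        size_t slab_size = (size_t) (objsize / OVERHEAD_RATIO);
        /* ...but never larger than an arena slab. */
        if (slab_size > arena_slab_size)
            slab_size = arena_slab_size;
        printf("%zu\n", slab_size);   /* 500000 */
        return 0;
    }

slab_order() then rounds this target up to the nearest buddy size, i.e. a
512 KiB slab given the 4 KiB order-0 size above, so pools of small objects
use small slabs and only pools of huge objects consume whole arena slabs.
---- end of note ----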
diff --git a/src/lib/small/small.c b/src/lib/small/small.c
index c665020e2b..b065d52470 100644
--- a/src/lib/small/small.c
+++ b/src/lib/small/small.c
@@ -42,15 +42,6 @@ enum {
         STEP_SIZE_LB = 3,
 };
 
-/**
- * Extended lifo struct for store object size in addition to pointer
- * to the next object in the list
- */
-struct small_lifo_ext {
-        struct lifo base;
-        size_t size;
-};
-
 rb_proto(, factor_tree_, factor_tree_t, struct factor_pool)
 
 /** Used for search in the tree. */
@@ -109,15 +100,10 @@ small_alloc_create(struct small_alloc *alloc, struct slab_cache *cache,
 {
         alloc->cache = cache;
         /* Align sizes. */
-        objsize_min = small_align(objsize_min, sizeof(STEP_SIZE));
-        /* An object must be large enough to contain struct small_lifo_ext */
-        if (objsize_min < sizeof(struct small_lifo_ext))
-                objsize_min = small_align(sizeof(struct small_lifo_ext),
-                                          sizeof(STEP_SIZE));
-        alloc->slab_order = cache->order_max;
+        objsize_min = small_align(objsize_min, STEP_SIZE);
         /* Make sure at least 4 largest objects can fit in a slab. */
         alloc->objsize_max =
-                mempool_objsize_max(slab_order_size(cache, alloc->slab_order));
+                mempool_objsize_max(slab_order_size(cache, cache->order_max));
 
         assert(alloc->objsize_max > objsize_min + STEP_POOL_MAX * STEP_SIZE);
 
         struct mempool *step_pool;
@@ -180,14 +166,18 @@ smfree_batch(struct small_alloc *alloc)
                 return;
 
         const int BATCH = 100;
+        struct mempool *pool = lifo_peek(&alloc->delayed);
 
         for (int i = 0; i < BATCH; i++) {
-                void *item = lifo_pop(&alloc->delayed);
-                if (item == NULL)
-                        break;
-                struct small_lifo_ext *ext = (struct small_lifo_ext *) item;
-                size_t size = ext->size;
-                smfree(alloc, item, size);
+                void *item = lifo_pop(&pool->delayed);
+                if (item == NULL) {
+                        (void) lifo_pop(&alloc->delayed);
+                        pool = lifo_peek(&alloc->delayed);
+                        if (pool == NULL)
+                                break;
+                        continue;
+                }
+                mempool_free(pool, item);
         }
 }
@@ -256,33 +246,20 @@ small_recycle_pool(struct small_alloc *alloc, struct mempool *pool)
         }
 }
 
-/** Free memory chunk allocated by the small allocator. */
-/**
- * Free a small objects.
- *
- * This boils down to finding the object's mempool and delegating
- * to mempool_free().
- *
- * If the pool becomes completely empty, and it's a factored pool,
- * and the factored pool's cache is empty, put back the empty
- * factored pool into the factored pool cache.
- */
-void
-smfree(struct small_alloc *alloc, void *ptr, size_t size)
+static inline struct mempool *
+mempool_find(struct small_alloc *alloc, size_t size)
 {
         struct mempool *pool;
         if (size <= alloc->step_pool_objsize_max) {
                 /* Allocated in a stepped pool. */
-                int idx;
-                if (size <= alloc->step_pools[0].objsize)
-                        idx = 0;
-                else {
-                        idx = (size - alloc->step_pools[0].objsize
-                               + STEP_SIZE - 1) >> STEP_SIZE_LB;
+                if (size <= alloc->step_pools[0].objsize) {
+                        pool = &alloc->step_pools[0];
+                } else {
+                        int idx = (size - alloc->step_pools[0].objsize
+                                   + STEP_SIZE - 1) >> STEP_SIZE_LB;
+                        pool = &alloc->step_pools[idx];
+                        assert(size + STEP_SIZE > pool->objsize);
                 }
-                pool = &alloc->step_pools[idx];
-                assert(size <= pool->objsize &&
-                       (size + STEP_SIZE > pool->objsize || idx == 0));
         } else {
                 /* Allocated in a factor pool. */
                 struct factor_pool pattern;
@@ -294,6 +271,24 @@ smfree(struct small_alloc *alloc, void *ptr, size_t size)
                 pool = &upper_bound->pool;
         }
         assert(size <= pool->objsize);
+        return pool;
+}
+
+/** Free memory chunk allocated by the small allocator. */
+/**
+ * Free a small object.
+ *
+ * This boils down to finding the object's mempool and delegating
+ * to mempool_free().
+ *
+ * If the pool becomes completely empty, and it's a factored pool,
+ * and the factored pool's cache is empty, put back the empty
+ * factored pool into the factored pool cache.
+ */
+void
+smfree(struct small_alloc *alloc, void *ptr, size_t size)
+{
+        struct mempool *pool = mempool_find(alloc, size);
         mempool_free(pool, ptr);
 
         if (mempool_used(pool) == 0)
@@ -308,11 +303,11 @@ smfree(struct small_alloc *alloc, void *ptr, size_t size)
 void
 smfree_delayed(struct small_alloc *alloc, void *ptr, size_t size)
 {
-        assert(size >= sizeof(struct small_lifo_ext));
         if (alloc->is_delayed_free_mode && ptr) {
-                struct small_lifo_ext *ext = (struct small_lifo_ext *)ptr;
-                ext->size = size;
-                lifo_push(&alloc->delayed, ptr);
+                struct mempool *pool = mempool_find(alloc, size);
+                if (lifo_is_empty(&pool->delayed))
+                        lifo_push(&alloc->delayed, &pool->link);
+                lifo_push(&pool->delayed, ptr);
         } else {
                 smfree(alloc, ptr, size);
         }
diff --git a/src/lib/small/small.h b/src/lib/small/small.h
index e400b613bf..1b8e5ffd5d 100644
--- a/src/lib/small/small.h
+++ b/src/lib/small/small.h
@@ -154,8 +154,6 @@ struct small_alloc {
          */
         float factor;
         uint32_t objsize_max;
-        /** All slabs in all mempools have the same order. */
-        uint8_t slab_order;
         /**
          * If true, smfree_delayed puts items to delayed list.
          */
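---- review note, not part of the patch ----
The stepped-pool lookup that mempool_find() inherits from the old smfree()
is easy to sanity-check in isolation. STEP_SIZE = 8 follows from
STEP_SIZE_LB = 3 in this file; the base pool object size of 64 is
hypothetical:

    #include <assert.h>
    #include <stdio.h>

    enum { STEP_SIZE = 8, STEP_SIZE_LB = 3 };

    int
    main(void)
    {
        unsigned objsize0 = 64; /* step_pools[0].objsize (assumed) */
        unsigned size = 70;     /* size passed to smfree() */
        unsigned idx = (size - objsize0 + STEP_SIZE - 1) >> STEP_SIZE_LB;
        unsigned pool_objsize = objsize0 + idx * STEP_SIZE;
        /* size 70 falls into pool 1, which serves 65..72 bytes. */
        assert(idx == 1 && pool_objsize == 72);
        assert(size <= pool_objsize && size + STEP_SIZE > pool_objsize);
        printf("size %u -> step pool %u (objsize %u)\n",
               size, idx, pool_objsize);
        return 0;
    }
---- end of note ----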
diff --git a/test/app/snapshot.result b/test/app/snapshot.result
new file mode 100644
index 0000000000..9766475a41
--- /dev/null
+++ b/test/app/snapshot.result
@@ -0,0 +1 @@
+ok
diff --git a/test/app/snapshot.test.lua b/test/app/snapshot.test.lua
new file mode 100755
index 0000000000..d904b6b7c1
--- /dev/null
+++ b/test/app/snapshot.test.lua
@@ -0,0 +1,62 @@
+#!/usr/bin/env tarantool
+math = require('math')
+fiber = require('fiber')
+
+--
+-- gh-695: frequent snapshots under load must not corrupt tuple data.
+--
+function noise()
+    fiber.name('noise-'..fiber.id())
+    while true do
+        if box.space.test:len() < 300000 then
+            local value = string.rep('a', math.random(255)+1)
+            box.space.test:auto_increment{fiber.time64(), value}
+        end
+        fiber.sleep(0)
+    end
+end
+
+function purge()
+    fiber.name('purge-'..fiber.id())
+    while true do
+        local min = box.space.test.index.primary:min()
+        if min ~= nil then
+            box.space.test:delete{min[1]}
+        end
+        fiber.sleep(0)
+    end
+end
+
+function snapshot(lsn)
+    fiber.name('snapshot')
+    while true do
+        local new_lsn = box.info.server.lsn
+        if new_lsn ~= lsn then
+            lsn = new_lsn
+            box.snapshot()
+        end
+        fiber.sleep(0.001)
+    end
+end
+box.cfg{logger="tarantool.log", slab_alloc_arena=0.1, rows_per_wal=5000}
+
+if box.space.test == nil then
+    box.schema.space.create('test')
+    box.space.test:create_index('primary')
+end
+
+require('console').listen(3303)
+
+fiber.create(noise)
+fiber.create(purge)
+fiber.create(noise)
+fiber.create(purge)
+fiber.create(noise)
+fiber.create(purge)
+fiber.create(noise)
+fiber.create(purge)
+fiber.create(snapshot, box.info.server.lsn)
+
+fiber.sleep(0.3)
+print('ok')
+os.exit(0)