From 3b4fbdc0badc4106632d34d1370516c8cb117c0a Mon Sep 17 00:00:00 2001 From: Olga Arkhangelskaia <arkholga@tarantool.org> Date: Tue, 10 Mar 2020 12:34:42 +0300 Subject: [PATCH] memtx: fix out of memory handling for rtree When tarantool tries to recover rtree from a snapshot and memtx_memory value is lower than it has been when the snapshot was created, server suffers from segmentation fault. This happens because there is no out of memory error handling in rtree lib. In another words, we do not check the result of malloc operation. The execution flow in case of recovery uses different way and the secondary keys are build in batches. That way has no checks and reservations. The patch adds memtx_rtree_index_reserve implementation to make sure that any memory allocation in rtree will fail. Although this gives us no additional optimization as in case of memtx_tree, the memory reservation prevents tarantool from segmentation fault. If there is not enough memory to be reserved server will fail gently with the "Failed to allocate" error message. Closes #4619 --- src/box/index.cc | 7 +++ src/box/memtx_engine.h | 12 +++++ src/box/memtx_rtree.c | 20 +++++++- src/box/memtx_space.c | 12 ----- src/lib/core/errinj.h | 1 + test/box/errinj.result | 99 ++++++++++++++++++++++++++++++++++++++ test/box/errinj.test.lua | 29 +++++++++++ test/box/lua/cfg_rtree.lua | 8 +++ 8 files changed, 175 insertions(+), 13 deletions(-) create mode 100644 test/box/lua/cfg_rtree.lua diff --git a/src/box/index.cc b/src/box/index.cc index 4e48671182..c2fc008675 100644 --- a/src/box/index.cc +++ b/src/box/index.cc @@ -733,6 +733,13 @@ int generic_index_build_next(struct index *index, struct tuple *tuple) { struct tuple *unused; + /* + * Note this is not no-op call in case of rtee index: + * reserving 0 bytes is required during rtree recovery. + * For details see memtx_rtree_index_reserve(). + */ + if (index_reserve(index, 0) != 0) + return -1; return index_replace(index, NULL, tuple, DUP_INSERT, &unused); } diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h index f562c66df4..8b380bf3cc 100644 --- a/src/box/memtx_engine.h +++ b/src/box/memtx_engine.h @@ -87,6 +87,18 @@ enum memtx_recovery_state { /** Memtx extents pool, available to statistics. */ extern struct mempool memtx_index_extent_pool; +enum memtx_reserve_extents_num { + /** + * This number is calculated based on the + * max (realistic) number of insertions + * a deletion from a B-tree or an R-tree + * can lead to, and, as a result, the max + * number of new block allocations. + */ + RESERVE_EXTENTS_BEFORE_DELETE = 8, + RESERVE_EXTENTS_BEFORE_REPLACE = 16 +}; + /** * The size of the biggest memtx iterator. Used with * mempool_create. This is the size of the block that will be diff --git a/src/box/memtx_rtree.c b/src/box/memtx_rtree.c index 8badad797b..612fcb2a9f 100644 --- a/src/box/memtx_rtree.c +++ b/src/box/memtx_rtree.c @@ -242,6 +242,24 @@ memtx_rtree_index_replace(struct index *base, struct tuple *old_tuple, return 0; } +static int +memtx_rtree_index_reserve(struct index *base, uint32_t size_hint) +{ + /* + * In case of rtree we use reserve to make sure that + * memory allocation will not fail during any operation + * on rtree, because there is no error handling in the + * rtree lib. + */ + (void)size_hint; + ERROR_INJECT(ERRINJ_INDEX_RESERVE, { + diag_set(OutOfMemory, MEMTX_EXTENT_SIZE, "mempool", "new slab"); + return -1; + }); + struct memtx_engine *memtx = (struct memtx_engine *)base->engine; + return memtx_index_extent_reserve(memtx, RESERVE_EXTENTS_BEFORE_REPLACE); +} + static struct iterator * memtx_rtree_index_create_iterator(struct index *base, enum iterator_type type, const char *key, uint32_t part_count) @@ -333,7 +351,7 @@ static const struct index_vtab memtx_rtree_index_vtab = { /* .compact = */ generic_index_compact, /* .reset_stat = */ generic_index_reset_stat, /* .begin_build = */ generic_index_begin_build, - /* .reserve = */ generic_index_reserve, + /* .reserve = */ memtx_rtree_index_reserve, /* .build_next = */ generic_index_build_next, /* .end_build = */ generic_index_end_build, }; diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c index 7c28b7d7b3..09f0de4cec 100644 --- a/src/box/memtx_space.c +++ b/src/box/memtx_space.c @@ -103,18 +103,6 @@ memtx_space_replace_no_keys(struct space *space, struct tuple *old_tuple, return -1; } -enum { - /** - * This number is calculated based on the - * max (realistic) number of insertions - * a deletion from a B-tree or an R-tree - * can lead to, and, as a result, the max - * number of new block allocations. - */ - RESERVE_EXTENTS_BEFORE_DELETE = 8, - RESERVE_EXTENTS_BEFORE_REPLACE = 16 -}; - /** * A short-cut version of replace() used during bulk load * from snapshot. diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index d8cdf3f27b..ee6c57a0d0 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -138,6 +138,7 @@ struct errinj { _(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_FIBER_MPROTECT, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_RELAY_FASTER_THAN_TX, ERRINJ_BOOL, {.bparam = false}) \ + _(ERRINJ_INDEX_RESERVE, ERRINJ_BOOL, {.bparam = false})\ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index 4ad24d0c1b..0d3fedeb31 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -53,6 +53,7 @@ evals - ERRINJ_HTTPC_EXECUTE: false - ERRINJ_HTTP_RESPONSE_ADD_WAIT: false - ERRINJ_INDEX_ALLOC: false + - ERRINJ_INDEX_RESERVE: false - ERRINJ_IPROTO_TX_DELAY: false - ERRINJ_LOG_ROTATE: false - ERRINJ_MEMTX_DELAY_GC: false @@ -1742,3 +1743,101 @@ box.error.injection.get('ERRINJ_RELAY_TIMEOUT') --- - 0 ... +-- +-- gh-4619: make sure that if OOM takes place during rtree recovery, +-- Tarantool instance will fail gracefully. +-- +test_run:cmd('create server rtree with script = "box/lua/cfg_rtree.lua"') +--- +- true +... +test_run:cmd("start server rtree") +--- +- true +... +test_run:cmd('switch rtree') +--- +- true +... +math = require("math") +--- +... +rtreespace = box.schema.create_space('rtree', {if_not_exists = true}) +--- +... +rtreespace:create_index('pk', {if_not_exists = true}) +--- +- unique: true + parts: + - type: unsigned + is_nullable: false + fieldno: 1 + id: 0 + space_id: 512 + type: TREE + name: pk +... +rtreespace:create_index('target', {type='rtree', dimension = 3, parts={2, 'array'},unique = false, if_not_exists = true,}) +--- +- parts: + - type: array + is_nullable: false + fieldno: 2 + dimension: 3 + id: 1 + type: RTREE + space_id: 512 + name: target +... +count = 10 +--- +... +for i = 1, count do box.space.rtree:insert{i, {(i + 1) -\ + math.floor((i + 1)/7000) * 7000, (i + 2) - math.floor((i + 2)/7000) * 7000,\ + (i + 3) - math.floor((i + 3)/7000) * 7000}} end +--- +... +rtreespace:count() +--- +- 10 +... +box.snapshot() +--- +- ok +... +test_run:cmd('switch default') +--- +- true +... +test_run:cmd("stop server rtree") +--- +- true +... +test_run:cmd("start server rtree with crash_expected=True") +--- +- false +... +fio = require('fio') +--- +... +fh = fio.open(fio.pathjoin(fio.cwd(), 'cfg_rtree.log'), {'O_RDONLY'}) +--- +... +size = fh:seek(0, 'SEEK_END') +--- +... +fh:seek(-256, 'SEEK_END') ~= nil +--- +- true +... +line = fh:read(256) +--- +... +fh:close() +--- +- true +... +string.match(line, 'Failed to allocate') ~= nil +--- +- true +... diff --git a/test/box/errinj.test.lua b/test/box/errinj.test.lua index 01d2b68df1..5d8f4c6355 100644 --- a/test/box/errinj.test.lua +++ b/test/box/errinj.test.lua @@ -633,3 +633,32 @@ box.error.injection.set('ERRINJ_RELAY_TIMEOUT', 0.5) box.error.injection.get('ERRINJ_RELAY_TIMEOUT') box.error.injection.set('ERRINJ_RELAY_TIMEOUT', 0) box.error.injection.get('ERRINJ_RELAY_TIMEOUT') + + +-- +-- gh-4619: make sure that if OOM takes place during rtree recovery, +-- Tarantool instance will fail gracefully. +-- +test_run:cmd('create server rtree with script = "box/lua/cfg_rtree.lua"') +test_run:cmd("start server rtree") +test_run:cmd('switch rtree') +math = require("math") +rtreespace = box.schema.create_space('rtree', {if_not_exists = true}) +rtreespace:create_index('pk', {if_not_exists = true}) +rtreespace:create_index('target', {type='rtree', dimension = 3, parts={2, 'array'},unique = false, if_not_exists = true,}) +count = 10 +for i = 1, count do box.space.rtree:insert{i, {(i + 1) -\ + math.floor((i + 1)/7000) * 7000, (i + 2) - math.floor((i + 2)/7000) * 7000,\ + (i + 3) - math.floor((i + 3)/7000) * 7000}} end +rtreespace:count() +box.snapshot() +test_run:cmd('switch default') +test_run:cmd("stop server rtree") +test_run:cmd("start server rtree with crash_expected=True") +fio = require('fio') +fh = fio.open(fio.pathjoin(fio.cwd(), 'cfg_rtree.log'), {'O_RDONLY'}) +size = fh:seek(0, 'SEEK_END') +fh:seek(-256, 'SEEK_END') ~= nil +line = fh:read(256) +fh:close() +string.match(line, 'Failed to allocate') ~= nil diff --git a/test/box/lua/cfg_rtree.lua b/test/box/lua/cfg_rtree.lua new file mode 100644 index 0000000000..f2d32ef7d7 --- /dev/null +++ b/test/box/lua/cfg_rtree.lua @@ -0,0 +1,8 @@ +#!/usr/bin/env tarantool +os = require('os') +box.error.injection.set("ERRINJ_INDEX_RESERVE", true) +box.cfg{ + listen = os.getenv("LISTEN"), +} +require('console').listen(os.getenv('ADMIN')) +box.schema.user.grant('guest', 'read,write,execute', 'universe') -- GitLab