From 4dd421fbd2fcbb2c27a6f6f00d90e1ce63341b97 Mon Sep 17 00:00:00 2001 From: Kirill Yukhin <kyukhin@tarantool.org> Date: Mon, 18 May 2020 18:56:31 +0300 Subject: [PATCH] Copy DSO module before load instead of symlink-ing Tarantool module reload mechanism allows to reload a module even if there're functions running. It is implemented by refcounting each invocation of module's routines. If reload is called, then refcounter is checked: - If it is 0, then no routines are in flight and module is reloaded by simple pair of dlclose()/dlopen(). - If it is non-zero, then there're routines in flight. To allow to load multiple versions of modules it is loaded not from the DSO specified. Symlink to tempdir is created and dlopen() is invoked against it (w/RTLD_LOCAL flag to avoid conflicts). This trick was implemented in order to fool a dynamic linker: one cannot invoke dlopen() against same file, so let's pretend there're to independent DSOs. The problem is that dynamic linker is smart enough. It tracks not filenames, but i-nodes. Moreover it is smart enough to do stat -L against DSO to follow symlinks! [1][2] So, any attempts to create a symlinks to fool dynamic linker fail and instead of doing actual load it just increments internal refcounter in map w/ corresponding inode, which in turn leads to not-reloading. This wasn't caught by test since old module was always unlinked before new one is copied in place. The patch always copies DSO instead of creating a symlink. Also it fixes the test so in SEGFAULTs without the change. Closes #4945 [1] - https://sourceware.org/git/?p=glibc.git;a=blob;f=elf/dl-load\ .c;h=a5318f9c8d1d42745a254479cf6bb1cd2acd516f;hb=58557c229319a3b8d\ 2eefdb62e7df95089eabe37#l898 [2] - https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/pos\ ix/dl-fileid.h;hb=58557c229319a3b8d2eefdb62e7df95089eabe37#l33 --- src/box/func.c | 41 +++++++++++++++++++++++++++++++++-- test/box/func_reload.result | 32 ++++++--------------------- test/box/func_reload.test.lua | 20 ++++++----------- 3 files changed, 53 insertions(+), 40 deletions(-) diff --git a/src/box/func.c b/src/box/func.c index 04b04b6adc..c44da9971a 100644 --- a/src/box/func.c +++ b/src/box/func.c @@ -40,6 +40,8 @@ #include "port.h" #include "schema.h" #include "session.h" +#include "libeio/eio.h" +#include <fcntl.h> #include <dlfcn.h> /** @@ -269,10 +271,45 @@ module_load(const char *package, const char *package_end) char load_name[PATH_MAX + 1]; snprintf(load_name, sizeof(load_name), "%s/%.*s." TARANTOOL_LIBEXT, dir_name, package_len, package); - if (symlink(path, load_name) < 0) { - diag_set(SystemError, "failed to create dso link"); + + struct stat st; + if (stat(path, &st) < 0) { + diag_set(SystemError, "failed to stat() module %s", path); + goto error; + } + + int source_fd = open(path, O_RDONLY); + if (source_fd < 0) { + diag_set(SystemError, "failed to open module %s file for" \ + " reading", path); + goto error; + } + int dest_fd = open(load_name, O_WRONLY|O_CREAT|O_TRUNC, + st.st_mode & 0777); + if (dest_fd < 0) { + diag_set(SystemError, "failed to open file %s for writing ", + load_name); + close(source_fd); goto error; } + + off_t pos, left; + for (left = st.st_size, pos = 0; left > 0;) { + off_t ret = eio_sendfile_sync(dest_fd, source_fd, pos, + st.st_size); + if (ret < 0) { + diag_set(SystemError, "failed to copy DSO %s to %s", + path, load_name); + close(source_fd); + close(dest_fd); + goto error; + } + pos += ret; + left -= ret; + } + close(source_fd); + close(dest_fd); + module->handle = dlopen(load_name, RTLD_NOW | RTLD_LOCAL); if (unlink(load_name) != 0) say_warn("failed to unlink dso link %s", load_name); diff --git a/test/box/func_reload.result b/test/box/func_reload.result index b024cd143e..1313fdf184 100644 --- a/test/box/func_reload.result +++ b/test/box/func_reload.result @@ -46,7 +46,7 @@ box.schema.user.grant('guest', 'read,write', 'space', 'test') _ = fio.unlink(reload_path) --- ... -fio.symlink(reload1_path, reload_path) +fio.copyfile(reload1_path, reload_path) --- - true ... @@ -68,10 +68,7 @@ box.space.test:delete{0} --- - [0] ... -_ = fio.unlink(reload_path) ---- -... -fio.symlink(reload2_path, reload_path) +fio.copyfile(reload2_path, reload_path) --- - true ... @@ -92,10 +89,7 @@ box.space.test:truncate() --- ... -- test case with hanging calls -_ = fio.unlink(reload_path) ---- -... -fio.symlink(reload1_path, reload_path) +fio.copyfile(reload1_path, reload_path) --- - true ... @@ -115,10 +109,7 @@ while box.space.test:count() < fibers do fiber.sleep(0.001) end box.schema.func.reload("reload") --- ... -_ = fio.unlink(reload_path) ---- -... -fio.symlink(reload2_path, reload_path) +fio.copyfile(reload2_path, reload_path) --- - true ... @@ -162,10 +153,7 @@ box.schema.func.drop("reload.foo") box.space.test:drop() --- ... -_ = fio.unlink(reload_path) ---- -... -fio.symlink(reload1_path, reload_path) +fio.copyfile(reload1_path, reload_path) --- - true ... @@ -196,10 +184,7 @@ s:delete({1}) --- - [1, 2] ... -_ = fio.unlink(reload_path) ---- -... -fio.symlink(reload2_path, reload_path) +fio.copyfile(reload2_path, reload_path) --- - true ... @@ -236,10 +221,7 @@ c:call("reload.test_reload_fail") --- - [[2]] ... -_ = fio.unlink(reload_path) ---- -... -fio.symlink(reload1_path, reload_path) +fio.copyfile(reload1_path, reload_path) --- - true ... diff --git a/test/box/func_reload.test.lua b/test/box/func_reload.test.lua index 8906898ecb..4c062fd72a 100644 --- a/test/box/func_reload.test.lua +++ b/test/box/func_reload.test.lua @@ -17,7 +17,7 @@ _ = box.schema.space.create('test') _ = box.space.test:create_index('primary', {parts = {1, "integer"}}) box.schema.user.grant('guest', 'read,write', 'space', 'test') _ = fio.unlink(reload_path) -fio.symlink(reload1_path, reload_path) +fio.copyfile(reload1_path, reload_path) --check not fail on non-load func box.schema.func.reload("reload") @@ -26,8 +26,7 @@ box.schema.func.reload("reload") box.space.test:insert{0} c:call("reload.foo", {1}) box.space.test:delete{0} -_ = fio.unlink(reload_path) -fio.symlink(reload2_path, reload_path) +fio.copyfile(reload2_path, reload_path) box.schema.func.reload("reload") c:call("reload.foo") @@ -35,8 +34,7 @@ box.space.test:select{} box.space.test:truncate() -- test case with hanging calls -_ = fio.unlink(reload_path) -fio.symlink(reload1_path, reload_path) +fio.copyfile(reload1_path, reload_path) box.schema.func.reload("reload") fibers = 10 @@ -46,8 +44,7 @@ while box.space.test:count() < fibers do fiber.sleep(0.001) end -- double reload doesn't fail waiting functions box.schema.func.reload("reload") -_ = fio.unlink(reload_path) -fio.symlink(reload2_path, reload_path) +fio.copyfile(reload2_path, reload_path) box.schema.func.reload("reload") c:call("reload.foo") @@ -55,9 +52,8 @@ while box.space.test:count() < 2 * fibers + 1 do fiber.sleep(0.001) end box.space.test:select{} box.schema.func.drop("reload.foo") box.space.test:drop() -_ = fio.unlink(reload_path) -fio.symlink(reload1_path, reload_path) +fio.copyfile(reload1_path, reload_path) box.schema.func.create('reload.test_reload', {language = "C"}) box.schema.user.grant('guest', 'execute', 'function', 'reload.test_reload') s = box.schema.space.create('test_reload') @@ -67,8 +63,7 @@ ch = fiber.channel(2) -- call first time to load function c:call("reload.test_reload") s:delete({1}) -_ = fio.unlink(reload_path) -fio.symlink(reload2_path, reload_path) +fio.copyfile(reload2_path, reload_path) _ = fiber.create(function() ch:put(c:call("reload.test_reload")) end) while s:get({1}) == nil do fiber.yield(0.0001) end box.schema.func.reload("reload") @@ -80,8 +75,7 @@ s:drop() box.schema.func.create('reload.test_reload_fail', {language = "C"}) box.schema.user.grant('guest', 'execute', 'function', 'reload.test_reload_fail') c:call("reload.test_reload_fail") -_ = fio.unlink(reload_path) -fio.symlink(reload1_path, reload_path) +fio.copyfile(reload1_path, reload_path) c:call("reload.test_reload") c:call("reload.test_reload_fail") -- GitLab