From 7a58264644206fe81c0338aefe1409c98055334c Mon Sep 17 00:00:00 2001 From: Ilya Verbin <iverbin@tarantool.org> Date: Fri, 10 Jun 2022 14:54:42 +0300 Subject: [PATCH] core: allow spurious wakeups in coio_waitpid Currently it's possible to wakeup a fiber, which is waiting for a child process termination, using Tarantool C API. This will leave a zombie process behind. This patch reworks `coio_waitpid` in such a way that it yields until `cw.data` is set to NULL in the process status change callback. Part of #7166 NO_DOC=refactoring NO_CHANGELOG=refactoring --- src/lib/core/coio.c | 31 +++++++++++++++++++++---------- test/unit/coio.cc | 26 ++++++++++++++++++++++++++ test/unit/coio.result | 4 +++- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/src/lib/core/coio.c b/src/lib/core/coio.c index 28ed096112..57caadeae3 100644 --- a/src/lib/core/coio.c +++ b/src/lib/core/coio.c @@ -491,25 +491,36 @@ coio_stat_stat_timeout(ev_stat *stat, ev_tstamp timeout) return 0; } -typedef void (*ev_child_cb)(ev_loop *, ev_child *, int); +/** + * The process status change callback. + * Similar to fiber_schedule_cb, but also set watcher->data to NULL to indicate + * that the fiber has been woken up on the child process termination. + */ +static void +coio_status_change_cb(ev_loop *loop, ev_child *watcher, int revents) +{ + (void)loop; + (void)revents; + struct fiber *fiber = watcher->data; + assert(fiber() == &cord()->sched); + watcher->data = NULL; + fiber_wakeup(fiber); +} int coio_waitpid(pid_t pid, int *status) { assert(cord_is_main()); ev_child cw; - ev_init(&cw, (ev_child_cb) fiber_schedule_cb); + ev_init(&cw, coio_status_change_cb); ev_child_set(&cw, pid, 0); cw.data = fiber(); ev_child_start(loop(), &cw); - /* - * It's not safe to spuriously wakeup this fiber since - * in this case the server will leave a zombie process - * behind. - */ - bool allow_cancel = fiber_set_cancellable(false); - fiber_yield(); - fiber_set_cancellable(allow_cancel); + + do { + fiber_yield(); + } while (cw.data != NULL); + ev_child_stop(loop(), &cw); if (fiber_is_cancelled()) { diag_set(FiberIsCancelled); diff --git a/test/unit/coio.cc b/test/unit/coio.cc index b617fd4433..dea9ef904c 100644 --- a/test/unit/coio.cc +++ b/test/unit/coio.cc @@ -8,6 +8,7 @@ #include <fcntl.h> #include <sys/uio.h> +#include <sys/wait.h> #include <sys/errno.h> int @@ -171,6 +172,29 @@ test_writev_f(va_list ap) return 0; } +static int +test_waitpid_f(va_list ap) +{ + /* Flush buffers to avoid multiple output. */ + fflush(stdout); + fflush(stderr); + + pid_t pid = fork(); + + /* Child process: do something for 10ms and exit. */ + if (pid == 0) { + usleep(10000); + exit(0); + } + + fail_if(pid == -1); + int status, rc = coio_waitpid(pid, &status); + fail_if(rc != 0); + fail_if(WIFEXITED(status) == 0); + + return 0; +} + static void fill_pipe(int fd) { @@ -211,11 +235,13 @@ read_write_test(void) test_read_f, test_write_f, test_writev_f, + test_waitpid_f }; const char *descr[] = { "read", "write", "writev", + "waitpid" }; int num_tests = sizeof(test_funcs) / sizeof(test_funcs[0]); diff --git a/test/unit/coio.result b/test/unit/coio.result index d35e33c19d..72625716d4 100644 --- a/test/unit/coio.result +++ b/test/unit/coio.result @@ -20,11 +20,13 @@ ok 3 - bad ipv6 host name - error ok 4 - bad ipv6 host name - error message *** test_connect: done *** *** read_write_test *** -1..6 +1..8 ok 1 - coio_read handle spurious wakeup ok 2 - coio_read success after a spurious wakeup ok 3 - coio_write handle spurious wakeup ok 4 - coio_write success after a spurious wakeup ok 5 - coio_writev handle spurious wakeup ok 6 - coio_writev success after a spurious wakeup +ok 7 - coio_waitpid handle spurious wakeup +ok 8 - coio_waitpid success after a spurious wakeup *** read_write_test: done *** -- GitLab