diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 55d2c0ee847873c322dc23e1ca0316fd42de74fa..7d0ef6126cd267aab30b685564bdd176525f2fa7 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -50,7 +50,7 @@ set (recompiled_core_sources ${CMAKE_SOURCE_DIR}/core/admin.c ${CMAKE_SOURCE_DIR}/core/fiber.c PARENT_SCOPE) -set (common_sources tbuf.c palloc.c util.c +set (common_sources tbuf.c palloc.c util.c diagnostics.c salloc.c pickle.c coro.c stat.c log_io.c log_io_remote.c iproto.c) diff --git a/core/admin.c b/core/admin.c index 000cd857641b9224f5e74e722cb1db1785cf6c3c..ab1f5afc6df7503d00f671cd8fa5383f113fc165 100644 --- a/core/admin.c +++ b/core/admin.c @@ -186,15 +186,15 @@ case 6: } goto st0; tr12: -#line 180 "core/admin.rl" +#line 193 "core/admin.rl" {slab_validate(); ok(out);} goto st108; tr19: -#line 170 "core/admin.rl" +#line 183 "core/admin.rl" {return 0;} goto st108; tr28: -#line 166 "core/admin.rl" +#line 179 "core/admin.rl" {strend = p;} #line 137 "core/admin.rl" { @@ -221,12 +221,23 @@ case 6: } goto st108; tr66: -#line 177 "core/admin.rl" +#line 190 "core/admin.rl" {coredump(60); ok(out);} goto st108; tr75: -#line 178 "core/admin.rl" - {snapshot(NULL, 0); ok(out);} +#line 150 "core/admin.rl" + { + int ret = snapshot(NULL, 0); + + if (ret == 0) + ok(out); + else { + tbuf_printf(err, " can't save snapshot, errno %d (%s)", + ret, strerror(ret)); + + fail(out, err); + } + } goto st108; tr92: #line 113 "core/admin.rl" @@ -249,41 +260,41 @@ case 6: } goto st108; tr106: -#line 172 "core/admin.rl" +#line 185 "core/admin.rl" {start(out); fiber_info(out); end(out);} goto st108; tr112: -#line 171 "core/admin.rl" +#line 184 "core/admin.rl" {start(out); mod_info(out); end(out);} goto st108; tr117: -#line 175 "core/admin.rl" +#line 188 "core/admin.rl" {start(out); palloc_stat(out); end(out);} goto st108; tr125: -#line 174 "core/admin.rl" +#line 187 "core/admin.rl" {start(out); slab_stat(out); end(out);} goto st108; tr129: -#line 176 "core/admin.rl" +#line 189 "core/admin.rl" {start(out); stat_print(out);end(out);} goto st108; st108: if ( ++p == pe ) goto _test_eof108; case 108: -#line 276 "core/admin.c" +#line 287 "core/admin.c" goto st0; tr13: -#line 180 "core/admin.rl" +#line 193 "core/admin.rl" {slab_validate(); ok(out);} goto st7; tr20: -#line 170 "core/admin.rl" +#line 183 "core/admin.rl" {return 0;} goto st7; tr29: -#line 166 "core/admin.rl" +#line 179 "core/admin.rl" {strend = p;} #line 137 "core/admin.rl" { @@ -310,12 +321,23 @@ case 108: } goto st7; tr67: -#line 177 "core/admin.rl" +#line 190 "core/admin.rl" {coredump(60); ok(out);} goto st7; tr76: -#line 178 "core/admin.rl" - {snapshot(NULL, 0); ok(out);} +#line 150 "core/admin.rl" + { + int ret = snapshot(NULL, 0); + + if (ret == 0) + ok(out); + else { + tbuf_printf(err, " can't save snapshot, errno %d (%s)", + ret, strerror(ret)); + + fail(out, err); + } + } goto st7; tr93: #line 113 "core/admin.rl" @@ -338,30 +360,30 @@ case 108: } goto st7; tr107: -#line 172 "core/admin.rl" +#line 185 "core/admin.rl" {start(out); fiber_info(out); end(out);} goto st7; tr113: -#line 171 "core/admin.rl" +#line 184 "core/admin.rl" {start(out); mod_info(out); end(out);} goto st7; tr118: -#line 175 "core/admin.rl" +#line 188 "core/admin.rl" {start(out); palloc_stat(out); end(out);} goto st7; tr126: -#line 174 "core/admin.rl" +#line 187 "core/admin.rl" {start(out); slab_stat(out); end(out);} goto st7; tr130: -#line 176 "core/admin.rl" +#line 189 "core/admin.rl" {start(out); stat_print(out);end(out);} goto st7; st7: if ( ++p == pe ) goto _test_eof7; case 7: -#line 365 "core/admin.c" +#line 387 "core/admin.c" if ( (*p) == 10 ) goto st108; goto st0; @@ -442,28 +464,28 @@ case 15: } goto tr25; tr25: -#line 166 "core/admin.rl" +#line 179 "core/admin.rl" {strstart = p;} goto st16; st16: if ( ++p == pe ) goto _test_eof16; case 16: -#line 453 "core/admin.c" +#line 475 "core/admin.c" switch( (*p) ) { case 10: goto tr28; case 13: goto tr29; } goto st16; tr26: -#line 166 "core/admin.rl" +#line 179 "core/admin.rl" {strstart = p;} goto st17; st17: if ( ++p == pe ) goto _test_eof17; case 17: -#line 467 "core/admin.c" +#line 489 "core/admin.c" switch( (*p) ) { case 10: goto tr28; case 13: goto tr29; @@ -1427,7 +1449,7 @@ case 107: _out: {} } -#line 186 "core/admin.rl" +#line 199 "core/admin.rl" fiber->rbuf->len -= (void *)pe - (void *)fiber->rbuf->data; diff --git a/core/admin.rl b/core/admin.rl index 896f203272e76750995755e5ee0c4ca8c061f834..1be63602a494048b7bd4b5bf0dfba04b491dcc74 100644 --- a/core/admin.rl +++ b/core/admin.rl @@ -147,6 +147,19 @@ admin_dispatch(void) ok(out); } + action save_snapshot { + int ret = snapshot(NULL, 0); + + if (ret == 0) + ok(out); + else { + tbuf_printf(err, " can't save snapshot, errno %d (%s)", + ret, strerror(ret)); + + fail(out, err); + } + } + eol = "\n" | "\r\n"; show = "sh"("o"("w")?)?; info = "in"("f"("o")?)?; @@ -175,7 +188,7 @@ admin_dispatch(void) show " "+ palloc %{start(out); palloc_stat(out); end(out);} | show " "+ stat %{start(out); stat_print(out);end(out);} | save " "+ coredump %{coredump(60); ok(out);} | - save " "+ snapshot %{snapshot(NULL, 0); ok(out);} | + save " "+ snapshot %save_snapshot | exec " "+ string %mod_exec | check " "+ slab %{slab_validate(); ok(out);} | reload " "+ configuration %reload_configuration); diff --git a/core/diagnostics.c b/core/diagnostics.c new file mode 100644 index 0000000000000000000000000000000000000000..237c02d1135064251ec7b73d8e72b5c8e411660c --- /dev/null +++ b/core/diagnostics.c @@ -0,0 +1,90 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: 1. Redistributions of source code must + * retain the above copyright notice, this list of conditions and + * the following disclaimer. 2. Redistributions in binary form + * must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "diagnostics.h" +#include "fiber.h" +#include <errno.h> +#include <string.h> + +static struct Error oom_error = { ENOMEM, "Out of memory" }; + +/** + * Allocate error on heap. Errors are expected to be rare and + * small, thus we don't care much about allocation speed, and + * memory fragmentation should be negligible. + */ + +static struct Error *error_create(int code, const char *msg_arg) +{ + /* Just something large enough. */ + const int MAX_MSGLEN = 200; + + if (msg_arg == NULL) + msg_arg = ""; + + size_t msglen = strlen(msg_arg); + char *msg; + + if (msglen > MAX_MSGLEN) + msglen = MAX_MSGLEN; + + struct Error *error = malloc(sizeof(struct Error) + msglen + 1); + + if (error == NULL) + return &oom_error; + + msg = (char *)(error + 1); + strncpy(msg, msg_arg, msglen); + msg[msglen] = '\0'; + + error->code = code; + error->msg = msg; + + return error; +} + + +static void error_destroy(struct Error *error) +{ + if (error != &oom_error) + free(error); +} + + +void diag_set_error(int code, const char *message) +{ + if (fiber->diagnostics) + diag_clear(); + fiber->diagnostics = error_create(code, message); +} + + +struct Error *diag_get_last_error() +{ + return fiber->diagnostics; +} + + +void diag_clear() +{ + error_destroy(fiber->diagnostics); +} diff --git a/core/diagnostics.h b/core/diagnostics.h new file mode 100644 index 0000000000000000000000000000000000000000..c136c8ecdfb06f4b75fe7d86d055c53078ef6088 --- /dev/null +++ b/core/diagnostics.h @@ -0,0 +1,58 @@ +#ifndef TARANTOOL_CORE_DIAGNOSTICS_H_INCLUDED +#define TARANTOOL_CORE_DIAGNOSTICS_H_INCLUDED +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: 1. Redistributions of source code must + * retain the above copyright notice, this list of conditions and + * the following disclaimer. 2. Redistributions in binary form + * must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This is used globally in the program to pass around information + * about execution errors. Each fiber has its own error context, + * setting an error in one doesn't affect another. + */ + +struct Error +{ + /** Most often contains system errno. */ + int code; + /** Text description of the error. Can be NULL. */ + const char *msg; +}; + +/** + * Set the last error in the current execution context (fiber). + * If another error was already set, it's overwritten. + * + * @param code Error code. + * @todo: think how to distinguish errno and tarantool codes here. + * @param message Optional text message. Can be NULL. + */ +void diag_set_error(int code, const char *msg); + +/** Return the last error. Return NULL if no error. + */ +struct Error *diag_get_last_error(); + +/** Clear the last error, if any. + */ +void diag_clear(); + +#endif /* TARANTOOL_CORE_DIAGNOSTICS_H_INCLUDED */ diff --git a/core/fiber.c b/core/fiber.c index 173bc9f8cd57ae26f489fa11b722422d741b0f87..3d184addfa7a032307fa877e0eb874039f0e54aa 100644 --- a/core/fiber.c +++ b/core/fiber.c @@ -53,6 +53,7 @@ #include <util.h> #include <stat.h> #include <pickle.h> +#include "diagnostics.h" static struct fiber sched; struct fiber *fiber = &sched; @@ -154,6 +155,18 @@ fiber_sleep(ev_tstamp delay) yield(); } + +/** Wait for a forked child to complete. */ + +void +wait_for_child(pid_t pid) +{ + ev_child_set(&fiber->cw, pid, 0); + ev_child_start(&fiber->cw); + yield(); + ev_child_stop(&fiber->cw); +} + void wait_for(int events) { @@ -305,17 +318,22 @@ fiber_gc(void) prelease(ex_pool); } -void -fiber_zombificate(struct fiber *f) + +/** Destroy the currently active fiber and prepare it for reuse. + */ + +static void +fiber_zombificate() { - f->name = NULL; - f->f = NULL; - f->data = NULL; - unregister_fid(f); - f->fid = 0; - fiber_alloc(f); - - SLIST_INSERT_HEAD(&zombie_fibers, f, zombie_link); + diag_clear(); + fiber->name = NULL; + fiber->f = NULL; + fiber->data = NULL; + unregister_fid(fiber); + fiber->fid = 0; + fiber_alloc(fiber); + + SLIST_INSERT_HEAD(&zombie_fibers, fiber, zombie_link); } static void @@ -330,7 +348,7 @@ fiber_loop(void *data __unused__) } fiber_close(); - fiber_zombificate(fiber); + fiber_zombificate(); yield(); /* give control back to scheduler */ } } @@ -367,7 +385,8 @@ fiber_create(const char *restrict name, int fd, int inbox_size, void (*f) (void fiber_alloc(fiber); ev_init(&fiber->io, (void *)ev_schedule); ev_init(&fiber->timer, (void *)ev_schedule); - fiber->io.data = fiber->timer.data = fiber; + ev_init(&fiber->cw, (void *)ev_schedule); + fiber->io.data = fiber->timer.data = fiber->cw.data = fiber; SLIST_INSERT_HEAD(&fibers, fiber, link); } diff --git a/core/log_io.c b/core/log_io.c index c84c45e0d30e054f77c838976dd76aea12015b29..ea572971a862ab8345c68ed98f538252881c2685 100644 --- a/core/log_io.c +++ b/core/log_io.c @@ -46,6 +46,7 @@ #include <util.h> #include <pickle.h> #include <tbuf.h> +#include "diagnostics.h" const u16 snap_tag = -1; const u16 wal_tag = -2; @@ -58,6 +59,7 @@ const u32 marker_v11 = 0xba0babed; const u32 eof_marker_v11 = 0x10adab1e; const char *snap_suffix = ".snap"; const char *xlog_suffix = ".xlog"; +const char *inprogress_suffix = ".inprogress"; const char *v04 = "0.04\n"; const char *v03 = "0.03\n"; const char *v11 = "0.11\n"; @@ -179,7 +181,7 @@ snap_classes(row_reader snap_row_reader, const char *dirname) c[1]->filetype = c[0]->filetype; c[1]->suffix = c[0]->suffix; - c[0]->dirname = c[1]->dirname = dirname; + c[0]->dirname = c[1]->dirname = dirname ? strdup(dirname) : NULL; return c; } @@ -198,7 +200,7 @@ xlog_classes(const char *dirname) xlog04_class(c[0]); v11_class(c[1]); - c[0]->dirname = c[1]->dirname = dirname; + c[0]->dirname = c[1]->dirname = dirname ? strdup(dirname) : NULL; return c; } @@ -365,14 +367,30 @@ scan_dir(struct log_io_class *class, i64 **ret_lsn) errno = 0; while ((dent = readdir(dh)) != NULL) { - size_t len = strlen(dent->d_name) + 1; - if (len < suffix_len + 1) + char *suffix = strrchr(dent->d_name, '.'); + + if (suffix == NULL) continue; - if (strcmp(dent->d_name + len - 1 - suffix_len, class->suffix)) + + char *sub_suffix = memrchr(dent->d_name, '.', suffix - dent->d_name); + + /* + * A valid suffix is either .xlog or + * .xlog.inprogress, given class->suffix == + * 'xlog'. + */ + bool valid_suffix; + valid_suffix = (strcmp(suffix, class->suffix) == 0 || + (sub_suffix != NULL && + strcmp(suffix, inprogress_suffix) == 0 && + strncmp(sub_suffix, class->suffix, suffix_len) == 0)); + + if (!valid_suffix) continue; lsn[i] = strtoll(dent->d_name, &parse_suffix, 10); - if (strcmp(parse_suffix, class->suffix) != 0) { /* d_name doesn't parse entirely, ignore it */ + if (strncmp(parse_suffix, class->suffix, suffix_len) != 0) { + /* d_name doesn't parse entirely, ignore it */ say_warn("can't parse `%s', skipping", dent->d_name); continue; } @@ -548,12 +566,59 @@ row_reader_v11(FILE *f, struct palloc_pool *pool) return m; } +static int +inprogress_log_rename(char *filename) +{ + char *new_filename; + char *suffix = strrchr(filename, '.'); + + assert(suffix); + assert(strcmp(suffix, inprogress_suffix) == 0); + + /* Create a new filename without '.inprogress' suffix. */ + new_filename = strndupa(filename, suffix - filename); + + if (rename(filename, new_filename) != 0) { + say_syserror("can't rename %s to %s", filename, new_filename); + + return -1; + } + + return 0; +} + +static int +inprogress_log_unlink(char *filename) +{ +#ifndef NDEBUG + char *suffix = strrchr(filename, '.'); + assert(suffix); + assert(strcmp(suffix, inprogress_suffix) == 0); +#endif + if (unlink(filename) != 0) { + if (errno == ENONET) + return 0; + + say_syserror("can't unlink %s", filename); + + return -1; + } + + return 0; +} + int close_log(struct log_io **lptr) { struct log_io *l = *lptr; int r; + if (l->rows == 1 && l->mode == LOG_WRITE) { + /* Rename WAL before finalize. */ + if (inprogress_log_rename(l->filename) != 0) + panic("can't rename 'inprogress' WAL"); + } + if (l->class->eof_marker_size > 0 && l->mode == LOG_WRITE) { if (fwrite(&l->class->eof_marker, l->class->eof_marker_size, 1, l->f) != 1) say_error("can't write eof_marker"); @@ -628,13 +693,12 @@ format_filename(char *filename, struct log_io_class *class, i64 lsn, int suffix) class->dirname, lsn, class->suffix); break; case -1: - snprintf(filename, PATH_MAX, "%s/%020" PRIi64 "%s.inprogress", - class->dirname, lsn, class->suffix); + snprintf(filename, PATH_MAX, "%s/%020" PRIi64 "%s%s", + class->dirname, lsn, class->suffix, inprogress_suffix); break; default: - snprintf(filename, PATH_MAX, "%s/%020" PRIi64 "%s.%i", - class->dirname, lsn, class->suffix, suffix); - break; + /* not reached */ + assert(0); } return filename; } @@ -646,14 +710,15 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l char filetype[32], version[32], buf[256]; struct log_io *l = NULL; char *r; - char *error = "unknown error"; - l = malloc(sizeof(*l)); - if (l == NULL) + l = calloc(1, sizeof(*l)); + if (l == NULL) { + diag_set_error(errno, strerror(errno)); goto error; - memset(l, 0, sizeof(*l)); + } l->mode = LOG_READ; l->stat.data = recover; + l->is_inprogress = suffix == -1 ? true : false; /* when filename is not null it is forced open for debug reading */ if (filename == NULL) { @@ -668,24 +733,24 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l l->f = fopen(l->filename, "r"); if (l->f == NULL) { - error = strerror(errno); + diag_set_error(errno, strerror(errno)); goto error; } r = fgets(filetype, sizeof(filetype), l->f); if (r == NULL) { - error = "header reading failed"; + diag_set_error(1, "header reading failed"); goto error; } r = fgets(version, sizeof(version), l->f); if (r == NULL) { - error = "header reading failed"; + diag_set_error(1, "header reading failed"); goto error; } if (strcmp((*class)->filetype, filetype) != 0) { - error = "unknown filetype"; + diag_set_error(1, "unknown filetype"); goto error; } @@ -696,7 +761,7 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l } if (*class == NULL) { - error = "unknown version"; + diag_set_error(1, "unknown version"); goto error; } l->class = *class; @@ -705,7 +770,7 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l for (;;) { r = fgets(buf, sizeof(buf), l->f); if (r == NULL) { - error = "header reading failed"; + diag_set_error(1, "header reading failed"); goto error; } if (strcmp(r, "\n") == 0 || strcmp(r, "\r\n") == 0) @@ -714,14 +779,15 @@ open_for_read(struct recovery_state *recover, struct log_io_class **class, i64 l } else { r = fgets(buf, sizeof(buf), l->f); /* skip line with time */ if (r == NULL) { - error = "header reading failed"; + diag_set_error(1, "header reading failed"); goto error; } } return l; error: - say_error("open_for_read: failed to open `%s': %s", l->filename, error); + say_error("open_for_read: failed to open `%s': %s", l->filename, + diag_get_last_error()->msg); if (l != NULL) { if (l->f != NULL) fclose(l->f); @@ -735,12 +801,14 @@ open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 l { struct log_io *l = NULL; int fd; - char *error = "unknown error"; + char *dot; + bool exists; - l = malloc(sizeof(*l)); - if (l == NULL) + l = calloc(1, sizeof(*l)); + if (l == NULL) { + diag_set_error(errno, strerror(errno)); goto error; - memset(l, 0, sizeof(*l)); + } l->mode = LOG_WRITE; l->class = class; l->stat.data = recover; @@ -750,15 +818,34 @@ open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 l format_filename(l->filename, class, lsn, suffix); say_debug("find_log for writing `%s'", l->filename); + if (suffix == -1) { + /* + * Check whether a file with this name already exists. + * We don't overwrite existing files. + */ + dot = strrchr(l->filename, '.'); + *dot = '\0'; + exists = access(l->filename, F_OK) == 0; + *dot = '.'; + if (exists) { + diag_set_error(EEXIST, "exists"); + goto error; + } + } + + /* + * Open the <lsn>.<suffix>.inprogress file. If it + * exists, open will fail. + */ fd = open(l->filename, O_WRONLY | O_CREAT | O_EXCL | O_APPEND, 0664); if (fd < 0) { - error = strerror(errno); + diag_set_error(errno, strerror(errno)); goto error; } l->f = fdopen(fd, "a"); if (l->f == NULL) { - error = strerror(errno); + diag_set_error(errno, strerror(errno)); goto error; } @@ -766,7 +853,8 @@ open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 l write_header(l); return l; error: - say_error("find_log: failed to open `%s': %s", l->filename, error); + say_error("find_log: failed to open `%s': %s", l->filename, + diag_get_last_error()->msg); if (l != NULL) { if (l->f != NULL) fclose(l->f); @@ -938,8 +1026,6 @@ recover_remaining_wals(struct recovery_state *r) { int result = 0; struct log_io *next_wal; - char *name; - int suffix = 0; i64 current_lsn, wal_greatest_lsn; size_t rows_before; @@ -965,13 +1051,30 @@ recover_remaining_wals(struct recovery_state *r) } current_lsn = r->confirmed_lsn + 1; /* TODO: find better way looking for next xlog */ - next_wal = open_for_read(r, r->wal_class, current_lsn, suffix, NULL); + next_wal = open_for_read(r, r->wal_class, current_lsn, 0, NULL); + + /* + * When doing final recovery, and dealing with the + * last file, try opening .<suffix>.inprogress. + */ + if (next_wal == NULL && r->finalize && current_lsn == wal_greatest_lsn) { + next_wal = open_for_read(r, r->wal_class, current_lsn, -1, NULL); + if (next_wal == NULL) { + char *filename = + format_filename(NULL, *r->wal_class, current_lsn, -1); + + say_warn("unlink broken %s wal", filename); + if (inprogress_log_unlink(filename) != 0) + panic("can't unlink 'inprogres' wal"); + } + } + if (next_wal == NULL) { - if (suffix++ < 10) - continue; result = 0; break; } + + assert(r->current_wal == NULL); r->current_wal = next_wal; say_info("recover from `%s'", r->current_wal->filename); @@ -987,26 +1090,10 @@ recover_remaining_wals(struct recovery_state *r) if (r->current_wal->rows > 0 && r->current_wal->rows != rows_before) r->current_wal->retry = 0; - /* - * rows == 0 could possible indicate an filename confilct - * retry filename with same lsn but with bigger suffix - */ + /* rows == 0 could possible indicate to an empty WAL */ if (r->current_wal->rows == 0) { - say_error("read zero records from %s, RETRY", r->current_wal->filename); - if (suffix++ < 10) - continue; - - say_error("too many filename conflicters"); - result = -1; + say_error("read zero records from %s", r->current_wal->filename); break; - } else { - name = format_filename(NULL, r->wal_prefered_class, - current_lsn, suffix + 1); - if (access(name, F_OK) == 0) { - say_error("found conflicter `%s' after successful reading", name); - result = -1; - break; - } } if (result == LOG_EOF) { @@ -1014,15 +1101,14 @@ recover_remaining_wals(struct recovery_state *r) r->confirmed_lsn); close_log(&r->current_wal); } - suffix = 0; } /* - * it's not a fatal error then last wal is empty - * but if we lost some logs it is fatal error + * It's not a fatal error when last WAL is empty, but if + * we lost some logs it is a fatal error. */ if (wal_greatest_lsn > r->confirmed_lsn + 1) { - say_error("not all wals have been successfuly read"); + say_error("not all WALs have been successfully read"); result = -1; } @@ -1140,6 +1226,8 @@ recover_finalize(struct recovery_state *r) { int result; + r->finalize = true; + if (ev_is_active(&r->wal_timer)) ev_timer_stop(&r->wal_timer); @@ -1150,10 +1238,29 @@ recover_finalize(struct recovery_state *r) result = recover_remaining_wals(r); if (result < 0) - panic("unable to scucessfully finalize recovery"); + panic("unable to successfully finalize recovery"); if (r->current_wal != NULL && result != LOG_EOF) { say_warn("wal `%s' wasn't correctly closed", r->current_wal->filename); + + if (!r->current_wal->is_inprogress) { + if (r->current_wal->rows == 0) + /* Regular WAL (not inprogress) must contain at least one row */ + panic("zero rows was successfully read from last WAL `%s'", + r->current_wal->filename); + } else if (r->current_wal->rows == 0) { + /* Unlink empty inprogress WAL */ + say_warn("unlink broken %s wal", r->current_wal->filename); + if (inprogress_log_unlink(r->current_wal->filename) != 0) + panic("can't unlink 'inprogress' wal"); + } else if (r->current_wal->rows == 1) { + /* Rename inprogress wal with one row */ + say_warn("rename unfinished %s wal", r->current_wal->filename); + if (inprogress_log_rename(r->current_wal->filename) != 0) + panic("can't rename 'inprogress' wal"); + } else + panic("too many rows in inprogress WAL `%s'", r->current_wal->filename); + close_log(&r->current_wal); } } @@ -1169,11 +1276,9 @@ write_to_disk(void *_state, struct tbuf *t) { static struct log_io *wal = NULL, *wal_to_close = NULL; static ev_tstamp last_flush = 0; - static size_t rows = 0; struct tbuf *reply, *header; struct recovery_state *r = _state; u32 result = 0; - int suffix = 0; /* we're not running inside ev_loop, so update ev_now manually */ ev_now_update(); @@ -1187,11 +1292,17 @@ write_to_disk(void *_state, struct tbuf *t) reply = tbuf_alloc(t->pool); - /* if there is filename conflict, try filename with lager suffix */ - while (wal == NULL && suffix < 10) { - wal = open_for_write(r, r->wal_prefered_class, wal_write_request(t)->lsn, suffix); - suffix++; + if (wal == NULL) + /* Open WAL with '.inprogress' suffix. */ + wal = open_for_write(r, r->wal_prefered_class, wal_write_request(t)->lsn, -1); + else if (wal->rows == 1) { + /* rename wal after first successfull write to name without inprogress suffix*/ + if (inprogress_log_rename(wal->filename) != 0) { + say_error("can't rename inprogress wal"); + goto fail; + } } + if (wal_to_close != NULL) { if (close_log(&wal_to_close) != 0) goto fail; @@ -1242,13 +1353,11 @@ write_to_disk(void *_state, struct tbuf *t) last_flush = ev_now(); } - rows++; - if (wal->class->rows_per_file <= rows || + wal->rows++; + if (wal->class->rows_per_file <= wal->rows || (wal_write_request(t)->lsn + 1) % wal->class->rows_per_file == 0) { wal_to_close = wal; wal = NULL; - rows = 0; - suffix = 0; } tbuf_append(reply, &result, sizeof(result)); @@ -1296,6 +1405,9 @@ recover_init(const char *snap_dirname, const char *wal_dirname, { struct recovery_state *r = p0alloc(eter_pool, sizeof(*r)); + if (rows_per_file <= 1) + panic("inacceptable value of 'rows_per_file'"); + r->wal_timer.data = r; r->row_handler = row_handler; r->data = data; @@ -1416,13 +1528,19 @@ snapshot_save(struct recovery_state *r, void (*f) (struct log_io_iter *)) snap = open_for_write(r, r->snap_prefered_class, r->confirmed_lsn, -1); if (snap == NULL) - panic("can't open snap for writing"); + panic_status(diag_get_last_error()->code, + "can't open snap for writing"); iter_open(snap, &i, write_rows); if (r->snap_io_rate_limit > 0) i.io_rate_limit = r->snap_io_rate_limit; + /* + * While saving a snapshot, snapshot name is set to + * <lsn>.snap.inprogress. When done, the snapshot is + * renamed to <lsn>.snap. + */ strncpy(final_filename, snap->filename, PATH_MAX); dot = strrchr(final_filename, '.'); *dot = 0; @@ -1433,8 +1551,11 @@ snapshot_save(struct recovery_state *r, void (*f) (struct log_io_iter *)) if (fsync(fileno(snap->f)) < 0) panic("fsync"); - if (rename(snap->filename, final_filename) != 0) - panic("rename"); + if (link(snap->filename, final_filename) == -1) + panic_status(errno, "can't create hard link to snapshot"); + + if (unlink(snap->filename) == -1) + say_syserror("can't unlink 'inprogress' snapshot"); close_log(&snap); diff --git a/core/tarantool.c b/core/tarantool.c index da268a4aa6e5130adccab5e1580d3abd4929e8d3..6964498f288d883b521016ace37b686022deaf8d 100644 --- a/core/tarantool.c +++ b/core/tarantool.c @@ -112,8 +112,9 @@ reload_cfg(struct tbuf *out) return -1; } ret = load_cfg(&new_cfg1, 1); - tbuf_append(out, cfg_out->data, cfg_out->len); if (ret == -1) { + tbuf_append(out, cfg_out->data, cfg_out->len); + destroy_tarantool_cfg(&new_cfg1); return -1; @@ -125,8 +126,9 @@ reload_cfg(struct tbuf *out) return -1; } ret = load_cfg(&new_cfg2, 0); - tbuf_append(out, cfg_out->data, cfg_out->len); if (ret == -1) { + tbuf_append(out, cfg_out->data, cfg_out->len); + destroy_tarantool_cfg(&new_cfg1); return -1; @@ -138,6 +140,7 @@ reload_cfg(struct tbuf *out) destroy_tarantool_cfg(&new_cfg2); out_warning(0, "Could not accept read only '%s' option", diff); + tbuf_append(out, cfg_out->data, cfg_out->len); return -1; } @@ -167,16 +170,21 @@ tarantool_uptime(void) } #ifdef STORAGE -void +int snapshot(void *ev __unused__, int events __unused__) { pid_t p = fork(); if (p < 0) { say_syserror("fork"); - return; + return -1; + } + if (p > 0) { + wait_for_child(p); + + assert(p == fiber->cw.rpid); + + return WEXITSTATUS(fiber->cw.rstatus); } - if (p > 0) - return; fiber->name = "dumper"; set_proc_title("dumper (%" PRIu32 ")", getppid()); @@ -186,16 +194,10 @@ snapshot(void *ev __unused__, int events __unused__) __gcov_flush(); #endif _exit(EXIT_SUCCESS); -} -#endif -static void -sig_child(int signal __unused__) -{ - int child_status; - /* TODO: watch child status & destroy corresponding fibers */ - while (waitpid(-1, &child_status, WNOHANG) > 0) ; + return 0; } +#endif static void sig_int(int signal) @@ -231,12 +233,6 @@ signal_init(void) if (sigaction(SIGPIPE, &sa, 0) == -1) goto error; - sa.sa_handler = sig_child; - sa.sa_flags = SA_RESTART; - sigemptyset(&sa.sa_mask); - if (sigaction(SIGCHLD, &sa, NULL) == -1) - goto error; - sa.sa_handler = sig_int; sa.sa_flags = 0; sigemptyset(&sa.sa_mask); diff --git a/include/fiber.h b/include/fiber.h index 35601ba5faf5eb736cb0cc2bcf2c67469c723a4d..9113d06a83ee9b0b51d55e5c35019dbe3cf2bb09 100644 --- a/include/fiber.h +++ b/include/fiber.h @@ -65,6 +65,7 @@ struct fiber { int fd; ev_timer timer; + ev_child cw; struct tbuf *iov; size_t iov_cnt; @@ -83,6 +84,8 @@ struct fiber { void *f_data; void *data; + /** Information about the last error. */ + void *diagnostics; u64 cookie; bool has_peer; @@ -109,8 +112,8 @@ extern struct fiber *fiber; void fiber_init(void); struct fiber *fiber_create(const char *name, int fd, int inbox_size, void (*f) (void *), void *); -void fiber_zombificate(struct fiber *f); void wait_for(int events); +void wait_for_child(pid_t pid); void unwait(int events); void yield(void); void raise_(int); diff --git a/include/log_io.h b/include/log_io.h index 25e7246f16cc9ffa8f559e75806b10883623ae1b..64fe367f4230781f36a8d39d55c9192d07388a93 100644 --- a/include/log_io.h +++ b/include/log_io.h @@ -73,6 +73,8 @@ struct log_io { size_t rows; size_t retry; char filename[PATH_MAX + 1]; + + bool is_inprogress; }; struct recovery_state { @@ -92,6 +94,8 @@ struct recovery_state { int snap_io_rate_limit; u64 cookie; + bool finalize; + /* pointer to user supplied custom data */ void *data; }; diff --git a/include/say.h b/include/say.h index a0cce235304cf7003db36c8b24a26d5ac17773c6..b89183f2f88cc59205e356d655b11b75ed1e3546 100644 --- a/include/say.h +++ b/include/say.h @@ -54,13 +54,14 @@ void _say(int level, const char *filename, int line, const char *error, #define say(level, ...) ({ _say(level, __FILE__, __LINE__, __VA_ARGS__); }) -#define panic(...) ({ say(S_FATAL, NULL, __VA_ARGS__); exit(EXIT_FAILURE); }) -#define panic_syserror(...) ({ say(S_FATAL, strerror(errno), __VA_ARGS__); exit(EXIT_FAILURE); }) -#define say_syserror(...) say(S_ERROR, strerror(errno), __VA_ARGS__) -#define say_error(...) say(S_ERROR, NULL, __VA_ARGS__) -#define say_crit(...) say(S_CRIT, NULL, __VA_ARGS__) -#define say_warn(...) say(S_WARN, NULL, __VA_ARGS__) -#define say_info(...) say(S_INFO, NULL, __VA_ARGS__) -#define say_debug(...) say(S_DEBUG, NULL, __VA_ARGS__) +#define panic_status(status, ...) ({ say(S_FATAL, NULL, __VA_ARGS__); exit(status); }) +#define panic(...) panic_status(EXIT_FAILURE, __VA_ARGS__) +#define panic_syserror(...) ({ say(S_FATAL, strerror(errno), __VA_ARGS__); exit(EXIT_FAILURE); }) +#define say_syserror(...) say(S_ERROR, strerror(errno), __VA_ARGS__) +#define say_error(...) say(S_ERROR, NULL, __VA_ARGS__) +#define say_crit(...) say(S_CRIT, NULL, __VA_ARGS__) +#define say_warn(...) say(S_WARN, NULL, __VA_ARGS__) +#define say_info(...) say(S_INFO, NULL, __VA_ARGS__) +#define say_debug(...) say(S_DEBUG, NULL, __VA_ARGS__) #endif diff --git a/include/tarantool.h b/include/tarantool.h index 47f16641823ea968218ee10c2ffc4744ed55094f..78b80b8c1fab950537f200c9dadd41c7d1c56d11 100644 --- a/include/tarantool.h +++ b/include/tarantool.h @@ -48,7 +48,7 @@ extern const char *cfg_filename; extern bool init_storage, booting; extern char *binary_filename; i32 reload_cfg(struct tbuf *out); -void snapshot(void *ev __unused__, int events __unused__); +int snapshot(void *ev __unused__, int events __unused__); const char *tarantool_version(void); void tarantool_info(struct tbuf *out); double tarantool_uptime(void); diff --git a/test/box/bad_record.xlog b/test/box/bad_record.xlog new file mode 100644 index 0000000000000000000000000000000000000000..28e8cd35f26399e406ef600e314af78bdafa04d9 Binary files /dev/null and b/test/box/bad_record.xlog differ diff --git a/test/box/empty.xlog b/test/box/empty.xlog new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/test/box/just_header.xlog b/test/box/just_header.xlog new file mode 100644 index 0000000000000000000000000000000000000000..3472cc4eb0dbaff89e0ff37b3637d2ff6580af1a Binary files /dev/null and b/test/box/just_header.xlog differ diff --git a/test/box/reconfigure.result b/test/box/reconfigure.result index 7faaa71b78bc90273508a9160f58003eada29d80..3ddcf400198a196706a6e91654e5ad859f0e0979 100644 --- a/test/box/reconfigure.result +++ b/test/box/reconfigure.result @@ -3,6 +3,7 @@ reload configuration fail: - Could not accept read only 'slab_alloc_arena' option - Could not accept read only 'pid_file' option + - Could not accept read only 'logger' option - Could not accept read only 'primary_port' option - Could not accept read only 'secondary_port' option - Could not accept read only 'admin_port' option @@ -50,3 +51,49 @@ reload configuration fail: - can't open config `tarantool.cfg' ... +reload configuration +--- +ok +... +# +# A test case for http://bugs.launchpad.net/bugs/712447: +# Valgrind reports use of not initialized memory after 'reload +# configuration' +# +insert into t0 values (1, 'tuple') +Insert OK, 1 row affected +save snapshot +--- +ok +... +reload configuration +--- +fail: + - can't open config `tarantool.cfg' +... +insert into t0 values (2, 'tuple 2') +Insert OK, 1 row affected +save snapshot +--- +ok +... +reload configuration +--- +ok +... +insert into t0 values (3, 'tuple 3') +Insert OK, 1 row affected +save snapshot +--- +ok +... +reload configuration +--- +ok +... +delete from t0 where k0 = 1 +Delete OK, 1 row affected +delete from t0 where k0 = 2 +Delete OK, 1 row affected +delete from t0 where k0 = 3 +Delete OK, 1 row affected diff --git a/test/box/reconfigure.test b/test/box/reconfigure.test index 6a292b142802cf32f8222eae4d60292faf905540..afa62fec6fab7a29006fee71879166a8957ad160 100644 --- a/test/box/reconfigure.test +++ b/test/box/reconfigure.test @@ -44,6 +44,32 @@ exec admin "reload configuration" os.unlink(cfg) exec admin "reload configuration" +# cleanup # restore default os.rename(oldcfg, cfg) +exec admin "reload configuration" + +print """# +# A test case for http://bugs.launchpad.net/bugs/712447: +# Valgrind reports use of not initialized memory after 'reload +# configuration' +#""" +exec sql "insert into t0 values (1, 'tuple')" +exec admin "save snapshot" +os.rename(cfg, oldcfg) +exec admin "reload configuration" +exec sql "insert into t0 values (2, 'tuple 2')" +exec admin "save snapshot" +os.symlink(abspath("box/tarantool_good.cfg"), cfg) +exec admin "reload configuration" +exec sql "insert into t0 values (3, 'tuple 3')" +exec admin "save snapshot" +# Cleanup +os.unlink(cfg) +os.rename(oldcfg, cfg) +exec admin "reload configuration" +exec sql "delete from t0 where k0 = 1" +exec sql "delete from t0 where k0 = 2" +exec sql "delete from t0 where k0 = 3" + # vim: syntax=python diff --git a/test/box/show.result b/test/box/show.result index bc4af4173e15ff20d4b80c86607107ec1061dd9c..d00dfc6d0c4a21b8a4add4019b5427b1f2603cb8 100644 --- a/test/box/show.result +++ b/test/box/show.result @@ -35,7 +35,7 @@ configuration: slab_alloc_factor: "2" work_dir: (null) pid_file: "box.pid" - logger: (null) + logger: "tee --append tarantool.log" logger_nonblock: "1" io_collect_interval: "0" backlog: "1024" @@ -82,10 +82,14 @@ save coredump --- ok ... +insert into t0 values (1, 'tuple') +Insert OK, 1 row affected save snapshot --- ok ... +delete from t0 where k0 = 1 +Delete OK, 1 row affected exec module command --- unimplemented @@ -94,10 +98,10 @@ show info --- info: version: "1.3.minor-<rev>-<commit> - uptime: 0 + uptime: <uptime> pid: <pid> wal_writer_pid: <pid> - lsn: 1 + lsn: 3 recovery_lag: 0.000 recovery_last_update: 0.000 status: primary diff --git a/test/box/show.test b/test/box/show.test index 2c2dbd10e55c2de53b4dd6f716b52004de52a2e1..2e4de22121f80ca746dbddd0b9f4f93b57c1efd9 100644 --- a/test/box/show.test +++ b/test/box/show.test @@ -9,10 +9,13 @@ exec admin "help" exec admin "show configuration" exec admin "show stat" exec admin "save coredump" +exec sql "insert into t0 values (1, 'tuple')" exec admin "save snapshot" +exec sql "delete from t0 where k0 = 1" exec admin "exec module command" sys.stdout.push_filter("(\d\.\d)\.\d-\d+-\S+", "\\1.minor-<rev>-<commit>") sys.stdout.push_filter("pid: \d+", "pid: <pid>") +sys.stdout.push_filter("uptime: \d+", "uptime: <uptime>") exec admin "show info" sys.stdout.clear_all_filters() sys.stdout.push_filter(".*", "") diff --git a/test/box/snapshot.result b/test/box/snapshot.result index 2167ee50245161044e6a38a5de77a44a27340042..edb9e178186c970e3ea58d6d3574da8afeeafea0 100644 --- a/test/box/snapshot.result +++ b/test/box/snapshot.result @@ -1505,3 +1505,31 @@ Found 1 tuple: select * from t0 where k0=500 No match # Restore the default server... +# +# A test case for: http://bugs.launchpad.net/bugs/686411 +# Check that 'save snapshot' does not overwrite a snapshot +# file that already exists. Verify also that any other +# error that happens when saving snapshot is propagated +# to the caller. + +insert into t0 values (1, 'first tuple') +Insert OK, 1 row affected +save snapshot +--- +ok +... +save snapshot +--- +fail: can't save snapshot, errno 17 (File exists) +... +insert into t0 values (2, 'second tuple') +Insert OK, 1 row affected +# Make 'var' directory read-only. +save snapshot +--- +fail: can't save snapshot, errno 13 (Permission denied) +... +delete from t0 where k0 = 1 +Delete OK, 1 row affected +delete from t0 where k0 = 2 +Delete OK, 1 row affected diff --git a/test/box/snapshot.test b/test/box/snapshot.test index 3eef198ee2c7190c5325f1a33de089bb0ec35669..764699f2a3b2d3bd3e981359992a3359cbda05e0 100644 --- a/test/box/snapshot.test +++ b/test/box/snapshot.test @@ -1,4 +1,5 @@ # encoding: tarantool +# print """ # Verify that the server starts from a pre-recorded snapshot. # This way we check that the server can read old snapshots (v11) @@ -15,4 +16,33 @@ server.stop(True) os.unlink(snapshot) server.start(True) -# vim: syntax=python +print """# +# A test case for: http://bugs.launchpad.net/bugs/686411 +# Check that 'save snapshot' does not overwrite a snapshot +# file that already exists. Verify also that any other +# error that happens when saving snapshot is propagated +# to the caller. +""" +exec sql "insert into t0 values (1, 'first tuple')" +exec admin "save snapshot" + +# In absence of data modifications, two consecutive +# 'save snapshot' statements will try to write +# into the same file, since file name is based +# on LSN. +# Don't allow to overwrite snapshots. +exec admin "save snapshot" +# +# Increment LSN +exec sql "insert into t0 values (2, 'second tuple')" +# +# Check for other errors, e.g. "Permission denied". +print "# Make 'var' directory read-only." +os.chmod(vardir, 0555) +exec admin "save snapshot" + +# cleanup +os.chmod(vardir, 0755) +exec sql "delete from t0 where k0 = 1" +exec sql "delete from t0 where k0 = 2" +# vim: syntax=python spell diff --git a/test/box/tarantool.cfg b/test/box/tarantool.cfg index c09c23a187f1636abe74081ba9ac4968be65b27f..d7cd0fe80ccd4940e6b324918043eabbf43d45a9 100644 --- a/test/box/tarantool.cfg +++ b/test/box/tarantool.cfg @@ -2,6 +2,8 @@ slab_alloc_arena = 0.1 pid_file = "box.pid" +logger="tee --append tarantool.log" + primary_port = 33013 secondary_port = 33014 admin_port = 33015 diff --git a/test/box/tarantool_bad1.cfg b/test/box/tarantool_bad1.cfg index af4eb414a09982fe8dfc623ab80761ab4e08bfdd..fb0536b31cb15be0b40524c0d68caf2fab2ef626 100644 --- a/test/box/tarantool_bad1.cfg +++ b/test/box/tarantool_bad1.cfg @@ -2,6 +2,8 @@ slab_alloc_arena = 0.2 pid_file = "/var/run/box.pid" +logger="cat tarantool.log" + primary_port = 33023 secondary_port = 33024 admin_port = 33025 diff --git a/test/box/tarantool_bad2.cfg b/test/box/tarantool_bad2.cfg index fa5f170d894e61d35daa1bb1b43e8e27da22336e..0d03bfde3b38b7e1cce9c5d2b2a40811060bbddf 100644 --- a/test/box/tarantool_bad2.cfg +++ b/test/box/tarantool_bad2.cfg @@ -2,6 +2,8 @@ slab_alloc_arena = 0.1 pid_file = "box.pid" +logger="tee --append tarantool.log" + #primary_port = 33013 secondary_port = 33014 admin_port = 33015 diff --git a/test/box/tarantool_bad3.cfg b/test/box/tarantool_bad3.cfg index edac4b201101f6fe9731541d535f6edf39edd2b6..8c1d14c3c6e43ed63617f14700c5f8c2b2719ff7 100644 --- a/test/box/tarantool_bad3.cfg +++ b/test/box/tarantool_bad3.cfg @@ -2,6 +2,8 @@ slab_alloc_arena = 0.1 pid_file = "box.pid" +logger="tee --append tarantool.log" + primary_port = 33013 secondary_port = 33014 admin_port = 33015 diff --git a/test/box/tarantool_bad4.cfg b/test/box/tarantool_bad4.cfg index 99bdb38f15d974b550ca303c2af679bf76b70f35..67b9c893b0ae0e5f691d090a1bf3a6150648a876 100644 --- a/test/box/tarantool_bad4.cfg +++ b/test/box/tarantool_bad4.cfg @@ -2,6 +2,8 @@ slab_alloc_arena = 0.1 pid_file = "box.pid" +logger="tee --append tarantool.log" + primary_port = 33013 secondary_port = 33014 admin_port = 33015 diff --git a/test/box/tarantool_bad5.cfg b/test/box/tarantool_bad5.cfg index 1ba6dfa8bdcd4b65e4e882bf939a460a6981f673..1390839277b1fa49ed953c4aef3d73c269080a9b 100644 --- a/test/box/tarantool_bad5.cfg +++ b/test/box/tarantool_bad5.cfg @@ -2,6 +2,8 @@ slab_alloc_arena = 0.1 pid_file = "box.pid" +logger="tee --append tarantool.log" + primary_port = 33013 secondary_port = 33014 admin_port = 33015 diff --git a/test/box/tarantool_good.cfg b/test/box/tarantool_good.cfg index c09c23a187f1636abe74081ba9ac4968be65b27f..d7cd0fe80ccd4940e6b324918043eabbf43d45a9 100644 --- a/test/box/tarantool_good.cfg +++ b/test/box/tarantool_good.cfg @@ -2,6 +2,8 @@ slab_alloc_arena = 0.1 pid_file = "box.pid" +logger="tee --append tarantool.log" + primary_port = 33013 secondary_port = 33014 admin_port = 33015 diff --git a/test/box/unfinished.xlog b/test/box/unfinished.xlog new file mode 100644 index 0000000000000000000000000000000000000000..c905fec9229b37c384366bc267c471184e5d7238 Binary files /dev/null and b/test/box/unfinished.xlog differ diff --git a/test/box/xlog.result b/test/box/xlog.result new file mode 100644 index 0000000000000000000000000000000000000000..ba345f63be7ea6ca88bcef3857b726ffe294ab32 --- /dev/null +++ b/test/box/xlog.result @@ -0,0 +1,34 @@ + +# Inprogress xlog must be renamed before second insert. + +insert into t0 values (1, 'first tuple') +Insert OK, 1 row affected +00000000000000000002.xlog.inprogress exists +insert into t0 values (2, 'second tuple') +Insert OK, 1 row affected +00000000000000000002.xlog.inprogress has been successfully renamed + +# Inprogress xlog must be renamed during regular termination. + +insert into t0 values (3, 'third tuple') +Insert OK, 1 row affected +00000000000000000004.xlog.inprogress exists +Stopping the server... +00000000000000000004.xlog.inprogress has been successfully renamed + +# An inprogress xlog fle with one record must be renamed during recovery. + +00000000000000000005.xlog.inprogress hash been successfully renamed + +# Empty (zero size) inprogress xlog must be deleted during recovery. + +00000000000000000006.xlog.inprogress has been successfully deleted + +# Empty (header only, no records) inprogress xlog must be deleted +# during recovery. + +00000000000000000006.xlog.inprogress has been successfully deleted + +# Inprogress xlog with bad record must be deleted during recovery. + +00000000000000000006.xlog.inprogress has been successfully deleted diff --git a/test/box/xlog.test b/test/box/xlog.test new file mode 100644 index 0000000000000000000000000000000000000000..50540435889ac72aed03c6e45ce1a22ccaaa5e5e --- /dev/null +++ b/test/box/xlog.test @@ -0,0 +1,113 @@ +# encoding: tarantool +# +from os.path import abspath + +# cleanup vardir +server.stop(True) +server.install(True) + +print """ +# Inprogress xlog must be renamed before second insert. +""" +wal_inprogress = os.path.join(vardir, "00000000000000000002.xlog.inprogress") +wal = os.path.join(vardir, "00000000000000000002.xlog") + +server.start(True) +exec sql "insert into t0 values (1, 'first tuple')" +if os.access(wal_inprogress, os.F_OK): + print "00000000000000000002.xlog.inprogress exists" + +exec sql "insert into t0 values (2, 'second tuple')" + +if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK): + print "00000000000000000002.xlog.inprogress has been successfully renamed" +server.stop(True) + +print """ +# Inprogress xlog must be renamed during regular termination. +""" +server.start(True) + +wal_inprogress = os.path.join(vardir, "00000000000000000004.xlog.inprogress") +wal = os.path.join(vardir, "00000000000000000004.xlog") + +exec sql "insert into t0 values (3, 'third tuple')" + +if os.access(wal_inprogress, os.F_OK): + print "00000000000000000004.xlog.inprogress exists" + +server.stop(False) + +if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK): + print "00000000000000000004.xlog.inprogress has been successfully renamed" + +print """ +# An inprogress xlog fle with one record must be renamed during recovery. +""" + +wal_inprogress = os.path.join(vardir, "00000000000000000005.xlog.inprogress") +wal = os.path.join(vardir, "00000000000000000005.xlog") + +os.symlink(abspath("box/unfinished.xlog"), wal_inprogress) + +server.start(True) + +if os.access(wal, os.F_OK) and not os.access(wal_inprogress, os.F_OK): + print "00000000000000000005.xlog.inprogress hash been successfully renamed" +server.stop(True) + +print """ +# Empty (zero size) inprogress xlog must be deleted during recovery. +""" + +wal_inprogress = os.path.join(vardir, "00000000000000000006.xlog.inprogress") +wal = os.path.join(vardir, "00000000000000000006.xlog") + +os.symlink(abspath("box/empty.xlog"), wal_inprogress) +server.start(True) + +if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK): + print "00000000000000000006.xlog.inprogress has been successfully deleted" +server.stop(True) + +print """ +# Empty (header only, no records) inprogress xlog must be deleted +# during recovery. +""" + +# If the previous test has failed, there is a dangling link +# and symlink fails. +try: + os.symlink(abspath("box/just_header.xlog"), wal_inprogress) +except OSError as e: + print e + +server.start(True) + +if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK): + print "00000000000000000006.xlog.inprogress has been successfully deleted" +server.stop(True) + +print """ +# Inprogress xlog with bad record must be deleted during recovery. +""" + +# If the previous test has failed, there is a dangling link +# and symlink fails. +try: + os.symlink(abspath("box/bad_record.xlog"), wal_inprogress) +except OSError as e: + print e + +server.start(True) + +if not os.access(wal_inprogress, os.F_OK) and not os.access(wal, os.F_OK): + print "00000000000000000006.xlog.inprogress has been successfully deleted" +server.stop(True) + +# cleanup +server.stop(True) +server.install(True) +server.start(True) + +# vim: syntax=python diff --git a/test/lib/tarantool_silverbox_server.py b/test/lib/tarantool_silverbox_server.py index 9008c05accc75fb91e2963e156506dfb249594fa..996ced8f3fa66d15c5b4821d16c6d1ce1a4f58ce 100644 --- a/test/lib/tarantool_silverbox_server.py +++ b/test/lib/tarantool_silverbox_server.py @@ -116,9 +116,11 @@ class TarantoolSilverboxServer: stdout = subprocess.PIPE, stderr = subprocess.PIPE) - version = subprocess.Popen([self.abspath_to_exe, "--version"], - cwd = self.args.vardir, - stdout = subprocess.PIPE).stdout.read().rstrip() + p = subprocess.Popen([self.abspath_to_exe, "--version"], + cwd = self.args.vardir, + stdout = subprocess.PIPE) + version = p.stdout.read().rstrip() + p.wait() if not silent: print "Starting {0} {1}.".format(os.path.basename(self.abspath_to_exe),