From 59d0e0815cfdb02e6b443b117342a907559f3aef Mon Sep 17 00:00:00 2001 From: Konstantin Osipov <kostja@tarantool.org> Date: Fri, 30 Mar 2012 19:48:45 +0400 Subject: [PATCH] Thread-based WAL writer: a piecewise port. This patch introduces the wal_mode configuration variable, which can take either "fsync" or "fsync_delay" values. In "fsync" mode, we open the write ahead log in O_SYNC mode, and thus sync every write to disk. The fsync_delay mode is the backward-compatible mode: in this mode, we only do fsync once every fsync_delay seconds. If fsync_delay is 0, there is no fsync. Otherwise, this patch is only adding auxiliary headers, object files, etc. --- CMakeLists.txt | 5 + cfg/core_cfg.cfg_tmpl | 45 ++ cfg/prscfg.c | 4 +- cfg/tarantool_box_cfg.c | 799 ++++++++++++++++-------------- cfg/tarantool_box_cfg.h | 101 ++-- core/CMakeLists.txt | 4 +- core/crc32.c | 47 ++ core/fiber.m | 3 +- core/log_io.m | 255 +++++----- core/log_io_remote.m | 4 +- core/replication.m | 4 +- core/tarantool.m | 63 ++- include/config.h.cmake | 13 + include/crc32.h | 44 ++ include/log_io.h | 11 +- include/tarantool_pthread.h | 157 ++++++ mod/box/box.m | 11 +- mod/box/box_cfg.cfg_tmpl | 36 -- test/box/admin.result | 21 +- test/box/configuration.result | 42 +- test/box/lua.result | 82 +-- test/box_memcached/off.result | 21 +- third_party/compat/sys/bsd_time.h | 122 +++++ 23 files changed, 1205 insertions(+), 689 deletions(-) create mode 100644 core/crc32.c create mode 100644 include/crc32.h create mode 100644 include/tarantool_pthread.h create mode 100644 third_party/compat/sys/bsd_time.h diff --git a/CMakeLists.txt b/CMakeLists.txt index cb3341db9d..e9651a6ced 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ include(CheckCCompilerFlag) include(CheckSymbolExists) include(CheckCXXSourceRuns) include(TestBigEndian) +include(CheckFunctionExists) find_program(ECHO echo) find_program(XSLTPROC xsltproc) @@ -59,6 +60,10 @@ endif() check_symbol_exists(MAP_ANON sys/mman.h HAVE_MAP_ANON) 
check_symbol_exists(MAP_ANONYMOUS sys/mman.h HAVE_MAP_ANONYMOUS) +check_include_file(sys/time.h HAVE_SYS_TIME_H) + +check_symbol_exists(O_DSYNC fcntl.h HAVE_O_DSYNC) +check_function_exists(fdatasync HAVE_FDATASYNC) test_big_endian(HAVE_BYTE_ORDER_BIG_ENDIAN) diff --git a/cfg/core_cfg.cfg_tmpl b/cfg/core_cfg.cfg_tmpl index bb3580aa5c..3a019a52e2 100644 --- a/cfg/core_cfg.cfg_tmpl +++ b/cfg/core_cfg.cfg_tmpl @@ -1,6 +1,12 @@ # username to switch to username=NULL, ro +# Local hot standby (if enabled, the server will run in hot +# standby mode, continuously fetching WAL records from wal_dir, +# until it is able to bind to the primary port. +# In local hot standby mode the server only accepts reads. +local_hot_standby=false, ro + # tarantool bind ip address, applies to master # and replication ports. INADDR_ANY is the default value. bind_ipaddr="INADDR_ANY", ro @@ -29,6 +35,12 @@ slab_alloc_factor=2.0, ro # working directory (daemon will chdir(2) to it) work_dir=NULL, ro +# Snapshot directory (where snapshots get saved/read) +snap_dir=".", ro + +# WAL directory (where WALs get saved/read) +wal_dir=".", ro + # name of pid file pid_file="tarantool.pid", ro @@ -49,3 +61,36 @@ backlog=1024, ro # network io readahead readahead=16320 + +# Do not write into snapshot faster than snap_io_rate_limit MB/sec +snap_io_rate_limit=0.0, ro + +# Write no more rows in WAL +rows_per_wal=500000, ro + +# Size of the WAL writer request queue: how many outstanding +# requests for write to disk it can have. +# Rule of thumb is to set this to the average connection count. +wal_writer_inbox_size=16384, ro + +# Defines fiber/data synchronization fsync(2) policy: +# "write": fibers wait for their data to be written to the log. 
+# "fsync": fibers wait for their data, fsync(2) follows each write(2) +# "fsync_delay": fibers wait for their data, fsync every N=wal_fsync_delay seconds, +# N=0.0 means no fsync (equivalent to wal_mode = "write"); +wal_mode="fsync_delay" + +# Fsync WAL delay, only issue fsync if last fsync was wal_fsync_delay +# seconds ago. +# WARNING: actually, several last requests may stall fsync for much longer +wal_fsync_delay=0.0 + +# Delay, in seconds, between successive re-readings of wal_dir. +# The re-scan is necessary to discover new WAL files or snapshots. +wal_dir_rescan_delay=0.1, ro + +# Panic if there is an error reading a snapshot or WAL. +# By default, panic on any snapshot reading error and ignore errors +# when reading WALs. +panic_on_snap_error=true, ro +panic_on_wal_error=false, ro diff --git a/cfg/prscfg.c b/cfg/prscfg.c index 6f930b537c..c4f11a2bbc 100644 --- a/cfg/prscfg.c +++ b/cfg/prscfg.c @@ -189,7 +189,7 @@ typedef union YYSTYPE #include <limits.h> #include <sys/types.h> -static int prscfg_yyerror(prscfg_yyscan_t yyscanner, char *msg); +static int prscfg_yyerror(prscfg_yyscan_t yyscanner, const char *msg); extern int prscfg_yylex (YYSTYPE * yylval_param, prscfg_yyscan_t yyscanner); static NameAtom* prependName(NameAtom *prep, NameAtom *name); static void freeName(NameAtom *atom); @@ -2018,7 +2018,7 @@ YYSTYPE yylval; static int -prscfg_yyerror(prscfg_yyscan_t yyscanner, char *msg) { +prscfg_yyerror(prscfg_yyscan_t yyscanner, const char *msg) { out_warning(CNF_SYNTAXERROR, "gram_yyerror: %s at line %d", msg, prscfgGetLineNo(yyscanner)); return 0; } diff --git a/cfg/tarantool_box_cfg.c b/cfg/tarantool_box_cfg.c index 1044259140..713077d10a 100644 --- a/cfg/tarantool_box_cfg.c +++ b/cfg/tarantool_box_cfg.c @@ -29,6 +29,7 @@ init_tarantool_cfg(tarantool_cfg *c) { c->__confetti_flags = 0; c->username = NULL; + c->local_hot_standby = false; c->bind_ipaddr = NULL; c->coredump = false; c->admin_port = 0; @@ -38,14 +39,22 @@ init_tarantool_cfg(tarantool_cfg 
*c) { c->slab_alloc_minimal = 0; c->slab_alloc_factor = 0; c->work_dir = NULL; + c->snap_dir = NULL; + c->wal_dir = NULL; c->pid_file = NULL; c->logger = NULL; c->logger_nonblock = false; c->io_collect_interval = 0; c->backlog = 0; c->readahead = 0; - c->snap_dir = NULL; - c->wal_dir = NULL; + c->snap_io_rate_limit = 0; + c->rows_per_wal = 0; + c->wal_writer_inbox_size = 0; + c->wal_mode = NULL; + c->wal_fsync_delay = 0; + c->wal_dir_rescan_delay = 0; + c->panic_on_snap_error = false; + c->panic_on_wal_error = false; c->primary_port = 0; c->secondary_port = 0; c->too_long_threshold = 0; @@ -55,14 +64,6 @@ init_tarantool_cfg(tarantool_cfg *c) { c->memcached_expire = false; c->memcached_expire_per_loop = 0; c->memcached_expire_full_sweep = 0; - c->snap_io_rate_limit = 0; - c->rows_per_wal = 0; - c->wal_fsync_delay = 0; - c->wal_writer_inbox_size = 0; - c->local_hot_standby = false; - c->wal_dir_rescan_delay = 0; - c->panic_on_snap_error = false; - c->panic_on_wal_error = false; c->replication_source = NULL; c->space = NULL; } @@ -72,6 +73,7 @@ fill_default_tarantool_cfg(tarantool_cfg *c) { c->__confetti_flags = 0; c->username = NULL; + c->local_hot_standby = false; c->bind_ipaddr = strdup("INADDR_ANY"); if (c->bind_ipaddr == NULL) return CNF_NOMEMORY; c->coredump = false; @@ -82,6 +84,10 @@ fill_default_tarantool_cfg(tarantool_cfg *c) { c->slab_alloc_minimal = 64; c->slab_alloc_factor = 2; c->work_dir = NULL; + c->snap_dir = strdup("."); + if (c->snap_dir == NULL) return CNF_NOMEMORY; + c->wal_dir = strdup("."); + if (c->wal_dir == NULL) return CNF_NOMEMORY; c->pid_file = strdup("tarantool.pid"); if (c->pid_file == NULL) return CNF_NOMEMORY; c->logger = NULL; @@ -89,10 +95,15 @@ fill_default_tarantool_cfg(tarantool_cfg *c) { c->io_collect_interval = 0; c->backlog = 1024; c->readahead = 16320; - c->snap_dir = strdup("."); - if (c->snap_dir == NULL) return CNF_NOMEMORY; - c->wal_dir = strdup("."); - if (c->wal_dir == NULL) return CNF_NOMEMORY; + c->snap_io_rate_limit = 
0; + c->rows_per_wal = 500000; + c->wal_writer_inbox_size = 16384; + c->wal_mode = strdup("fsync_delay"); + if (c->wal_mode == NULL) return CNF_NOMEMORY; + c->wal_fsync_delay = 0; + c->wal_dir_rescan_delay = 0.1; + c->panic_on_snap_error = true; + c->panic_on_wal_error = false; c->primary_port = 0; c->secondary_port = 0; c->too_long_threshold = 0.5; @@ -102,14 +113,6 @@ fill_default_tarantool_cfg(tarantool_cfg *c) { c->memcached_expire = false; c->memcached_expire_per_loop = 1024; c->memcached_expire_full_sweep = 3600; - c->snap_io_rate_limit = 0; - c->rows_per_wal = 500000; - c->wal_fsync_delay = 0; - c->wal_writer_inbox_size = 128; - c->local_hot_standby = false; - c->wal_dir_rescan_delay = 0.1; - c->panic_on_snap_error = true; - c->panic_on_wal_error = false; c->replication_source = NULL; c->space = NULL; return 0; @@ -151,6 +154,9 @@ acceptDefault_name__space__index__key_field(tarantool_cfg_space_index_key_field static NameAtom _name__username[] = { { "username", -1, NULL } }; +static NameAtom _name__local_hot_standby[] = { + { "local_hot_standby", -1, NULL } +}; static NameAtom _name__bind_ipaddr[] = { { "bind_ipaddr", -1, NULL } }; @@ -178,6 +184,12 @@ static NameAtom _name__slab_alloc_factor[] = { static NameAtom _name__work_dir[] = { { "work_dir", -1, NULL } }; +static NameAtom _name__snap_dir[] = { + { "snap_dir", -1, NULL } +}; +static NameAtom _name__wal_dir[] = { + { "wal_dir", -1, NULL } +}; static NameAtom _name__pid_file[] = { { "pid_file", -1, NULL } }; @@ -196,11 +208,29 @@ static NameAtom _name__backlog[] = { static NameAtom _name__readahead[] = { { "readahead", -1, NULL } }; -static NameAtom _name__snap_dir[] = { - { "snap_dir", -1, NULL } +static NameAtom _name__snap_io_rate_limit[] = { + { "snap_io_rate_limit", -1, NULL } }; -static NameAtom _name__wal_dir[] = { - { "wal_dir", -1, NULL } +static NameAtom _name__rows_per_wal[] = { + { "rows_per_wal", -1, NULL } +}; +static NameAtom _name__wal_writer_inbox_size[] = { + { "wal_writer_inbox_size", 
-1, NULL } +}; +static NameAtom _name__wal_mode[] = { + { "wal_mode", -1, NULL } +}; +static NameAtom _name__wal_fsync_delay[] = { + { "wal_fsync_delay", -1, NULL } +}; +static NameAtom _name__wal_dir_rescan_delay[] = { + { "wal_dir_rescan_delay", -1, NULL } +}; +static NameAtom _name__panic_on_snap_error[] = { + { "panic_on_snap_error", -1, NULL } +}; +static NameAtom _name__panic_on_wal_error[] = { + { "panic_on_wal_error", -1, NULL } }; static NameAtom _name__primary_port[] = { { "primary_port", -1, NULL } @@ -229,30 +259,6 @@ static NameAtom _name__memcached_expire_per_loop[] = { static NameAtom _name__memcached_expire_full_sweep[] = { { "memcached_expire_full_sweep", -1, NULL } }; -static NameAtom _name__snap_io_rate_limit[] = { - { "snap_io_rate_limit", -1, NULL } -}; -static NameAtom _name__rows_per_wal[] = { - { "rows_per_wal", -1, NULL } -}; -static NameAtom _name__wal_fsync_delay[] = { - { "wal_fsync_delay", -1, NULL } -}; -static NameAtom _name__wal_writer_inbox_size[] = { - { "wal_writer_inbox_size", -1, NULL } -}; -static NameAtom _name__local_hot_standby[] = { - { "local_hot_standby", -1, NULL } -}; -static NameAtom _name__wal_dir_rescan_delay[] = { - { "wal_dir_rescan_delay", -1, NULL } -}; -static NameAtom _name__panic_on_snap_error[] = { - { "panic_on_snap_error", -1, NULL } -}; -static NameAtom _name__panic_on_wal_error[] = { - { "panic_on_wal_error", -1, NULL } -}; static NameAtom _name__replication_source[] = { { "replication_source", -1, NULL } }; @@ -346,6 +352,37 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { if (opt->paramValue.stringval && c->username == NULL) return CNF_NOMEMORY; } + else if ( cmpNameAtoms( opt->name, _name__local_hot_standby) ) { + if (opt->paramType != stringType && opt->paramType != numberType ) + return CNF_WRONGTYPE; + c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; + errno = 0; + bool bln; + + if (opt->paramType == numberType) { + if (strcmp(opt->paramValue.numberval, "0") == 0 || 
strcmp(opt->paramValue.numberval, "1") == 0) + bln = opt->paramValue.numberval[0] - '0'; + else + return CNF_WRONGRANGE; + } + else if (strcasecmp(opt->paramValue.stringval, "true") == 0 || + strcasecmp(opt->paramValue.stringval, "yes") == 0 || + strcasecmp(opt->paramValue.stringval, "enable") == 0 || + strcasecmp(opt->paramValue.stringval, "on") == 0 || + strcasecmp(opt->paramValue.stringval, "1") == 0 ) + bln = true; + else if (strcasecmp(opt->paramValue.stringval, "false") == 0 || + strcasecmp(opt->paramValue.stringval, "no") == 0 || + strcasecmp(opt->paramValue.stringval, "disable") == 0 || + strcasecmp(opt->paramValue.stringval, "off") == 0 || + strcasecmp(opt->paramValue.stringval, "0") == 0 ) + bln = false; + else + return CNF_WRONGRANGE; + if (check_rdonly && c->local_hot_standby != bln) + return CNF_RDONLY; + c->local_hot_standby = bln; + } else if ( cmpNameAtoms( opt->name, _name__bind_ipaddr) ) { if (opt->paramType != stringType ) return CNF_WRONGTYPE; @@ -479,6 +516,30 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { if (opt->paramValue.stringval && c->work_dir == NULL) return CNF_NOMEMORY; } + else if ( cmpNameAtoms( opt->name, _name__snap_dir) ) { + if (opt->paramType != stringType ) + return CNF_WRONGTYPE; + c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; + errno = 0; + if (check_rdonly && ( (opt->paramValue.stringval == NULL && c->snap_dir == NULL) || strcmp(opt->paramValue.stringval, c->snap_dir) != 0)) + return CNF_RDONLY; + if (c->snap_dir) free(c->snap_dir); + c->snap_dir = (opt->paramValue.stringval) ? 
strdup(opt->paramValue.stringval) : NULL; + if (opt->paramValue.stringval && c->snap_dir == NULL) + return CNF_NOMEMORY; + } + else if ( cmpNameAtoms( opt->name, _name__wal_dir) ) { + if (opt->paramType != stringType ) + return CNF_WRONGTYPE; + c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; + errno = 0; + if (check_rdonly && ( (opt->paramValue.stringval == NULL && c->wal_dir == NULL) || strcmp(opt->paramValue.stringval, c->wal_dir) != 0)) + return CNF_RDONLY; + if (c->wal_dir) free(c->wal_dir); + c->wal_dir = (opt->paramValue.stringval) ? strdup(opt->paramValue.stringval) : NULL; + if (opt->paramValue.stringval && c->wal_dir == NULL) + return CNF_NOMEMORY; + } else if ( cmpNameAtoms( opt->name, _name__pid_file) ) { if (opt->paramType != stringType ) return CNF_WRONGTYPE; @@ -572,31 +633,19 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { return CNF_WRONGRANGE; c->readahead = i32; } - else if ( cmpNameAtoms( opt->name, _name__snap_dir) ) { - if (opt->paramType != stringType ) - return CNF_WRONGTYPE; - c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; - errno = 0; - if (check_rdonly && ( (opt->paramValue.stringval == NULL && c->snap_dir == NULL) || strcmp(opt->paramValue.stringval, c->snap_dir) != 0)) - return CNF_RDONLY; - if (c->snap_dir) free(c->snap_dir); - c->snap_dir = (opt->paramValue.stringval) ? 
strdup(opt->paramValue.stringval) : NULL; - if (opt->paramValue.stringval && c->snap_dir == NULL) - return CNF_NOMEMORY; - } - else if ( cmpNameAtoms( opt->name, _name__wal_dir) ) { - if (opt->paramType != stringType ) + else if ( cmpNameAtoms( opt->name, _name__snap_io_rate_limit) ) { + if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - if (check_rdonly && ( (opt->paramValue.stringval == NULL && c->wal_dir == NULL) || strcmp(opt->paramValue.stringval, c->wal_dir) != 0)) + double dbl = strtod(opt->paramValue.numberval, NULL); + if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) + return CNF_WRONGRANGE; + if (check_rdonly && c->snap_io_rate_limit != dbl) return CNF_RDONLY; - if (c->wal_dir) free(c->wal_dir); - c->wal_dir = (opt->paramValue.stringval) ? strdup(opt->paramValue.stringval) : NULL; - if (opt->paramValue.stringval && c->wal_dir == NULL) - return CNF_NOMEMORY; + c->snap_io_rate_limit = dbl; } - else if ( cmpNameAtoms( opt->name, _name__primary_port) ) { + else if ( cmpNameAtoms( opt->name, _name__rows_per_wal) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; @@ -606,11 +655,11 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { return CNF_WRONGINT; if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) return CNF_WRONGRANGE; - if (check_rdonly && c->primary_port != i32) + if (check_rdonly && c->rows_per_wal != i32) return CNF_RDONLY; - c->primary_port = i32; + c->rows_per_wal = i32; } - else if ( cmpNameAtoms( opt->name, _name__secondary_port) ) { + else if ( cmpNameAtoms( opt->name, _name__wal_writer_inbox_size) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; @@ -620,61 +669,74 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { return CNF_WRONGINT; if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) 
return CNF_WRONGRANGE; - if (check_rdonly && c->secondary_port != i32) + if (check_rdonly && c->wal_writer_inbox_size != i32) return CNF_RDONLY; - c->secondary_port = i32; + c->wal_writer_inbox_size = i32; } - else if ( cmpNameAtoms( opt->name, _name__too_long_threshold) ) { - if (opt->paramType != numberType ) + else if ( cmpNameAtoms( opt->name, _name__wal_mode) ) { + if (opt->paramType != stringType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - double dbl = strtod(opt->paramValue.numberval, NULL); - if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) - return CNF_WRONGRANGE; - c->too_long_threshold = dbl; + if (c->wal_mode) free(c->wal_mode); + c->wal_mode = (opt->paramValue.stringval) ? strdup(opt->paramValue.stringval) : NULL; + if (opt->paramValue.stringval && c->wal_mode == NULL) + return CNF_NOMEMORY; } - else if ( cmpNameAtoms( opt->name, _name__custom_proc_title) ) { - if (opt->paramType != stringType ) + else if ( cmpNameAtoms( opt->name, _name__wal_fsync_delay) ) { + if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - if (check_rdonly && ( (opt->paramValue.stringval == NULL && c->custom_proc_title == NULL) || strcmp(opt->paramValue.stringval, c->custom_proc_title) != 0)) - return CNF_RDONLY; - if (c->custom_proc_title) free(c->custom_proc_title); - c->custom_proc_title = (opt->paramValue.stringval) ? 
strdup(opt->paramValue.stringval) : NULL; - if (opt->paramValue.stringval && c->custom_proc_title == NULL) - return CNF_NOMEMORY; + double dbl = strtod(opt->paramValue.numberval, NULL); + if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) + return CNF_WRONGRANGE; + c->wal_fsync_delay = dbl; } - else if ( cmpNameAtoms( opt->name, _name__memcached_port) ) { + else if ( cmpNameAtoms( opt->name, _name__wal_dir_rescan_delay) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - long int i32 = strtol(opt->paramValue.numberval, NULL, 10); - if (i32 == 0 && errno == EINVAL) - return CNF_WRONGINT; - if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) + double dbl = strtod(opt->paramValue.numberval, NULL); + if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) return CNF_WRONGRANGE; - if (check_rdonly && c->memcached_port != i32) + if (check_rdonly && c->wal_dir_rescan_delay != dbl) return CNF_RDONLY; - c->memcached_port = i32; + c->wal_dir_rescan_delay = dbl; } - else if ( cmpNameAtoms( opt->name, _name__memcached_space) ) { - if (opt->paramType != numberType ) + else if ( cmpNameAtoms( opt->name, _name__panic_on_snap_error) ) { + if (opt->paramType != stringType && opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - long int i32 = strtol(opt->paramValue.numberval, NULL, 10); - if (i32 == 0 && errno == EINVAL) - return CNF_WRONGINT; - if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) + bool bln; + + if (opt->paramType == numberType) { + if (strcmp(opt->paramValue.numberval, "0") == 0 || strcmp(opt->paramValue.numberval, "1") == 0) + bln = opt->paramValue.numberval[0] - '0'; + else + return CNF_WRONGRANGE; + } + else if (strcasecmp(opt->paramValue.stringval, "true") == 0 || + strcasecmp(opt->paramValue.stringval, "yes") == 0 || + strcasecmp(opt->paramValue.stringval, "enable") == 
0 || + strcasecmp(opt->paramValue.stringval, "on") == 0 || + strcasecmp(opt->paramValue.stringval, "1") == 0 ) + bln = true; + else if (strcasecmp(opt->paramValue.stringval, "false") == 0 || + strcasecmp(opt->paramValue.stringval, "no") == 0 || + strcasecmp(opt->paramValue.stringval, "disable") == 0 || + strcasecmp(opt->paramValue.stringval, "off") == 0 || + strcasecmp(opt->paramValue.stringval, "0") == 0 ) + bln = false; + else return CNF_WRONGRANGE; - if (check_rdonly && c->memcached_space != i32) + if (check_rdonly && c->panic_on_snap_error != bln) return CNF_RDONLY; - c->memcached_space = i32; + c->panic_on_snap_error = bln; } - else if ( cmpNameAtoms( opt->name, _name__memcached_expire) ) { + else if ( cmpNameAtoms( opt->name, _name__panic_on_wal_error) ) { if (opt->paramType != stringType && opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; @@ -701,11 +763,11 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { bln = false; else return CNF_WRONGRANGE; - if (check_rdonly && c->memcached_expire != bln) + if (check_rdonly && c->panic_on_wal_error != bln) return CNF_RDONLY; - c->memcached_expire = bln; + c->panic_on_wal_error = bln; } - else if ( cmpNameAtoms( opt->name, _name__memcached_expire_per_loop) ) { + else if ( cmpNameAtoms( opt->name, _name__primary_port) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; @@ -715,19 +777,25 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { return CNF_WRONGINT; if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) return CNF_WRONGRANGE; - c->memcached_expire_per_loop = i32; + if (check_rdonly && c->primary_port != i32) + return CNF_RDONLY; + c->primary_port = i32; } - else if ( cmpNameAtoms( opt->name, _name__memcached_expire_full_sweep) ) { + else if ( cmpNameAtoms( opt->name, _name__secondary_port) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; 
c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - double dbl = strtod(opt->paramValue.numberval, NULL); - if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) + long int i32 = strtol(opt->paramValue.numberval, NULL, 10); + if (i32 == 0 && errno == EINVAL) + return CNF_WRONGINT; + if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) return CNF_WRONGRANGE; - c->memcached_expire_full_sweep = dbl; + if (check_rdonly && c->secondary_port != i32) + return CNF_RDONLY; + c->secondary_port = i32; } - else if ( cmpNameAtoms( opt->name, _name__snap_io_rate_limit) ) { + else if ( cmpNameAtoms( opt->name, _name__too_long_threshold) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; @@ -735,37 +803,35 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { double dbl = strtod(opt->paramValue.numberval, NULL); if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) return CNF_WRONGRANGE; - if (check_rdonly && c->snap_io_rate_limit != dbl) - return CNF_RDONLY; - c->snap_io_rate_limit = dbl; + c->too_long_threshold = dbl; } - else if ( cmpNameAtoms( opt->name, _name__rows_per_wal) ) { - if (opt->paramType != numberType ) + else if ( cmpNameAtoms( opt->name, _name__custom_proc_title) ) { + if (opt->paramType != stringType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - long int i32 = strtol(opt->paramValue.numberval, NULL, 10); - if (i32 == 0 && errno == EINVAL) - return CNF_WRONGINT; - if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) - return CNF_WRONGRANGE; - if (check_rdonly && c->rows_per_wal != i32) + if (check_rdonly && ( (opt->paramValue.stringval == NULL && c->custom_proc_title == NULL) || strcmp(opt->paramValue.stringval, c->custom_proc_title) != 0)) return CNF_RDONLY; - c->rows_per_wal = i32; + if (c->custom_proc_title) free(c->custom_proc_title); + c->custom_proc_title = 
(opt->paramValue.stringval) ? strdup(opt->paramValue.stringval) : NULL; + if (opt->paramValue.stringval && c->custom_proc_title == NULL) + return CNF_NOMEMORY; } - else if ( cmpNameAtoms( opt->name, _name__wal_fsync_delay) ) { + else if ( cmpNameAtoms( opt->name, _name__memcached_port) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - double dbl = strtod(opt->paramValue.numberval, NULL); - if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) + long int i32 = strtol(opt->paramValue.numberval, NULL, 10); + if (i32 == 0 && errno == EINVAL) + return CNF_WRONGINT; + if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) return CNF_WRONGRANGE; - if (check_rdonly && c->wal_fsync_delay != dbl) + if (check_rdonly && c->memcached_port != i32) return CNF_RDONLY; - c->wal_fsync_delay = dbl; + c->memcached_port = i32; } - else if ( cmpNameAtoms( opt->name, _name__wal_writer_inbox_size) ) { + else if ( cmpNameAtoms( opt->name, _name__memcached_space) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; @@ -775,11 +841,11 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { return CNF_WRONGINT; if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) return CNF_WRONGRANGE; - if (check_rdonly && c->wal_writer_inbox_size != i32) + if (check_rdonly && c->memcached_space != i32) return CNF_RDONLY; - c->wal_writer_inbox_size = i32; + c->memcached_space = i32; } - else if ( cmpNameAtoms( opt->name, _name__local_hot_standby) ) { + else if ( cmpNameAtoms( opt->name, _name__memcached_expire) ) { if (opt->paramType != stringType && opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; @@ -806,83 +872,31 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { bln = false; else return CNF_WRONGRANGE; - if (check_rdonly && c->local_hot_standby != bln) + if 
(check_rdonly && c->memcached_expire != bln) return CNF_RDONLY; - c->local_hot_standby = bln; + c->memcached_expire = bln; } - else if ( cmpNameAtoms( opt->name, _name__wal_dir_rescan_delay) ) { + else if ( cmpNameAtoms( opt->name, _name__memcached_expire_per_loop) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - double dbl = strtod(opt->paramValue.numberval, NULL); - if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) - return CNF_WRONGRANGE; - if (check_rdonly && c->wal_dir_rescan_delay != dbl) - return CNF_RDONLY; - c->wal_dir_rescan_delay = dbl; - } - else if ( cmpNameAtoms( opt->name, _name__panic_on_snap_error) ) { - if (opt->paramType != stringType && opt->paramType != numberType ) - return CNF_WRONGTYPE; - c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; - errno = 0; - bool bln; - - if (opt->paramType == numberType) { - if (strcmp(opt->paramValue.numberval, "0") == 0 || strcmp(opt->paramValue.numberval, "1") == 0) - bln = opt->paramValue.numberval[0] - '0'; - else - return CNF_WRONGRANGE; - } - else if (strcasecmp(opt->paramValue.stringval, "true") == 0 || - strcasecmp(opt->paramValue.stringval, "yes") == 0 || - strcasecmp(opt->paramValue.stringval, "enable") == 0 || - strcasecmp(opt->paramValue.stringval, "on") == 0 || - strcasecmp(opt->paramValue.stringval, "1") == 0 ) - bln = true; - else if (strcasecmp(opt->paramValue.stringval, "false") == 0 || - strcasecmp(opt->paramValue.stringval, "no") == 0 || - strcasecmp(opt->paramValue.stringval, "disable") == 0 || - strcasecmp(opt->paramValue.stringval, "off") == 0 || - strcasecmp(opt->paramValue.stringval, "0") == 0 ) - bln = false; - else + long int i32 = strtol(opt->paramValue.numberval, NULL, 10); + if (i32 == 0 && errno == EINVAL) + return CNF_WRONGINT; + if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) return CNF_WRONGRANGE; - if (check_rdonly && c->panic_on_snap_error != bln) - return CNF_RDONLY; - 
c->panic_on_snap_error = bln; + c->memcached_expire_per_loop = i32; } - else if ( cmpNameAtoms( opt->name, _name__panic_on_wal_error) ) { - if (opt->paramType != stringType && opt->paramType != numberType ) + else if ( cmpNameAtoms( opt->name, _name__memcached_expire_full_sweep) ) { + if (opt->paramType != numberType ) return CNF_WRONGTYPE; c->__confetti_flags &= ~CNF_FLAG_STRUCT_NOTSET; errno = 0; - bool bln; - - if (opt->paramType == numberType) { - if (strcmp(opt->paramValue.numberval, "0") == 0 || strcmp(opt->paramValue.numberval, "1") == 0) - bln = opt->paramValue.numberval[0] - '0'; - else - return CNF_WRONGRANGE; - } - else if (strcasecmp(opt->paramValue.stringval, "true") == 0 || - strcasecmp(opt->paramValue.stringval, "yes") == 0 || - strcasecmp(opt->paramValue.stringval, "enable") == 0 || - strcasecmp(opt->paramValue.stringval, "on") == 0 || - strcasecmp(opt->paramValue.stringval, "1") == 0 ) - bln = true; - else if (strcasecmp(opt->paramValue.stringval, "false") == 0 || - strcasecmp(opt->paramValue.stringval, "no") == 0 || - strcasecmp(opt->paramValue.stringval, "disable") == 0 || - strcasecmp(opt->paramValue.stringval, "off") == 0 || - strcasecmp(opt->paramValue.stringval, "0") == 0 ) - bln = false; - else + double dbl = strtod(opt->paramValue.numberval, NULL); + if ( (dbl == 0 || dbl == -HUGE_VAL || dbl == HUGE_VAL) && errno == ERANGE) return CNF_WRONGRANGE; - if (check_rdonly && c->panic_on_wal_error != bln) - return CNF_RDONLY; - c->panic_on_wal_error = bln; + c->memcached_expire_full_sweep = dbl; } else if ( cmpNameAtoms( opt->name, _name__replication_source) ) { if (opt->paramType != stringType ) @@ -1207,6 +1221,7 @@ parse_cfg_buffer_tarantool_cfg(tarantool_cfg *c, char *buffer, int check_rdonly, typedef enum IteratorState { _S_Initial = 0, S_name__username, + S_name__local_hot_standby, S_name__bind_ipaddr, S_name__coredump, S_name__admin_port, @@ -1216,14 +1231,22 @@ typedef enum IteratorState { S_name__slab_alloc_minimal, 
S_name__slab_alloc_factor, S_name__work_dir, + S_name__snap_dir, + S_name__wal_dir, S_name__pid_file, S_name__logger, S_name__logger_nonblock, S_name__io_collect_interval, S_name__backlog, S_name__readahead, - S_name__snap_dir, - S_name__wal_dir, + S_name__snap_io_rate_limit, + S_name__rows_per_wal, + S_name__wal_writer_inbox_size, + S_name__wal_mode, + S_name__wal_fsync_delay, + S_name__wal_dir_rescan_delay, + S_name__panic_on_snap_error, + S_name__panic_on_wal_error, S_name__primary_port, S_name__secondary_port, S_name__too_long_threshold, @@ -1233,14 +1256,6 @@ typedef enum IteratorState { S_name__memcached_expire, S_name__memcached_expire_per_loop, S_name__memcached_expire_full_sweep, - S_name__snap_io_rate_limit, - S_name__rows_per_wal, - S_name__wal_fsync_delay, - S_name__wal_writer_inbox_size, - S_name__local_hot_standby, - S_name__wal_dir_rescan_delay, - S_name__panic_on_snap_error, - S_name__panic_on_wal_error, S_name__replication_source, S_name__space, S_name__space__enabled, @@ -1287,6 +1302,17 @@ tarantool_cfg_iterator_next(tarantool_cfg_iterator_t* i, tarantool_cfg *c, char return NULL; } snprintf(buf, PRINTBUFLEN-1, "username"); + i->state = S_name__local_hot_standby; + return buf; + case S_name__local_hot_standby: + *v = malloc(8); + if (*v == NULL) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + sprintf(*v, "%s", c->local_hot_standby ? "true" : "false"); + snprintf(buf, PRINTBUFLEN-1, "local_hot_standby"); i->state = S_name__bind_ipaddr; return buf; case S_name__bind_ipaddr: @@ -1384,6 +1410,26 @@ tarantool_cfg_iterator_next(tarantool_cfg_iterator_t* i, tarantool_cfg *c, char return NULL; } snprintf(buf, PRINTBUFLEN-1, "work_dir"); + i->state = S_name__snap_dir; + return buf; + case S_name__snap_dir: + *v = (c->snap_dir) ? 
strdup(c->snap_dir) : NULL; + if (*v == NULL && c->snap_dir) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + snprintf(buf, PRINTBUFLEN-1, "snap_dir"); + i->state = S_name__wal_dir; + return buf; + case S_name__wal_dir: + *v = (c->wal_dir) ? strdup(c->wal_dir) : NULL; + if (*v == NULL && c->wal_dir) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + snprintf(buf, PRINTBUFLEN-1, "wal_dir"); i->state = S_name__pid_file; return buf; case S_name__pid_file: @@ -1424,50 +1470,117 @@ tarantool_cfg_iterator_next(tarantool_cfg_iterator_t* i, tarantool_cfg *c, char out_warning(CNF_NOMEMORY, "No memory to output value"); return NULL; } - sprintf(*v, "%g", c->io_collect_interval); - snprintf(buf, PRINTBUFLEN-1, "io_collect_interval"); - i->state = S_name__backlog; + sprintf(*v, "%g", c->io_collect_interval); + snprintf(buf, PRINTBUFLEN-1, "io_collect_interval"); + i->state = S_name__backlog; + return buf; + case S_name__backlog: + *v = malloc(32); + if (*v == NULL) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + sprintf(*v, "%"PRId32, c->backlog); + snprintf(buf, PRINTBUFLEN-1, "backlog"); + i->state = S_name__readahead; + return buf; + case S_name__readahead: + *v = malloc(32); + if (*v == NULL) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + sprintf(*v, "%"PRId32, c->readahead); + snprintf(buf, PRINTBUFLEN-1, "readahead"); + i->state = S_name__snap_io_rate_limit; + return buf; + case S_name__snap_io_rate_limit: + *v = malloc(32); + if (*v == NULL) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + sprintf(*v, "%g", c->snap_io_rate_limit); + snprintf(buf, PRINTBUFLEN-1, "snap_io_rate_limit"); + i->state = S_name__rows_per_wal; + return buf; + case S_name__rows_per_wal: + *v = malloc(32); + if (*v == NULL) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output 
value"); + return NULL; + } + sprintf(*v, "%"PRId32, c->rows_per_wal); + snprintf(buf, PRINTBUFLEN-1, "rows_per_wal"); + i->state = S_name__wal_writer_inbox_size; + return buf; + case S_name__wal_writer_inbox_size: + *v = malloc(32); + if (*v == NULL) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + sprintf(*v, "%"PRId32, c->wal_writer_inbox_size); + snprintf(buf, PRINTBUFLEN-1, "wal_writer_inbox_size"); + i->state = S_name__wal_mode; + return buf; + case S_name__wal_mode: + *v = (c->wal_mode) ? strdup(c->wal_mode) : NULL; + if (*v == NULL && c->wal_mode) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + snprintf(buf, PRINTBUFLEN-1, "wal_mode"); + i->state = S_name__wal_fsync_delay; return buf; - case S_name__backlog: + case S_name__wal_fsync_delay: *v = malloc(32); if (*v == NULL) { free(i); out_warning(CNF_NOMEMORY, "No memory to output value"); return NULL; } - sprintf(*v, "%"PRId32, c->backlog); - snprintf(buf, PRINTBUFLEN-1, "backlog"); - i->state = S_name__readahead; + sprintf(*v, "%g", c->wal_fsync_delay); + snprintf(buf, PRINTBUFLEN-1, "wal_fsync_delay"); + i->state = S_name__wal_dir_rescan_delay; return buf; - case S_name__readahead: + case S_name__wal_dir_rescan_delay: *v = malloc(32); if (*v == NULL) { free(i); out_warning(CNF_NOMEMORY, "No memory to output value"); return NULL; } - sprintf(*v, "%"PRId32, c->readahead); - snprintf(buf, PRINTBUFLEN-1, "readahead"); - i->state = S_name__snap_dir; + sprintf(*v, "%g", c->wal_dir_rescan_delay); + snprintf(buf, PRINTBUFLEN-1, "wal_dir_rescan_delay"); + i->state = S_name__panic_on_snap_error; return buf; - case S_name__snap_dir: - *v = (c->snap_dir) ? 
strdup(c->snap_dir) : NULL; - if (*v == NULL && c->snap_dir) { + case S_name__panic_on_snap_error: + *v = malloc(8); + if (*v == NULL) { free(i); out_warning(CNF_NOMEMORY, "No memory to output value"); return NULL; } - snprintf(buf, PRINTBUFLEN-1, "snap_dir"); - i->state = S_name__wal_dir; + sprintf(*v, "%s", c->panic_on_snap_error ? "true" : "false"); + snprintf(buf, PRINTBUFLEN-1, "panic_on_snap_error"); + i->state = S_name__panic_on_wal_error; return buf; - case S_name__wal_dir: - *v = (c->wal_dir) ? strdup(c->wal_dir) : NULL; - if (*v == NULL && c->wal_dir) { + case S_name__panic_on_wal_error: + *v = malloc(8); + if (*v == NULL) { free(i); out_warning(CNF_NOMEMORY, "No memory to output value"); return NULL; } - snprintf(buf, PRINTBUFLEN-1, "wal_dir"); + sprintf(*v, "%s", c->panic_on_wal_error ? "true" : "false"); + snprintf(buf, PRINTBUFLEN-1, "panic_on_wal_error"); i->state = S_name__primary_port; return buf; case S_name__primary_port: @@ -1566,94 +1679,6 @@ tarantool_cfg_iterator_next(tarantool_cfg_iterator_t* i, tarantool_cfg *c, char } sprintf(*v, "%g", c->memcached_expire_full_sweep); snprintf(buf, PRINTBUFLEN-1, "memcached_expire_full_sweep"); - i->state = S_name__snap_io_rate_limit; - return buf; - case S_name__snap_io_rate_limit: - *v = malloc(32); - if (*v == NULL) { - free(i); - out_warning(CNF_NOMEMORY, "No memory to output value"); - return NULL; - } - sprintf(*v, "%g", c->snap_io_rate_limit); - snprintf(buf, PRINTBUFLEN-1, "snap_io_rate_limit"); - i->state = S_name__rows_per_wal; - return buf; - case S_name__rows_per_wal: - *v = malloc(32); - if (*v == NULL) { - free(i); - out_warning(CNF_NOMEMORY, "No memory to output value"); - return NULL; - } - sprintf(*v, "%"PRId32, c->rows_per_wal); - snprintf(buf, PRINTBUFLEN-1, "rows_per_wal"); - i->state = S_name__wal_fsync_delay; - return buf; - case S_name__wal_fsync_delay: - *v = malloc(32); - if (*v == NULL) { - free(i); - out_warning(CNF_NOMEMORY, "No memory to output value"); - return NULL; - } - 
sprintf(*v, "%g", c->wal_fsync_delay); - snprintf(buf, PRINTBUFLEN-1, "wal_fsync_delay"); - i->state = S_name__wal_writer_inbox_size; - return buf; - case S_name__wal_writer_inbox_size: - *v = malloc(32); - if (*v == NULL) { - free(i); - out_warning(CNF_NOMEMORY, "No memory to output value"); - return NULL; - } - sprintf(*v, "%"PRId32, c->wal_writer_inbox_size); - snprintf(buf, PRINTBUFLEN-1, "wal_writer_inbox_size"); - i->state = S_name__local_hot_standby; - return buf; - case S_name__local_hot_standby: - *v = malloc(8); - if (*v == NULL) { - free(i); - out_warning(CNF_NOMEMORY, "No memory to output value"); - return NULL; - } - sprintf(*v, "%s", c->local_hot_standby ? "true" : "false"); - snprintf(buf, PRINTBUFLEN-1, "local_hot_standby"); - i->state = S_name__wal_dir_rescan_delay; - return buf; - case S_name__wal_dir_rescan_delay: - *v = malloc(32); - if (*v == NULL) { - free(i); - out_warning(CNF_NOMEMORY, "No memory to output value"); - return NULL; - } - sprintf(*v, "%g", c->wal_dir_rescan_delay); - snprintf(buf, PRINTBUFLEN-1, "wal_dir_rescan_delay"); - i->state = S_name__panic_on_snap_error; - return buf; - case S_name__panic_on_snap_error: - *v = malloc(8); - if (*v == NULL) { - free(i); - out_warning(CNF_NOMEMORY, "No memory to output value"); - return NULL; - } - sprintf(*v, "%s", c->panic_on_snap_error ? "true" : "false"); - snprintf(buf, PRINTBUFLEN-1, "panic_on_snap_error"); - i->state = S_name__panic_on_wal_error; - return buf; - case S_name__panic_on_wal_error: - *v = malloc(8); - if (*v == NULL) { - free(i); - out_warning(CNF_NOMEMORY, "No memory to output value"); - return NULL; - } - sprintf(*v, "%s", c->panic_on_wal_error ? "true" : "false"); - snprintf(buf, PRINTBUFLEN-1, "panic_on_wal_error"); i->state = S_name__replication_source; return buf; case S_name__replication_source: @@ -1945,6 +1970,7 @@ dup_tarantool_cfg(tarantool_cfg* dst, tarantool_cfg* src) { if (dst->username) free(dst->username);dst->username = src->username == NULL ? 
NULL : strdup(src->username); if (src->username != NULL && dst->username == NULL) return CNF_NOMEMORY; + dst->local_hot_standby = src->local_hot_standby; if (dst->bind_ipaddr) free(dst->bind_ipaddr);dst->bind_ipaddr = src->bind_ipaddr == NULL ? NULL : strdup(src->bind_ipaddr); if (src->bind_ipaddr != NULL && dst->bind_ipaddr == NULL) return CNF_NOMEMORY; @@ -1958,6 +1984,12 @@ dup_tarantool_cfg(tarantool_cfg* dst, tarantool_cfg* src) { if (dst->work_dir) free(dst->work_dir);dst->work_dir = src->work_dir == NULL ? NULL : strdup(src->work_dir); if (src->work_dir != NULL && dst->work_dir == NULL) return CNF_NOMEMORY; + if (dst->snap_dir) free(dst->snap_dir);dst->snap_dir = src->snap_dir == NULL ? NULL : strdup(src->snap_dir); + if (src->snap_dir != NULL && dst->snap_dir == NULL) + return CNF_NOMEMORY; + if (dst->wal_dir) free(dst->wal_dir);dst->wal_dir = src->wal_dir == NULL ? NULL : strdup(src->wal_dir); + if (src->wal_dir != NULL && dst->wal_dir == NULL) + return CNF_NOMEMORY; if (dst->pid_file) free(dst->pid_file);dst->pid_file = src->pid_file == NULL ? NULL : strdup(src->pid_file); if (src->pid_file != NULL && dst->pid_file == NULL) return CNF_NOMEMORY; @@ -1968,12 +2000,16 @@ dup_tarantool_cfg(tarantool_cfg* dst, tarantool_cfg* src) { dst->io_collect_interval = src->io_collect_interval; dst->backlog = src->backlog; dst->readahead = src->readahead; - if (dst->snap_dir) free(dst->snap_dir);dst->snap_dir = src->snap_dir == NULL ? NULL : strdup(src->snap_dir); - if (src->snap_dir != NULL && dst->snap_dir == NULL) - return CNF_NOMEMORY; - if (dst->wal_dir) free(dst->wal_dir);dst->wal_dir = src->wal_dir == NULL ? NULL : strdup(src->wal_dir); - if (src->wal_dir != NULL && dst->wal_dir == NULL) + dst->snap_io_rate_limit = src->snap_io_rate_limit; + dst->rows_per_wal = src->rows_per_wal; + dst->wal_writer_inbox_size = src->wal_writer_inbox_size; + if (dst->wal_mode) free(dst->wal_mode);dst->wal_mode = src->wal_mode == NULL ? 
NULL : strdup(src->wal_mode); + if (src->wal_mode != NULL && dst->wal_mode == NULL) return CNF_NOMEMORY; + dst->wal_fsync_delay = src->wal_fsync_delay; + dst->wal_dir_rescan_delay = src->wal_dir_rescan_delay; + dst->panic_on_snap_error = src->panic_on_snap_error; + dst->panic_on_wal_error = src->panic_on_wal_error; dst->primary_port = src->primary_port; dst->secondary_port = src->secondary_port; dst->too_long_threshold = src->too_long_threshold; @@ -1985,14 +2021,6 @@ dup_tarantool_cfg(tarantool_cfg* dst, tarantool_cfg* src) { dst->memcached_expire = src->memcached_expire; dst->memcached_expire_per_loop = src->memcached_expire_per_loop; dst->memcached_expire_full_sweep = src->memcached_expire_full_sweep; - dst->snap_io_rate_limit = src->snap_io_rate_limit; - dst->rows_per_wal = src->rows_per_wal; - dst->wal_fsync_delay = src->wal_fsync_delay; - dst->wal_writer_inbox_size = src->wal_writer_inbox_size; - dst->local_hot_standby = src->local_hot_standby; - dst->wal_dir_rescan_delay = src->wal_dir_rescan_delay; - dst->panic_on_snap_error = src->panic_on_snap_error; - dst->panic_on_wal_error = src->panic_on_wal_error; if (dst->replication_source) free(dst->replication_source);dst->replication_source = src->replication_source == NULL ? 
NULL : strdup(src->replication_source); if (src->replication_source != NULL && dst->replication_source == NULL) return CNF_NOMEMORY; @@ -2065,14 +2093,16 @@ destroy_tarantool_cfg(tarantool_cfg* c) { free(c->bind_ipaddr); if (c->work_dir != NULL) free(c->work_dir); - if (c->pid_file != NULL) - free(c->pid_file); - if (c->logger != NULL) - free(c->logger); if (c->snap_dir != NULL) free(c->snap_dir); if (c->wal_dir != NULL) free(c->wal_dir); + if (c->pid_file != NULL) + free(c->pid_file); + if (c->logger != NULL) + free(c->logger); + if (c->wal_mode != NULL) + free(c->wal_mode); if (c->custom_proc_title != NULL) free(c->custom_proc_title); if (c->replication_source != NULL) @@ -2143,6 +2173,11 @@ cmp_tarantool_cfg(tarantool_cfg* c1, tarantool_cfg* c2, int only_check_rdonly) { return diff; } + if (c1->local_hot_standby != c2->local_hot_standby) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->local_hot_standby"); + + return diff; + } if (confetti_strcmp(c1->bind_ipaddr, c2->bind_ipaddr) != 0) { snprintf(diff, PRINTBUFLEN - 1, "%s", "c->bind_ipaddr"); @@ -2190,6 +2225,16 @@ cmp_tarantool_cfg(tarantool_cfg* c1, tarantool_cfg* c2, int only_check_rdonly) { return diff; } + if (confetti_strcmp(c1->snap_dir, c2->snap_dir) != 0) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->snap_dir"); + + return diff; +} + if (confetti_strcmp(c1->wal_dir, c2->wal_dir) != 0) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_dir"); + + return diff; +} if (confetti_strcmp(c1->pid_file, c2->pid_file) != 0) { snprintf(diff, PRINTBUFLEN - 1, "%s", "c->pid_file"); @@ -2222,16 +2267,50 @@ cmp_tarantool_cfg(tarantool_cfg* c1, tarantool_cfg* c2, int only_check_rdonly) { return diff; } } - if (confetti_strcmp(c1->snap_dir, c2->snap_dir) != 0) { - snprintf(diff, PRINTBUFLEN - 1, "%s", "c->snap_dir"); + if (c1->snap_io_rate_limit != c2->snap_io_rate_limit) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->snap_io_rate_limit"); return diff; -} - if (confetti_strcmp(c1->wal_dir, c2->wal_dir) != 0) { - 
snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_dir"); + } + if (c1->rows_per_wal != c2->rows_per_wal) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->rows_per_wal"); + + return diff; + } + if (c1->wal_writer_inbox_size != c2->wal_writer_inbox_size) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_writer_inbox_size"); return diff; + } + if (!only_check_rdonly) { + if (confetti_strcmp(c1->wal_mode, c2->wal_mode) != 0) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_mode"); + + return diff; } + } + if (!only_check_rdonly) { + if (c1->wal_fsync_delay != c2->wal_fsync_delay) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_fsync_delay"); + + return diff; + } + } + if (c1->wal_dir_rescan_delay != c2->wal_dir_rescan_delay) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_dir_rescan_delay"); + + return diff; + } + if (c1->panic_on_snap_error != c2->panic_on_snap_error) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->panic_on_snap_error"); + + return diff; + } + if (c1->panic_on_wal_error != c2->panic_on_wal_error) { + snprintf(diff, PRINTBUFLEN - 1, "%s", "c->panic_on_wal_error"); + + return diff; + } if (c1->primary_port != c2->primary_port) { snprintf(diff, PRINTBUFLEN - 1, "%s", "c->primary_port"); @@ -2283,46 +2362,6 @@ cmp_tarantool_cfg(tarantool_cfg* c1, tarantool_cfg* c2, int only_check_rdonly) { return diff; } } - if (c1->snap_io_rate_limit != c2->snap_io_rate_limit) { - snprintf(diff, PRINTBUFLEN - 1, "%s", "c->snap_io_rate_limit"); - - return diff; - } - if (c1->rows_per_wal != c2->rows_per_wal) { - snprintf(diff, PRINTBUFLEN - 1, "%s", "c->rows_per_wal"); - - return diff; - } - if (c1->wal_fsync_delay != c2->wal_fsync_delay) { - snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_fsync_delay"); - - return diff; - } - if (c1->wal_writer_inbox_size != c2->wal_writer_inbox_size) { - snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_writer_inbox_size"); - - return diff; - } - if (c1->local_hot_standby != c2->local_hot_standby) { - snprintf(diff, PRINTBUFLEN - 1, "%s", 
"c->local_hot_standby"); - - return diff; - } - if (c1->wal_dir_rescan_delay != c2->wal_dir_rescan_delay) { - snprintf(diff, PRINTBUFLEN - 1, "%s", "c->wal_dir_rescan_delay"); - - return diff; - } - if (c1->panic_on_snap_error != c2->panic_on_snap_error) { - snprintf(diff, PRINTBUFLEN - 1, "%s", "c->panic_on_snap_error"); - - return diff; - } - if (c1->panic_on_wal_error != c2->panic_on_wal_error) { - snprintf(diff, PRINTBUFLEN - 1, "%s", "c->panic_on_wal_error"); - - return diff; - } if (!only_check_rdonly) { if (confetti_strcmp(c1->replication_source, c2->replication_source) != 0) { snprintf(diff, PRINTBUFLEN - 1, "%s", "c->replication_source"); diff --git a/cfg/tarantool_box_cfg.h b/cfg/tarantool_box_cfg.h index 5e9451bb98..617d989e05 100644 --- a/cfg/tarantool_box_cfg.h +++ b/cfg/tarantool_box_cfg.h @@ -44,6 +44,14 @@ typedef struct tarantool_cfg { /* username to switch to */ char* username; + /* + * Local hot standby (if enabled, the server will run in hot + * standby mode, continuously fetching WAL records from wal_dir, + * until it is able to bind to the primary port. + * In local hot standby mode the server only accepts reads. + */ + confetti_bool_t local_hot_standby; + /* * tarantool bind ip address, applies to master * and replication ports. INADDR_ANY is the default value. 
@@ -80,6 +88,12 @@ typedef struct tarantool_cfg { /* working directory (daemon will chdir(2) to it) */ char* work_dir; + /* Snapshot directory (where snapshots get saved/read) */ + char* snap_dir; + + /* WAL directory (where WALs get saved/read) */ + char* wal_dir; + /* name of pid file */ char* pid_file; @@ -103,16 +117,53 @@ typedef struct tarantool_cfg { /* network io readahead */ int32_t readahead; + /* Do not write into snapshot faster than snap_io_rate_limit MB/sec */ + double snap_io_rate_limit; + + /* Write no more rows in WAL */ + int32_t rows_per_wal; + /* - * # BOX - * Snapshot directory (where snapshots get saved/read) + * Size of the WAL writer request queue: how many outstanding + * requests for write to disk it can have. + * Rule of thumb is to set this to the average connection count. */ - char* snap_dir; + int32_t wal_writer_inbox_size; - /* WAL directory (where WALs get saved/read) */ - char* wal_dir; + /* + * Defines fiber/data synchronization fsync(2) policy: + * "write": fibers wait for their data to be written to the log. + * "fsync": fibers wait for their data, fsync(2) follows each write(2) + * "fsync_delay": fibers wait for their data, fsync every N=wal_fsync_delay seconds, + * N=0.0 means no fsync (equivalent to wal_mode = "write"); + */ + char* wal_mode; - /* Primary port (where updates are accepted) */ + /* + * Fsync WAL delay, only issue fsync if last fsync was wal_fsync_delay + * seconds ago. + * WARNING: actually, several last requests may stall fsync for much longer + */ + double wal_fsync_delay; + + /* + * Delay, in seconds, between successive re-readings of wal_dir. + * The re-scan is necessary to discover new WAL files or snapshots. + */ + double wal_dir_rescan_delay; + + /* + * Panic if there is an error reading a snapshot or WAL. + * By default, panic on any snapshot reading error and ignore errors + * when reading WALs. 
+ */ + confetti_bool_t panic_on_snap_error; + confetti_bool_t panic_on_wal_error; + + /* + * # BOX + * Primary port (where updates are accepted) + */ int32_t primary_port; /* Secondary port (where only selects are accepted) */ @@ -142,44 +193,6 @@ typedef struct tarantool_cfg { /* tarantool will try to iterate over all rows within this time */ double memcached_expire_full_sweep; - /* Do not write into snapshot faster than snap_io_rate_limit MB/sec */ - double snap_io_rate_limit; - - /* Write no more rows in WAL */ - int32_t rows_per_wal; - - /* - * fsync WAL delay, only issue fsync if last fsync was wal_fsync_delay - * seconds ago. - * WARNING: actually, several last requests may stall fsync for much longer - */ - double wal_fsync_delay; - - /* size of WAL writer request buffer */ - int32_t wal_writer_inbox_size; - - /* - * Local hot standby (if enabled, the server will run in hot - * standby mode, continuously fetching WAL records from wal_dir, - * until it is able to bind to the primary port. - * In local hot standby mode the server only accepts reads. - */ - confetti_bool_t local_hot_standby; - - /* - * Delay, in seconds, between successive re-readings of wal_dir. - * The re-scan is necessary to discover new WAL files or snapshots. - */ - double wal_dir_rescan_delay; - - /* - * Panic if there is an error reading a snapshot or WAL. - * By default, panic on any snapshot reading error and ignore errors - * when reading WALs. 
- */ - confetti_bool_t panic_on_snap_error; - confetti_bool_t panic_on_wal_error; - /* * Replication mode (if enabled, the server, once * bound to the primary port, will connect to diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 331d9e9f60..8072287ed7 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -61,7 +61,7 @@ set (recompiled_core_sources ${CMAKE_SOURCE_DIR}/core/fiber.m PARENT_SCOPE) set (common_sources tbuf.m palloc.m util.m - salloc.m pickle.m coro.m stat.m log_io.m cpu_feature.m + salloc.m pickle.m coro.m stat.m log_io.m cpu_feature.m crc32.c log_io_remote.m iproto.m exception.m errcode.c errinj.m latch.m) if (ENABLE_TRACE) @@ -72,7 +72,7 @@ add_library(core STATIC ${common_sources}) add_dependencies(core generate_headers luajit) set_target_properties(core PROPERTIES COMPILE_FLAGS "${core_cflags}") -set (common_libraries cfg core ev coro gopt misc objc) +set (common_libraries cfg core ev coro gopt misc objc pthread rt) set (common_libraries ${common_libraries} ${LUAJIT_LIB}) if (TARGET_OS_LINUX) diff --git a/core/crc32.c b/core/crc32.c new file mode 100644 index 0000000000..097f651166 --- /dev/null +++ b/core/crc32.c @@ -0,0 +1,47 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "crc32.h" +#include <third_party/crc32.h> +#include <cpu_feature.h> +/* + * Pointer to an architecture-specific implementation of + * CRC32 calculation method. + */ +crc32_func crc32_calc = NULL; + +void +crc32_init() +{ +#if defined (__i386__) || defined (__x86_64__) + crc32_calc = cpu_has(cpuf_sse4_2) ? &crc32c_hw : &crc32c; +#else + crc32_calc = &crc32c; +#endif +} + diff --git a/core/fiber.m b/core/fiber.m index 89258fa0a2..1abbb7063b 100644 --- a/core/fiber.m +++ b/core/fiber.m @@ -135,7 +135,6 @@ fiber_call(struct fiber *callee) void fiber_wakeup(struct fiber *f) { - ev_async_start(&f->async); ev_async_send(&f->async); } @@ -531,6 +530,7 @@ fiber_create(const char *name, int fd, int inbox_size, void (*f) (void *), void fiber_alloc(fiber); ev_init(&fiber->io, (void *)ev_schedule); ev_async_init(&fiber->async, (void *)ev_schedule); + ev_async_start(&fiber->async); ev_init(&fiber->timer, (void *)ev_schedule); ev_init(&fiber->cw, (void *)ev_schedule); fiber->io.data = fiber->async.data = fiber->timer.data = fiber->cw.data = fiber; @@ -565,6 +565,7 @@ fiber_destroy(struct fiber *f) if (strcmp(f->name, "sched") == 0) return; + ev_async_stop(&f->async); palloc_destroy_pool(f->gc_pool); tarantool_coro_destroy(&f->coro); } diff --git a/core/log_io.m b/core/log_io.m index 6d35768687..4339f41457 100644 --- a/core/log_io.m +++ b/core/log_io.m @@ -41,9 +41,8 @@ #include <fiber.h> #include <say.h> -#include 
<third_party/crc32.h> #include <pickle.h> -#include <cpu_feature.h> +#include <crc32.h> const u16 snap_tag = -1; const u16 wal_tag = -2; @@ -51,12 +50,13 @@ const u64 default_cookie = 0; const u32 default_version = 11; const u32 marker_v11 = 0xba0babed; const u32 eof_marker_v11 = 0x10adab1e; -const char *snap_suffix = ".snap"; -const char *xlog_suffix = ".xlog"; -const char *inprogress_suffix = ".inprogress"; -const char *v11 = "0.11\n"; -const char *snap_mark = "SNAP\n"; -const char *xlog_mark = "XLOG\n"; +const char snap_suffix[] = ".snap"; +const char xlog_suffix[] = ".xlog"; +const char inprogress_suffix[] = ".inprogress"; +const char v11[] = "0.11\n"; +const char snap_mark[] = "SNAP\n"; +const char xlog_mark[] = "XLOG\n"; +static const int HEADER_SIZE_MAX = sizeof(v11) + sizeof(snap_mark) + 2; #define ROW_EOF (void *)1 @@ -72,19 +72,6 @@ struct log_io_iter { int io_rate_limit; }; -static u_int32_t (*calc_crc32c)(u_int32_t crc, const unsigned char *buf, - unsigned int len) = NULL; - -void -mach_setup_crc32() -{ -#if defined (__i386__) || defined (__x86_64__) - calc_crc32c = cpu_has(cpuf_sse4_2) ? 
&crc32c_hw : &crc32c; -#else - calc_crc32c = &crc32c; -#endif -} - void wait_lsn_set(struct wait_lsn *wait_lsn, i64 lsn) @@ -472,8 +459,8 @@ row_reader_v11(FILE *f, struct palloc_pool *pool) m->size = offsetof(struct row_v11, data); /* header crc32c calculated on <lsn, tm, len, data_crc32c> */ - header_crc = calc_crc32c(0, m->data + offsetof(struct row_v11, lsn), - sizeof(struct row_v11) - offsetof(struct row_v11, lsn)); + header_crc = crc32_calc(0, m->data + offsetof(struct row_v11, lsn), + sizeof(struct row_v11) - offsetof(struct row_v11, lsn)); if (row_v11(m)->header_crc32c != header_crc) { say_error("header crc32c mismatch"); @@ -486,7 +473,7 @@ row_reader_v11(FILE *f, struct palloc_pool *pool) m->size += row_v11(m)->len; - data_crc = calc_crc32c(0, row_v11(m)->data, row_v11(m)->len); + data_crc = crc32_calc(0, row_v11(m)->data, row_v11(m)->len); if (row_v11(m)->data_crc32c != data_crc) { say_error("data crc32c mismatch"); return NULL; @@ -497,7 +484,7 @@ row_reader_v11(FILE *f, struct palloc_pool *pool) } static int -inprogress_log_rename(char *filename) +log_io_inprogress_rename(char *filename) { char *new_filename; char *suffix = strrchr(filename, '.'); @@ -541,14 +528,14 @@ inprogress_log_unlink(char *filename) } int -close_log(struct log_io **lptr) +log_io_close(struct log_io **lptr) { struct log_io *l = *lptr; int r; if (l->rows == 1 && l->mode == LOG_WRITE) { /* Rename WAL before finalize. 
*/ - if (inprogress_log_rename(l->filename) != 0) + if (log_io_inprogress_rename(l->filename) != 0) panic("can't rename 'inprogress' WAL"); } @@ -568,38 +555,29 @@ close_log(struct log_io **lptr) } static int -flush_log(struct log_io *l) +log_io_flush(struct log_io *l) { if (fflush(l->f) < 0) return -1; -#ifdef TARGET_OS_LINUX - if (fdatasync(fileno(l->f)) < 0) { - say_syserror("fdatasync"); - return -1; - } -#else if (fsync(fileno(l->f)) < 0) { say_syserror("fsync"); return -1; } -#endif return 0; } static int write_header(struct log_io *l) { - if (fwrite(l->class->filetype, strlen(l->class->filetype), 1, l->f) != 1) - return -1; + char header[HEADER_SIZE_MAX]; - if (fwrite(l->class->version, strlen(l->class->version), 1, l->f) != 1) - return -1; + int n = snprintf(header, HEADER_SIZE_MAX, "%s%s\n", + l->class->filetype, l->class->version); - if (fwrite("\n", 1, 1, l->f) != 1) - return -1; + assert(n < HEADER_SIZE_MAX); - return 0; + return fwrite(header, n, 1, l->f); } static char * @@ -626,19 +604,62 @@ format_filename(char *filename, struct log_io_class *class, i64 lsn, int suffix) return filename; } -static struct log_io * -open_for_read(struct recovery_state *recover, struct log_io_class *class, i64 lsn, int suffix, - const char *filename) +/** + * Verify that file is of the given class (format). + * + * @param l log_io object, denoting the file to check. + * @param class class to check against. + * @param[out] errmsg set if error + * + * @return 0 if success, -1 on error. 
+ */ +static int +log_io_verify_meta(struct log_io *l, struct log_io_class *class, + const char **errmsg) { char filetype[32], version[32], buf[256]; + + FILE *stream = l->f; + + if (fgets(filetype, sizeof(filetype), stream) == NULL || + fgets(version, sizeof(version), stream) == NULL) { + *errmsg = "failed to read log file header"; + goto error; + } + if (strcmp(class->filetype, filetype) != 0) { + *errmsg = "unknown filetype"; + goto error; + } + + if (strcmp(class->version, version) != 0) { + *errmsg = "unknown version"; + goto error; + } + for (;;) { + if (fgets(buf, sizeof(buf), stream) == NULL) { + *errmsg = "failed to read log file header"; + goto error; + } + if (strcmp(buf, "\n") == 0 || strcmp(buf, "\r\n") == 0) + break; + } + return 0; +error: + return -1; +} + + +static struct log_io * +log_io_open_for_read(struct recovery_state *recover, struct log_io_class *class, i64 lsn, int suffix, + const char *filename) +{ struct log_io *l = NULL; - char *r; const char *errmsg; l = calloc(1, sizeof(*l)); if (l == NULL) { - errmsg = strerror(errno); - goto error; + say_syserror("calloc"); + return NULL; } l->mode = LOG_READ; l->stat.data = recover; @@ -661,54 +682,23 @@ open_for_read(struct recovery_state *recover, struct log_io_class *class, i64 ls goto error; } - r = fgets(filetype, sizeof(filetype), l->f); - if (r == NULL) { - errmsg = "header reading failed"; + if (log_io_verify_meta(l, class, &errmsg) != 0) goto error; - } - - r = fgets(version, sizeof(version), l->f); - if (r == NULL) { - errmsg = "header reading failed"; - goto error; - } - - if (strcmp(class->filetype, filetype) != 0) { - errmsg = "unknown filetype"; - goto error; - } - - if (strcmp(class->version, version) != 0) { - errmsg = "unknown version"; - goto error; - } l->class = class; - for (;;) { - r = fgets(buf, sizeof(buf), l->f); - if (r == NULL) { - errmsg = "header reading failed"; - goto error; - } - if (strcmp(r, "\n") == 0 || strcmp(r, "\r\n") == 0) - break; - } - return l; - error: - 
say_error("open_for_read: failed to open `%s': %s", l->filename, +error: + say_error("log_io_open_for_read: failed to open `%s': %s", l->filename, errmsg); - if (l != NULL) { - if (l->f != NULL) - fclose(l->f); - free(l); - } + if (l->f != NULL) + fclose(l->f); + free(l); return NULL; } struct log_io * -open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 lsn, - int suffix, int *save_errno) +log_io_open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 lsn, + int suffix, int *save_errno) { struct log_io *l = NULL; int fd; @@ -729,7 +719,7 @@ open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 l assert(lsn > 0); format_filename(l->filename, class, lsn, suffix); - say_debug("find_log for writing `%s'", l->filename); + say_debug("%s: opening %s'", __func__, l->filename); if (suffix == -1) { /* @@ -751,14 +741,15 @@ open_for_write(struct recovery_state *recover, struct log_io_class *class, i64 l * Open the <lsn>.<suffix>.inprogress file. If it * exists, open will fail. 
*/ - fd = open(l->filename, O_WRONLY | O_CREAT | O_EXCL | O_APPEND, 0664); + fd = open(l->filename, + O_WRONLY | O_CREAT | O_EXCL | l->class->open_wflags, 0664); if (fd < 0) { *save_errno = errno; errmsg = strerror(errno); goto error; } - l->f = fdopen(fd, "a"); + l->f = fdopen(fd, "w"); if (l->f == NULL) { *save_errno = errno; errmsg = strerror(errno); @@ -801,7 +792,7 @@ read_log(const char *filename, return -1; } - l = open_for_read(NULL, c, 0, 0, filename); + l = log_io_open_for_read(NULL, c, 0, 0, filename); iter_open(l, &i, read_rows); while ((row = iter_inner(&i, (void *)1))) h(state, row); @@ -811,7 +802,7 @@ read_log(const char *filename, close_iter(&i); v11_class_free(c); - close_log(&l); + log_io_close(&l); return i.error; } @@ -832,7 +823,7 @@ recover_snap(struct recovery_state *r) return -1; } - snap = open_for_read(r, r->snap_class, lsn, 0, NULL); + snap = log_io_open_for_read(r, r->snap_class, lsn, 0, NULL); if (snap == NULL) { say_error("can't find/open snapshot"); return -1; @@ -866,7 +857,7 @@ recover_snap(struct recovery_state *r) close_iter(&i); if (snap != NULL) - close_log(&snap); + log_io_close(&snap); prelease(fiber->gc_pool); } @@ -967,19 +958,19 @@ recover_remaining_wals(struct recovery_state *r) } else { say_warn("wal `%s' wasn't correctly closed", r->current_wal->filename); - close_log(&r->current_wal); + log_io_close(&r->current_wal); } } current_lsn = r->confirmed_lsn + 1; /* TODO: find better way looking for next xlog */ - next_wal = open_for_read(r, r->wal_class, current_lsn, 0, NULL); + next_wal = log_io_open_for_read(r, r->wal_class, current_lsn, 0, NULL); /* * When doing final recovery, and dealing with the * last file, try opening .<suffix>.inprogress. 
*/ if (next_wal == NULL && r->finalize && current_lsn == wal_greatest_lsn) { - next_wal = open_for_read(r, r->wal_class, current_lsn, -1, NULL); + next_wal = log_io_open_for_read(r, r->wal_class, current_lsn, -1, NULL); if (next_wal == NULL) { char *filename = format_filename(NULL, r->wal_class, current_lsn, -1); @@ -1020,7 +1011,7 @@ recover_remaining_wals(struct recovery_state *r) if (result == LOG_EOF) { say_info("done `%s' confirmed_lsn:%" PRIi64, r->current_wal->filename, r->confirmed_lsn); - close_log(&r->current_wal); + log_io_close(&r->current_wal); } } @@ -1077,7 +1068,7 @@ recover(struct recovery_state *r, i64 lsn) result = -1; goto out; } - r->current_wal = open_for_read(r, r->wal_class, lsn, 0, NULL); + r->current_wal = log_io_open_for_read(r, r->wal_class, lsn, 0, NULL); if (r->current_wal == NULL) { result = -1; goto out; @@ -1123,7 +1114,7 @@ recover_follow_file(ev_stat *w, int revents __attribute__((unused))) if (result == LOG_EOF) { say_info("done `%s' confirmed_lsn:%" PRIi64, r->current_wal->filename, r->confirmed_lsn); - close_log(&r->current_wal); + log_io_close(&r->current_wal); recover_follow_dir((ev_timer *)w, 0); } } @@ -1176,12 +1167,12 @@ recover_finalize(struct recovery_state *r) } else if (r->current_wal->rows == 1) { /* Rename inprogress wal with one row */ say_warn("rename unfinished %s wal", r->current_wal->filename); - if (inprogress_log_rename(r->current_wal->filename) != 0) + if (log_io_inprogress_rename(r->current_wal->filename) != 0) panic("can't rename 'inprogress' wal"); } else panic("too many rows in inprogress WAL `%s'", r->current_wal->filename); - close_log(&r->current_wal); + log_io_close(&r->current_wal); } } @@ -1206,7 +1197,7 @@ write_to_disk(void *_state, struct tbuf *t) /* caller requested termination */ if (t == NULL) { if (wal != NULL) - close_log(&wal); + log_io_close(&wal); recover_free((struct recovery_state*)_state); return NULL; } @@ -1216,20 +1207,20 @@ write_to_disk(void *_state, struct tbuf *t) if (wal == 
NULL) { int unused; /* Open WAL with '.inprogress' suffix. */ - wal = open_for_write(r, r->wal_class, wal_write_request(t)->lsn, -1, - &unused); + wal = log_io_open_for_write(r, r->wal_class, wal_write_request(t)->lsn, -1, + &unused); } else if (wal->rows == 1) { /* rename WAL after first successful write to name * without inprogress suffix*/ - if (inprogress_log_rename(wal->filename) != 0) { + if (log_io_inprogress_rename(wal->filename) != 0) { say_error("can't rename inprogress wal"); goto fail; } } if (wal_to_close != NULL) { - if (close_log(&wal_to_close) != 0) + if (log_io_close(&wal_to_close) != 0) goto fail; } if (wal == NULL) { @@ -1249,9 +1240,9 @@ write_to_disk(void *_state, struct tbuf *t) row_v11(header)->tm = ev_now(); row_v11(header)->len = wal_write_request(t)->len; row_v11(header)->data_crc32c = - calc_crc32c(0, wal_write_request(t)->data, wal_write_request(t)->len); + crc32_calc(0, wal_write_request(t)->data, wal_write_request(t)->len); row_v11(header)->header_crc32c = - calc_crc32c(0, header->data + field_sizeof(struct row_v11, header_crc32c), + crc32_calc(0, header->data + field_sizeof(struct row_v11, header_crc32c), sizeof(struct row_v11) - field_sizeof(struct row_v11, header_crc32c)); if (fwrite(header->data, header->size, 1, wal->f) != 1) { @@ -1271,7 +1262,7 @@ write_to_disk(void *_state, struct tbuf *t) } if (wal->class->fsync_delay > 0 && ev_now() - last_flush >= wal->class->fsync_delay) { - if (flush_log(wal) < 0) { + if (log_io_flush(wal) < 0) { say_syserror("can't flush wal"); goto fail; } @@ -1294,24 +1285,27 @@ write_to_disk(void *_state, struct tbuf *t) return reply; } -bool -wal_write(struct recovery_state *r, u16 tag, u64 cookie, i64 lsn, struct tbuf *row) +int +wal_write(struct recovery_state *r, u16 tag, u16 op, u64 cookie, + i64 lsn, struct tbuf *row) { - struct tbuf *m = tbuf_alloc(row->pool); + struct tbuf *m = tbuf_alloc(fiber->gc_pool); struct msg *a; say_debug("wal_write lsn=%" PRIi64, lsn); - tbuf_reserve(m, sizeof(struct 
wal_write_request) + sizeof(tag) + sizeof(cookie) + row->size); + tbuf_reserve(m, sizeof(struct wal_write_request) + + sizeof(tag) + sizeof(cookie) + sizeof(op) + row->size); m->size = sizeof(struct wal_write_request); wal_write_request(m)->lsn = lsn; - wal_write_request(m)->len = row->size + sizeof(tag) + sizeof(cookie); + wal_write_request(m)->len = sizeof(tag) + sizeof(cookie) + sizeof(op) + row->size; tbuf_append(m, &tag, sizeof(tag)); tbuf_append(m, &cookie, sizeof(cookie)); + tbuf_append(m, &op, sizeof(op)); tbuf_append(m, row->data, row->size); if (write_inbox(r->wal_writer->out, m) == false) { say_warn("wal writer inbox is full"); - return false; + return -1; } a = read_inbox(); @@ -1319,13 +1313,13 @@ wal_write(struct recovery_state *r, u16 tag, u64 cookie, i64 lsn, struct tbuf *r say_debug("wal_write reply=%" PRIu32, reply); if (reply != 0) say_warn("wal writer returned error status"); - return reply == 0; + return reply ? -1 : 0; } struct recovery_state * recover_init(const char *snap_dirname, const char *wal_dirname, - row_handler row_handler, - int rows_per_file, double fsync_delay, + row_handler row_handler, int rows_per_file, + const char *wal_mode, double fsync_delay, int inbox_size, int flags, void *data) { struct recovery_state *r = p0alloc(eter_pool, sizeof(*r)); @@ -1343,6 +1337,7 @@ recover_init(const char *snap_dirname, const char *wal_dirname, r->wal_class = xlog_class_create(wal_dirname); r->wal_class->rows_per_file = rows_per_file; r->wal_class->fsync_delay = fsync_delay; + r->wal_class->open_wflags = strcasecmp(wal_mode, "fsync") ? 
0 : WAL_SYNC_FLAG; wait_lsn_clear(&r->wait_lsn); if ((flags & RECOVER_READONLY) == 0) @@ -1351,6 +1346,14 @@ recover_init(const char *snap_dirname, const char *wal_dirname, return r; } +void +recovery_update_mode(struct recovery_state *r, const char *mode, + double fsync_delay) +{ + (void) mode; + r->wal_class->fsync_delay = fsync_delay; +} + void recover_free(struct recovery_state *recovery) { @@ -1363,7 +1366,7 @@ recover_free(struct recovery_state *recovery) v11_class_free(recovery->snap_class); v11_class_free(recovery->wal_class); if (recovery->current_wal) - close_log(&recovery->current_wal); + log_io_close(&recovery->current_wal); } void @@ -1395,10 +1398,10 @@ write_rows(struct log_io_iter *i) row_v11(row)->lsn = 0; /* unused */ row_v11(row)->tm = ev_now(); row_v11(row)->len = data->size; - row_v11(row)->data_crc32c = calc_crc32c(0, data->data, data->size); + row_v11(row)->data_crc32c = crc32_calc(0, data->data, data->size); row_v11(row)->header_crc32c = - calc_crc32c(0, row->data + field_sizeof(struct row_v11, header_crc32c), - sizeof(struct row_v11) - field_sizeof(struct row_v11, + crc32_calc(0, row->data + field_sizeof(struct row_v11, header_crc32c), + sizeof(struct row_v11) - field_sizeof(struct row_v11, header_crc32c)); if (fwrite(row->data, row->size, 1, l->f) != 1) @@ -1434,7 +1437,7 @@ snapshot_write_row(struct log_io_iter *i, u16 tag, u64 cookie, struct tbuf *row) bytes += row->size + sizeof(struct row_v11); while (bytes >= i->io_rate_limit) { - flush_log(i->log); + log_io_flush(i->log); ev_now_update(); elapsed = ev_now() - last; @@ -1462,7 +1465,7 @@ snapshot_save(struct recovery_state *r, void (*f) (struct log_io_iter *)) memset(&i, 0, sizeof(i)); - snap = open_for_write(r, r->snap_class, r->confirmed_lsn, -1, &save_errno); + snap = log_io_open_for_write(r, r->snap_class, r->confirmed_lsn, -1, &save_errno); if (snap == NULL) panic_status(save_errno, "can't open snap for writing"); @@ -1492,7 +1495,7 @@ snapshot_save(struct recovery_state *r, void 
(*f) (struct log_io_iter *)) if (unlink(snap->filename) == -1) say_syserror("can't unlink 'inprogress' snapshot"); - close_log(&snap); + log_io_close(&snap); say_info("done"); } diff --git a/core/log_io_remote.m b/core/log_io_remote.m index 4b14427f11..eefa828637 100644 --- a/core/log_io_remote.m +++ b/core/log_io_remote.m @@ -149,6 +149,7 @@ default_remote_row_handler(struct recovery_state *r, struct tbuf *row) struct tbuf *data; i64 lsn = row_v11(row)->lsn; u16 tag; + u16 op; /* save row data since wal_row_handler may clobber it */ data = tbuf_alloc(row->pool); @@ -159,8 +160,9 @@ default_remote_row_handler(struct recovery_state *r, struct tbuf *row) tag = read_u16(data); (void)read_u64(data); /* drop the cookie */ + op = read_u16(data); - if (wal_write(r, tag, r->cookie, lsn, data) == false) + if (wal_write(r, tag, op, r->cookie, lsn, data)) panic("replication failure: can't write row to WAL"); next_lsn(r, lsn); diff --git a/core/replication.m b/core/replication.m index b7644fd19a..85298fde11 100644 --- a/core/replication.m +++ b/core/replication.m @@ -609,7 +609,9 @@ replication_relay_loop(int client_sock) /* init reovery porcess */ log_io = recover_init(NULL, cfg.wal_dir, - replication_relay_send_row, INT32_MAX, 0, 64, RECOVER_READONLY, false); + replication_relay_send_row, + INT32_MAX, "fsync_delay", 0, 64, + RECOVER_READONLY, false); recover(log_io, lsn); recover_follow(log_io, 0.1); diff --git a/core/tarantool.m b/core/tarantool.m index 055b0696f7..b778cc901d 100644 --- a/core/tarantool.m +++ b/core/tarantool.m @@ -48,6 +48,7 @@ #include <iproto.h> #include <latch.h> #include <log_io.h> +#include <crc32.h> #include <palloc.h> #include <salloc.h> #include <say.h> @@ -74,6 +75,18 @@ static ev_signal *sigs = NULL; bool init_storage, booting = true; +static int +core_check_config(struct tarantool_cfg *conf) +{ + /* Check that the mode is a supported one. 
*/ + if (strcmp(conf->wal_mode, "fsync") != 0 && + strcmp(conf->wal_mode, "fsync_delay") != 0) { + out_warning(0, "wal_mode is not one of 'fsync', 'fsync_delay'"); + return -1; + } + return 0; +} + static i32 load_cfg(struct tarantool_cfg *conf, i32 check_rdonly) { @@ -102,12 +115,50 @@ load_cfg(struct tarantool_cfg *conf, i32 check_rdonly) if (n_accepted == 0 || n_skipped != 0) return -1; + if (core_check_config(conf) != 0) + return -1; + if (replication_check_config(conf) != 0) return -1; return mod_check_config(conf); } +static int +core_reload_config(const struct tarantool_cfg *old_conf, + const struct tarantool_cfg *new_conf) +{ + if (strcasecmp(old_conf->wal_mode, new_conf->wal_mode) == 0 && + old_conf->wal_fsync_delay == new_conf->wal_fsync_delay) + return 0; + + double new_delay = new_conf->wal_fsync_delay; + + /* Mode has changed: */ + if (strcasecmp(old_conf->wal_mode, new_conf->wal_mode)) { + if (strcasecmp(old_conf->wal_mode, "fsync") == 0 || + strcasecmp(new_conf->wal_mode, "fsync") == 0) { + out_warning(0, "wal_mode cannot switch to/from fsync"); + return -1; + } + say_debug("%s: wal_mode [%s] -> [%s]", + __func__, old_conf->wal_mode, new_conf->wal_mode); + } + + /* + * Unless wal_mode=fsync_delay, wal_fsync_delay is irrelevant and must be 0. + */ + if (strcasecmp(new_conf->wal_mode, "fsync_delay") != 0) + new_delay = 0.0; + + if (old_conf->wal_fsync_delay != new_delay) + say_debug("%s: wal_fsync_delay [%f] -> [%f]", + __func__, old_conf->wal_fsync_delay, new_delay); + + recovery_update_mode(recovery_state, new_conf->wal_mode, new_delay); + + return 0; +} i32 reload_cfg(struct tbuf *out) @@ -159,6 +210,10 @@ reload_cfg(struct tbuf *out) return -1; } + /* Process wal-writer-related changes. */ + if (core_reload_config(&cfg, &new_cfg) != 0) + return -1; + /* Now pass the config to the module, to take action. 
*/ if (mod_reload_config(&cfg, &new_cfg) != 0) return -1; @@ -391,12 +446,6 @@ initialize_minimal() initialize(0.1, 4, 2); } -inline static void -mach_init() -{ - mach_setup_crc32(); -} - int main(int argc, char **argv) { @@ -415,9 +464,9 @@ main(int argc, char **argv) #endif master_pid = getpid(); + crc32_init(); stat_init(); palloc_init(); - mach_init(); #ifdef HAVE_BFD symbols_load(argv[0]); diff --git a/include/config.h.cmake b/include/config.h.cmake index b09c4a209a..961fdf11ce 100644 --- a/include/config.h.cmake +++ b/include/config.h.cmake @@ -42,6 +42,19 @@ #define MAP_ANONYMOUS MAP_ANON #endif +/* + * Defined if O_DSYNC mode exists for open(2). + */ +#cmakedefine HAVE_O_DSYNC 1 +#if defined(HAVE_O_DSYNC) + #define WAL_SYNC_FLAG O_DSYNC +#else + #define WAL_SYNC_FLAG O_SYNC +#endif +/* + * Defined if fdatasync(2) call is present. + */ +#cmakedefine HAVE_FDATASYNC 1 /* * Set if this is a GNU system and libc has __libc_stack_end. */ diff --git a/include/crc32.h b/include/crc32.h new file mode 100644 index 0000000000..f1dba110f2 --- /dev/null +++ b/include/crc32.h @@ -0,0 +1,44 @@ +#ifndef TARANTOOL_CRC32_H_INCLUDED +#define TARANTOOL_CRC32_H_INCLUDED +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <util.h> + +typedef u32 (*crc32_func)(u32 crc, const unsigned char *buf, + unsigned int len); + +/* + * Pointer to an architecture-specific implementation of + * CRC32 calculation method. + */ +extern crc32_func crc32_calc; + +void crc32_init(); + +#endif /* TARANTOOL_CRC32_H_INCLUDED */ diff --git a/include/log_io.h b/include/log_io.h index e157a2bb0e..9dba46403c 100644 --- a/include/log_io.h +++ b/include/log_io.h @@ -57,9 +57,12 @@ struct log_io_class { u64 marker, eof_marker; size_t marker_size, eof_marker_size; size_t rows_per_file; + /* wal_fsync_delay value for the log class. */ double fsync_delay; bool panic_if_error; + /* Additional flags to apply at open(2) to write. 
*/ + int open_wflags; const char *filetype; const char *version; const char *suffix; @@ -152,13 +155,17 @@ struct tbuf *convert_to_v11(struct tbuf *orig, u16 tag, u64 cookie, i64 lsn); struct recovery_state *recover_init(const char *snap_dirname, const char *xlog_dirname, row_handler row_handler, - int rows_per_file, double fsync_delay, int inbox_size, + int rows_per_file, const char *wal_mode, + double fsync_delay, int inbox_size, int flags, void *data); +void recovery_update_mode(struct recovery_state *r, + const char *wal_mode, double fsync_delay); void recover_free(struct recovery_state *recovery); int recover(struct recovery_state *, i64 lsn); void recover_follow(struct recovery_state *r, ev_tstamp wal_dir_rescan_delay); void recover_finalize(struct recovery_state *r); -bool wal_write(struct recovery_state *r, u16 tag, u64 cookie, i64 lsn, struct tbuf *data); +int wal_write(struct recovery_state *r, u16 tag, u16 op, + u64 cookie, i64 lsn, struct tbuf *data); void recovery_setup_panic(struct recovery_state *r, bool on_snap_error, bool on_wal_error); diff --git a/include/tarantool_pthread.h b/include/tarantool_pthread.h new file mode 100644 index 0000000000..784350f918 --- /dev/null +++ b/include/tarantool_pthread.h @@ -0,0 +1,157 @@ +#ifndef TARANTOOL_PTHREAD_H_INCLUDED +#define TARANTOOL_PTHREAD_H_INCLUDED +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <pthread.h> + +#include <util.h> +#include <say.h> + +/** + * Assert on any pthread* error in debug mode. In release, + * write into the log file where and what has failed. + * + * Still give the user an opportunity to manually + * check for error, by returning the pthread_* + * function status up. + */ + +#define tt_pthread_error(e) \ + if (e != 0) \ + say_error("%s error %d", __func__, e);\ + assert(e == 0); \ + e + +/** + * Debug/logging friendly wrappers around pthread + * functions. 
+ */ + +#define tt_pthread_mutex_init(mutex, attr) \ +({ int e = pthread_mutex_init(mutex, attr);\ + tt_pthread_error(e); \ +}) + +#define tt_pthread_mutex_destroy(mutex) \ +({ int e = pthread_mutex_destroy(mutex); \ + tt_pthread_error(e); \ +}) + +#define tt_pthread_mutex_lock(mutex) \ +({ int e = pthread_mutex_lock(mutex); \ + say_debug("%s: locking %s", __func__, #mutex);\ + tt_pthread_error(e);\ +}) + +#define tt_pthread_mutex_trylock(mutex) \ +({ int e = pthread_mutex_trylock(mutex); \ + if (e != 0 && e != EBUSY) \ + say_error("%s error %d at %s:%d", __func__, e, __FILE__, __LINE__);\ + assert(e == 0 || e == EBUSY); \ + e; /* macro value: 0 or EBUSY are both handed back to the caller */ \ +}) + +#define tt_pthread_mutex_unlock(mutex) \ +({ int e = pthread_mutex_unlock(mutex); \ + say_debug("%s: unlocking %s", __func__, #mutex);\ + tt_pthread_error(e); \ +}) + +#define tt_pthread_mutexattr_init(attr) \ +({ int e = pthread_mutexattr_init(attr); \ + tt_pthread_error(e); \ +}) + +#define tt_pthread_mutexattr_destroy(attr) \ +({ int e = pthread_mutexattr_destroy(attr);\ + tt_pthread_error(e); \ +}) + +#define tt_pthread_mutexattr_gettype(attr, type)\ +({ int e = pthread_mutexattr_gettype(attr, type);\ + tt_pthread_error(e); \ +}) + +#define tt_pthread_mutexattr_settype(attr, type)\ +({ int e = pthread_mutexattr_settype(attr, type);\ + tt_pthread_error(e); \ +}) + +#define tt_pthread_condattr_init(attr) \ +({ int e = pthread_condattr_init(attr); \ + tt_pthread_error(e); \ +}) + +#define tt_pthread_condattr_destroy(attr) \ +({ int e = pthread_condattr_destroy(attr); \ + tt_pthread_error(e); \ +}) + +#define tt_pthread_condattr_getclock(attr, clock_id)\ +({ int e = pthread_condattr_getclock(attr, clock_id);\ + tt_pthread_error(e); \ +}) + +#define tt_pthread_condattr_setclock(attr, clock_id)\ +({ int e = pthread_condattr_setclock(attr, clock_id);\ + tt_pthread_error(e); \ +}) + +#define tt_pthread_cond_init(cond, 
attr) \ +({ int e = pthread_cond_init(cond, attr); \ + tt_pthread_error(e); \ +}) + +#define tt_pthread_cond_destroy(cond) \ +({ int e = pthread_cond_destroy(cond); \ + tt_pthread_error(e); \ +}) + +#define tt_pthread_cond_signal(cond) \ +({ int e = pthread_cond_signal(cond); \ + tt_pthread_error(e); \ +}) + +#define tt_pthread_cond_wait(cond, mutex) \ +({ int e = pthread_cond_wait(cond, mutex);\ + tt_pthread_error(e); \ +}) + +/* + * ETIMEDOUT is an expected status: return it without logging or asserting. + */ +#define tt_pthread_cond_timedwait(cond, mutex, timeout) \ +({ int e = pthread_cond_timedwait(cond, mutex, timeout);\ + if (e != 0 && e != ETIMEDOUT) \ + say_error("%s error %d", __func__, e);\ + assert(e == 0 || e == ETIMEDOUT); \ + e; \ +}) + +#endif /* TARANTOOL_PTHREAD_H_INCLUDED */ diff --git a/mod/box/box.m b/mod/box/box.m index 0104696a7d..043984512e 100644 --- a/mod/box/box.m +++ b/mod/box/box.m @@ -1247,13 +1247,10 @@ txn_commit(struct box_txn *txn) ; else { fiber_peer_name(fiber); /* fill the cookie */ - struct tbuf *t = tbuf_alloc(fiber->gc_pool); - tbuf_append(t, &txn->op, sizeof(txn->op)); - tbuf_append(t, txn->req.data, txn->req.size); i64 lsn = next_lsn(recovery_state, 0); - bool res = !wal_write(recovery_state, wal_tag, - fiber->cookie, lsn, t); + int res = wal_write(recovery_state, wal_tag, txn->op, + fiber->cookie, lsn, &txn->req); confirm_lsn(recovery_state, lsn); if (res) tnt_raise(LoggedError, :ER_WAL_IO); @@ -2152,8 +2149,8 @@ mod_init(void) /* recovery initialization */ recovery_state = recover_init(cfg.snap_dir, cfg.wal_dir, - recover_row, cfg.rows_per_wal, cfg.wal_fsync_delay, - cfg.wal_writer_inbox_size, + recover_row, cfg.rows_per_wal, cfg.wal_mode, + cfg.wal_fsync_delay, cfg.wal_writer_inbox_size, init_storage ? 
RECOVER_READONLY : 0, NULL); recovery_state->snap_io_rate_limit = cfg.snap_io_rate_limit * 1024 * 1024; diff --git a/mod/box/box_cfg.cfg_tmpl b/mod/box/box_cfg.cfg_tmpl index d8b124e5cf..318b287932 100644 --- a/mod/box/box_cfg.cfg_tmpl +++ b/mod/box/box_cfg.cfg_tmpl @@ -1,12 +1,6 @@ ## BOX -# Snapshot directory (where snapshots get saved/read) -snap_dir=".", ro - -# WAL directory (where WALs get saved/read) -wal_dir=".", ro - # Primary port (where updates are accepted) primary_port=0, ro, required @@ -31,36 +25,6 @@ memcached_expire_per_loop=1024 # tarantool will try to iterate over all rows within this time memcached_expire_full_sweep=3600.0 -# Do not write into snapshot faster than snap_io_rate_limit MB/sec -snap_io_rate_limit=0.0, ro - -# Write no more rows in WAL -rows_per_wal=500000, ro - -# fsync WAL delay, only issue fsync if last fsync was wal_fsync_delay -# seconds ago. -# WARNING: actually, several last requests may stall fsync for much longer -wal_fsync_delay=0.0, ro - -# size of WAL writer request buffer -wal_writer_inbox_size=128, ro - -# Local hot standby (if enabled, the server will run in hot -# standby mode, continuously fetching WAL records from wal_dir, -# until it is able to bind to the primary port. -# In local hot standby mode the server only accepts reads. -local_hot_standby=false, ro -# Delay, in seconds, between successive re-readings of wal_dir. -# The re-scan is necessary to discover new WAL files or snapshots. -wal_dir_rescan_delay=0.1, ro - - -# Panic if there is an error reading a snapshot or WAL. -# By default, panic on any snapshot reading error and ignore errors -# when reading WALs. 
-panic_on_snap_error=true, ro -panic_on_wal_error=false, ro - # Replication mode (if enabled, the server, once # bound to the primary port, will connect to # replication_source (ipaddr:port) and run continously diff --git a/test/box/admin.result b/test/box/admin.result index 0bd780a395..2b376cb59e 100644 --- a/test/box/admin.result +++ b/test/box/admin.result @@ -30,6 +30,7 @@ show configuration --- configuration: username: (null) + local_hot_standby: "false" bind_ipaddr: "INADDR_ANY" coredump: "false" admin_port: "33015" @@ -39,14 +40,22 @@ configuration: slab_alloc_minimal: "64" slab_alloc_factor: "2" work_dir: (null) + snap_dir: "." + wal_dir: "." pid_file: "box.pid" logger: "cat - >> tarantool.log" logger_nonblock: "true" io_collect_interval: "0" backlog: "1024" readahead: "16320" - snap_dir: "." - wal_dir: "." + snap_io_rate_limit: "0" + rows_per_wal: "50" + wal_writer_inbox_size: "16384" + wal_mode: "fsync_delay" + wal_fsync_delay: "0" + wal_dir_rescan_delay: "0.1" + panic_on_snap_error: "true" + panic_on_wal_error: "false" primary_port: "33013" secondary_port: "33014" too_long_threshold: "0.5" @@ -56,14 +65,6 @@ configuration: memcached_expire: "false" memcached_expire_per_loop: "1024" memcached_expire_full_sweep: "3600" - snap_io_rate_limit: "0" - rows_per_wal: "50" - wal_fsync_delay: "0" - wal_writer_inbox_size: "128" - local_hot_standby: "false" - wal_dir_rescan_delay: "0.1" - panic_on_snap_error: "true" - panic_on_wal_error: "false" replication_source: (null) space[0].enabled: "true" space[0].cardinality: "-1" diff --git a/test/box/configuration.result b/test/box/configuration.result index e84f963d5b..02ce2663e0 100644 --- a/test/box/configuration.result +++ b/test/box/configuration.result @@ -8,6 +8,7 @@ show configuration --- configuration: username: (null) + local_hot_standby: "false" bind_ipaddr: "INADDR_ANY" coredump: "false" admin_port: "33015" @@ -17,14 +18,22 @@ configuration: slab_alloc_minimal: "64" slab_alloc_factor: "2" work_dir: (null) + 
snap_dir: "." + wal_dir: "." pid_file: "box.pid" logger: "cat - >> tarantool.log" logger_nonblock: "true" io_collect_interval: "0" backlog: "1024" readahead: "16320" - snap_dir: "." - wal_dir: "." + snap_io_rate_limit: "0" + rows_per_wal: "50" + wal_writer_inbox_size: "16384" + wal_mode: "fsync_delay" + wal_fsync_delay: "0" + wal_dir_rescan_delay: "0.1" + panic_on_snap_error: "true" + panic_on_wal_error: "false" primary_port: "33013" secondary_port: "33014" too_long_threshold: "0.5" @@ -34,14 +43,6 @@ configuration: memcached_expire: "false" memcached_expire_per_loop: "1024" memcached_expire_full_sweep: "3600" - snap_io_rate_limit: "0" - rows_per_wal: "50" - wal_fsync_delay: "0" - wal_writer_inbox_size: "128" - local_hot_standby: "false" - wal_dir_rescan_delay: "0.1" - panic_on_snap_error: "true" - panic_on_wal_error: "false" replication_source: (null) space[0].enabled: "true" space[0].cardinality: "-1" @@ -70,6 +71,7 @@ show configuration --- configuration: username: (null) + local_hot_standby: "false" bind_ipaddr: "INADDR_ANY" coredump: "false" admin_port: "33015" @@ -79,14 +81,22 @@ configuration: slab_alloc_minimal: "64" slab_alloc_factor: "2" work_dir: (null) + snap_dir: "." + wal_dir: "." pid_file: "box.pid" logger: "cat - >> tarantool.log" logger_nonblock: "true" io_collect_interval: "0" backlog: "1024" readahead: "16320" - snap_dir: "." - wal_dir: "." 
+ snap_io_rate_limit: "0" + rows_per_wal: "50" + wal_writer_inbox_size: "16384" + wal_mode: "fsync_delay" + wal_fsync_delay: "0" + wal_dir_rescan_delay: "0.1" + panic_on_snap_error: "true" + panic_on_wal_error: "false" primary_port: "33013" secondary_port: "33014" too_long_threshold: "0.5" @@ -96,14 +106,6 @@ configuration: memcached_expire: "false" memcached_expire_per_loop: "1024" memcached_expire_full_sweep: "3600" - snap_io_rate_limit: "0" - rows_per_wal: "50" - wal_fsync_delay: "0" - wal_writer_inbox_size: "128" - local_hot_standby: "false" - wal_dir_rescan_delay: "0.1" - panic_on_snap_error: "true" - panic_on_wal_error: "false" replication_source: (null) space[0].enabled: "false" space[0].cardinality: "-1" diff --git a/test/box/lua.result b/test/box/lua.result index 0c696e2d7d..09b30eafb0 100644 --- a/test/box/lua.result +++ b/test/box/lua.result @@ -311,35 +311,36 @@ lua for k,v in pairs(box.cfg) do print(' - ', k, ': ', v) end --- - io_collect_interval: 0 - pid_file: box.pid - - panic_on_wal_error: false - - slab_alloc_factor: 2 - slab_alloc_minimal: 64 - - admin_port: 33015 - - logger: cat - >> tarantool.log - - too_long_threshold: 0.5 - - wal_dir_rescan_delay: 0.1 - - slab_alloc_arena: 0.1 - - wal_dir: . - - wal_writer_inbox_size: 128 - - secondary_port: 33014 - - backlog: 1024 - - rows_per_wal: 50 + - primary_port: 33013 + - log_level: 4 - logger_nonblock: true - memcached_expire_per_loop: 1024 - snap_dir: . - coredump: false - - snap_io_rate_limit: 0 - - primary_port: 33013 - - log_level: 4 - - memcached_space: 23 - - memcached_port: 0 - panic_on_snap_error: true - - local_hot_standby: false - memcached_expire_full_sweep: 3600 - replication_port: 0 - - bind_ipaddr: INADDR_ANY - wal_fsync_delay: 0 + - too_long_threshold: 0.5 + - slab_alloc_factor: 2 + - admin_port: 33015 + - logger: cat - >> tarantool.log + - snap_io_rate_limit: 0 + - wal_writer_inbox_size: 16384 + - backlog: 1024 + - wal_dir_rescan_delay: 0.1 + - wal_dir: . 
+ - memcached_port: 0 + - wal_mode: fsync_delay + - local_hot_standby: false - readahead: 16320 + - panic_on_wal_error: false + - rows_per_wal: 50 + - secondary_port: 33014 + - bind_ipaddr: INADDR_ANY + - slab_alloc_arena: 0.1 + - memcached_space: 23 - memcached_expire: false ... lua for k,v in pairs(box.space[0]) do if type(v) ~= 'table' then print(' - ', k, ': ', v) end end @@ -357,35 +358,36 @@ lua for k,v in pairs(box.cfg) do print(' - ', k, ': ', v) end --- - io_collect_interval: 0 - pid_file: box.pid - - panic_on_wal_error: false - - slab_alloc_factor: 2 - slab_alloc_minimal: 64 - - admin_port: 33015 - - logger: cat - >> tarantool.log - - too_long_threshold: 0.5 - - wal_dir_rescan_delay: 0.1 - - slab_alloc_arena: 0.1 - - wal_dir: . - - wal_writer_inbox_size: 128 - - secondary_port: 33014 - - backlog: 1024 - - rows_per_wal: 50 + - primary_port: 33013 + - log_level: 4 - logger_nonblock: true - memcached_expire_per_loop: 1024 - snap_dir: . - coredump: false - - snap_io_rate_limit: 0 - - primary_port: 33013 - - log_level: 4 - - memcached_space: 23 - - memcached_port: 0 - panic_on_snap_error: true - - local_hot_standby: false - memcached_expire_full_sweep: 3600 - replication_port: 0 - - bind_ipaddr: INADDR_ANY - wal_fsync_delay: 0 + - too_long_threshold: 0.5 + - slab_alloc_factor: 2 + - admin_port: 33015 + - logger: cat - >> tarantool.log + - snap_io_rate_limit: 0 + - wal_writer_inbox_size: 16384 + - backlog: 1024 + - wal_dir_rescan_delay: 0.1 + - wal_dir: . + - memcached_port: 0 + - wal_mode: fsync_delay + - local_hot_standby: false - readahead: 16320 + - panic_on_wal_error: false + - rows_per_wal: 50 + - secondary_port: 33014 + - bind_ipaddr: INADDR_ANY + - slab_alloc_arena: 0.1 + - memcached_space: 23 - memcached_expire: false ... lua for k,v in pairs(box.space[0]) do if type(v) ~= 'table' then print(' - ', k, ': ', v) end end @@ -397,11 +399,11 @@ lua for k,v in pairs(box.space[0]) do if type(v) ~= 'table' then print(' - ', k, ... 
lua box.cfg.nosuchoption = 1 --- -error: 'Lua error: [string "box.cfg = {}..."]:50: Attempt to modify a read-only table' +error: 'Lua error: [string "box.cfg = {}..."]:51: Attempt to modify a read-only table' ... lua box.space[300] = 1 --- -error: 'Lua error: [string "box.cfg = {}..."]:50: Attempt to modify a read-only table' +error: 'Lua error: [string "box.cfg = {}..."]:51: Attempt to modify a read-only table' ... lua box.index.new('abc', 'cde') --- diff --git a/test/box_memcached/off.result b/test/box_memcached/off.result index 24d2b54646..09a66bf775 100644 --- a/test/box_memcached/off.result +++ b/test/box_memcached/off.result @@ -7,6 +7,7 @@ show configuration --- configuration: username: (null) + local_hot_standby: "false" bind_ipaddr: "INADDR_ANY" coredump: "false" admin_port: "33015" @@ -16,14 +17,22 @@ configuration: slab_alloc_minimal: "64" slab_alloc_factor: "2" work_dir: (null) + snap_dir: "." + wal_dir: "." pid_file: "box.pid" logger: "cat - >> tarantool.log" logger_nonblock: "true" io_collect_interval: "0" backlog: "1024" readahead: "16320" - snap_dir: "." - wal_dir: "." 
+ snap_io_rate_limit: "0" + rows_per_wal: "50" + wal_writer_inbox_size: "16384" + wal_mode: "fsync_delay" + wal_fsync_delay: "0" + wal_dir_rescan_delay: "0.1" + panic_on_snap_error: "true" + panic_on_wal_error: "false" primary_port: "33013" secondary_port: "33014" too_long_threshold: "0.5" @@ -33,14 +42,6 @@ configuration: memcached_expire: "false" memcached_expire_per_loop: "1024" memcached_expire_full_sweep: "3600" - snap_io_rate_limit: "0" - rows_per_wal: "50" - wal_fsync_delay: "0" - wal_writer_inbox_size: "128" - local_hot_standby: "false" - wal_dir_rescan_delay: "0.1" - panic_on_snap_error: "true" - panic_on_wal_error: "false" replication_source: (null) space[0].enabled: "true" space[0].cardinality: "-1" diff --git a/third_party/compat/sys/bsd_time.h b/third_party/compat/sys/bsd_time.h new file mode 100644 index 0000000000..a2cfe663ef --- /dev/null +++ b/third_party/compat/sys/bsd_time.h @@ -0,0 +1,122 @@ +/* $OpenBSD: time.h,v 1.11 2000/10/10 13:36:48 itojun Exp $ */ +/* $NetBSD: time.h,v 1.18 1996/04/23 10:29:33 mycroft Exp $ */ + +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)time.h 8.2 (Berkeley) 7/10/94 + */ + +#ifndef _SYS_TTL_COMPAT_TIME_H_ +#define _SYS_TTL_COMPAT_TIME_H_ + +#include <sys/types.h> + +#ifndef TIMEVAL_TO_TIMESPEC +#define TIMEVAL_TO_TIMESPEC(tv, ts) { \ + (ts)->tv_sec = (tv)->tv_sec; \ + (ts)->tv_nsec = (tv)->tv_usec * 1000; \ +} +#endif + +#ifndef TIMESPEC_TO_TIMEVAL +#define TIMESPEC_TO_TIMEVAL(tv, ts) { \ + (tv)->tv_sec = (ts)->tv_sec; \ + (tv)->tv_usec = (ts)->tv_nsec / 1000; \ +} +#endif + +/** + * Enable BSD timer macros for non-BSD code. + */ +#if !defined(__BSD) && !defined(__USE_BSD) + +/* Operations on timevals. */ +#define timerclear(tvp) (tvp)->tv_sec = (tvp)->tv_usec = 0 +#define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec) +#define timercmp(tvp, uvp, cmp) \ + (((tvp)->tv_sec == (uvp)->tv_sec) ? 
\ + ((tvp)->tv_usec cmp (uvp)->tv_usec) : \ + ((tvp)->tv_sec cmp (uvp)->tv_sec)) +#define timeradd(tvp, uvp, vvp) \ + do { \ + (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec; \ + (vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec; \ + if ((vvp)->tv_usec >= 1000000) { \ + (vvp)->tv_sec++; \ + (vvp)->tv_usec -= 1000000; \ + } \ + } while (0) +#define timersub(tvp, uvp, vvp) \ + do { \ + (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ + (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ + if ((vvp)->tv_usec < 0) { \ + (vvp)->tv_sec--; \ + (vvp)->tv_usec += 1000000; \ + } \ + } while (0) + +#endif /* !defined(__BSD) && !defined(__USE_BSD) */ + + +/* Operations on timespecs. + Include if missing (one API-call check should suffice). */ +#if !defined(timespecclear) + +#define timespecclear(tsp) (tsp)->tv_sec = (tsp)->tv_nsec = 0 +#define timespecisset(tsp) ((tsp)->tv_sec || (tsp)->tv_nsec) +#define timespeccmp(tsp, usp, cmp) \ + (((tsp)->tv_sec == (usp)->tv_sec) ? \ + ((tsp)->tv_nsec cmp (usp)->tv_nsec) : \ + ((tsp)->tv_sec cmp (usp)->tv_sec)) +#define timespecadd(tsp, usp, vsp) \ + do { \ + (vsp)->tv_sec = (tsp)->tv_sec + (usp)->tv_sec; \ + (vsp)->tv_nsec = (tsp)->tv_nsec + (usp)->tv_nsec; \ + if ((vsp)->tv_nsec >= 1000000000L) { \ + (vsp)->tv_sec++; \ + (vsp)->tv_nsec -= 1000000000L; \ + } \ + } while (0) +#define timespecsub(tsp, usp, vsp) \ + do { \ + (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \ + (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \ + if ((vsp)->tv_nsec < 0) { \ + (vsp)->tv_sec--; \ + (vsp)->tv_nsec += 1000000000L; \ + } \ + } while (0) + +#endif /* !defined(timespecclear) */ + + +/* --- stuff got cut here - kostja, niels --- */ + +#endif /* !_SYS_TTL_COMPAT_TIME_H_ */ -- GitLab