diff --git a/cfg/tarantool_silverbox_cfg.c b/cfg/tarantool_silverbox_cfg.c index 361478850f5fde102e49a027ed5fd257098ed4bc..10c9d296c1856accf346a4787e31a655a31e7dee 100644 --- a/cfg/tarantool_silverbox_cfg.c +++ b/cfg/tarantool_silverbox_cfg.c @@ -60,6 +60,8 @@ fill_default_tarantool_cfg(tarantool_cfg *c) { c->wal_writer_inbox_size = 128; c->local_hot_standby = 0; c->wal_dir_rescan_delay = 0.1; + c->panic_on_snap_error = 1; + c->panic_on_wal_error = 0; c->remote_hot_standby = 0; c->wal_feeder_ipaddr = NULL; c->wal_feeder_port = 0; @@ -183,6 +185,12 @@ static NameAtom _name__local_hot_standby[] = { static NameAtom _name__wal_dir_rescan_delay[] = { { "wal_dir_rescan_delay", -1, NULL } }; +static NameAtom _name__panic_on_snap_error[] = { + { "panic_on_snap_error", -1, NULL } +}; +static NameAtom _name__panic_on_wal_error[] = { + { "panic_on_wal_error", -1, NULL } +}; static NameAtom _name__remote_hot_standby[] = { { "remote_hot_standby", -1, NULL } }; @@ -547,6 +555,28 @@ acceptValue(tarantool_cfg* c, OptDef* opt, int check_rdonly) { if ( (c->wal_dir_rescan_delay == 0 || c->wal_dir_rescan_delay == -HUGE_VAL || c->wal_dir_rescan_delay == HUGE_VAL) && errno == ERANGE) return CNF_WRONGRANGE; } + else if ( cmpNameAtoms( opt->name, _name__panic_on_snap_error) ) { + if (opt->paramType != numberType ) + return CNF_WRONGTYPE; + errno = 0; + long int i32 = strtol(opt->paramValue.numberval, NULL, 10); + if (i32 == 0 && errno == EINVAL) + return CNF_WRONGINT; + if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) + return CNF_WRONGRANGE; + c->panic_on_snap_error = i32; + } + else if ( cmpNameAtoms( opt->name, _name__panic_on_wal_error) ) { + if (opt->paramType != numberType ) + return CNF_WRONGTYPE; + errno = 0; + long int i32 = strtol(opt->paramValue.numberval, NULL, 10); + if (i32 == 0 && errno == EINVAL) + return CNF_WRONGINT; + if ( (i32 == LONG_MIN || i32 == LONG_MAX) && errno == ERANGE) + return CNF_WRONGRANGE; + c->panic_on_wal_error = i32; + } else if ( 
cmpNameAtoms( opt->name, _name__remote_hot_standby) ) { if (opt->paramType != numberType ) return CNF_WRONGTYPE; @@ -787,6 +817,8 @@ typedef enum IteratorState { S_name__wal_writer_inbox_size, S_name__local_hot_standby, S_name__wal_dir_rescan_delay, + S_name__panic_on_snap_error, + S_name__panic_on_wal_error, S_name__remote_hot_standby, S_name__wal_feeder_ipaddr, S_name__wal_feeder_port, @@ -1148,6 +1180,28 @@ tarantool_cfg_iterator_next(tarantool_cfg_iterator_t* i, tarantool_cfg *c, char } sprintf(*v, "%g", c->wal_dir_rescan_delay); snprintf(buf, PRINTBUFLEN-1, "wal_dir_rescan_delay"); + i->state = S_name__panic_on_snap_error; + return buf; + case S_name__panic_on_snap_error: + *v = malloc(32); + if (*v == NULL) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + sprintf(*v, "%"PRId32, c->panic_on_snap_error); + snprintf(buf, PRINTBUFLEN-1, "panic_on_snap_error"); + i->state = S_name__panic_on_wal_error; + return buf; + case S_name__panic_on_wal_error: + *v = malloc(32); + if (*v == NULL) { + free(i); + out_warning(CNF_NOMEMORY, "No memory to output value"); + return NULL; + } + sprintf(*v, "%"PRId32, c->panic_on_wal_error); + snprintf(buf, PRINTBUFLEN-1, "panic_on_wal_error"); i->state = S_name__remote_hot_standby; return buf; case S_name__remote_hot_standby: diff --git a/cfg/tarantool_silverbox_cfg.cfg b/cfg/tarantool_silverbox_cfg.cfg index 162e07540db3c17cfb22331f06a49e484d0008cb..de8d1edb3e11d37d50ef0f03a2871435303cb78c 100644 --- a/cfg/tarantool_silverbox_cfg.cfg +++ b/cfg/tarantool_silverbox_cfg.cfg @@ -96,6 +96,11 @@ local_hot_standby = 0 # delay in fractional seconds between successive re-readings of wal_dir wal_dir_rescan_delay = 0.1 +# panic if there is an error reading snap or wal +# by default panic on any snapshot reading error and ignore errors when reading wals +panic_on_snap_error = 1 +panic_on_wal_error = 0 + # Remote hot standby (if enabled server will run in hot standby mode # continuously fetching WAL records 
from wal_feeder_ipaddr:wal_feeder_port remote_hot_standby = 0 diff --git a/cfg/tarantool_silverbox_cfg.cfg_tmpl b/cfg/tarantool_silverbox_cfg.cfg_tmpl index b59c1a6e558ba8c2c01707059ca7773ada53ec55..444d681863f65c09089cae2923c0d7424b930969 100644 --- a/cfg/tarantool_silverbox_cfg.cfg_tmpl +++ b/cfg/tarantool_silverbox_cfg.cfg_tmpl @@ -80,7 +80,6 @@ memcached_expire_full_sweep=3600 # do not write snapshot faster then snap_io_rate_limit MBytes/sec snap_io_rate_limit=0.0 - # Write no more rows in WAL rows_per_wal=500000 @@ -97,6 +96,12 @@ local_hot_standby=0 # delay in fractional seconds between successive re-readings of wal_dir wal_dir_rescan_delay=0.1 + +# panic if there is an error reading snap or wal +# by default panic on any snapshot reading error and ignore errors when reading wals +panic_on_snap_error=1 +panic_on_wal_error=0 + # Remote hot standby (if enabled server will run in hot standby mode # continuously fetching WAL records from wal_feeder_ipaddr:wal_feeder_port remote_hot_standby=0 diff --git a/cfg/tarantool_silverbox_cfg.h b/cfg/tarantool_silverbox_cfg.h index 5178a74ce4badf33ffb9cc24d15ae31687ae017d..02d6b6559ffb0ad9b22791cd0c42ce369220a10e 100644 --- a/cfg/tarantool_silverbox_cfg.h +++ b/cfg/tarantool_silverbox_cfg.h @@ -135,6 +135,13 @@ typedef struct tarantool_cfg { /* delay in fractional seconds between successive re-readings of wal_dir */ double wal_dir_rescan_delay; + /* + * panic if there is an error reading snap or wal + * by default panic on any snapshot reading error and ignore errors when reading wals + */ + int32_t panic_on_snap_error; + int32_t panic_on_wal_error; + /* * Remote hot standby (if enabled server will run in hot standby mode * continuously fetching WAL records from wal_feeder_ipaddr:wal_feeder_port diff --git a/core/log_io.c b/core/log_io.c index 8061104cc30e3b52a9586fbb711086459e02a0b1..7dfd45ca2da3c9c43d51eb8d4582869b485e64fb 100644 --- a/core/log_io.c +++ b/core/log_io.c @@ -266,6 +266,8 @@ read_rows(struct log_io_iter *i) goto eof; if 
(row == NULL) { + if (l->class->panic_if_error) + panic("failed to read row"); say_warn("failed to read row"); goto restart; } @@ -1305,6 +1307,18 @@ recover_init(const char *snap_dirname, const char *wal_dirname, return r; } +void +recovery_setup_panic(struct recovery_state *r, bool on_snap_error, bool on_wal_error) +{ + struct log_io_class **class; + + for (class = r->wal_class; *class; class++) + (*class)->panic_if_error = on_wal_error; + + for (class = r->snap_class; *class; class++) + (*class)->panic_if_error = on_snap_error; +} + static void write_rows(struct log_io_iter *i) { diff --git a/include/log_io.h b/include/log_io.h index 02f75308a6f8d4d341580a443083606d6ce0f4d8..52398070d16cdbc9662eab180cd54ba18de60143 100644 --- a/include/log_io.h +++ b/include/log_io.h @@ -55,6 +55,7 @@ struct log_io_class { size_t marker_size, eof_marker_size; size_t rows_per_file; double fsync_delay; + bool panic_if_error; const char *filetype; const char *version; @@ -124,6 +125,8 @@ void recover_follow(struct recovery_state *r, ev_tstamp wal_dir_rescan_delay); void recover_finalize(struct recovery_state *r); bool wal_write(struct recovery_state *r, i64 lsn, struct tbuf *data); +void recovery_setup_panic(struct recovery_state *r, bool on_snap_error, bool on_wal_error); + /* recovery accessors */ struct palloc_pool *recovery_pool(struct recovery_state *r); int confirm_lsn(struct recovery_state *r, i64 lsn); diff --git a/mod/feeder/feeder.c b/mod/feeder/feeder.c index 04ff9f265a87c5f78ba71d3727eba01ebac7dda9..05c2a5dd32def3ff7da4c2d1abfaaa2808e5b5ca 100644 --- a/mod/feeder/feeder.c +++ b/mod/feeder/feeder.c @@ -80,7 +80,7 @@ recover_feed_slave(int sock) send_row(NULL, ver); log_io = recover_init(NULL, cfg.wal_feeder_dir, - NULL, NULL, send_row, 0, 0, 0, 64, RECOVER_READONLY, false); + NULL, NULL, send_row, 0, 0, 64, RECOVER_READONLY, false); recover(log_io, lsn); recover_follow(log_io, 0.1); diff --git a/mod/silverbox/box.c b/mod/silverbox/box.c index 
c3e881c0965d0718024328cb350b6a9bf0878059..802f4f5b46e5d93c985246bec042e0b5dc90b386 100644 --- a/mod/silverbox/box.c +++ b/mod/silverbox/box.c @@ -1775,6 +1775,7 @@ mod_init(void) init_storage ? RECOVER_READONLY : 0, NULL); recovery_state->snap_io_rate_limit = cfg.snap_io_rate_limit * 1024 * 1024; + recovery_setup_panic(recovery_state, cfg.panic_on_snap_error, cfg.panic_on_wal_error); /* initialize hashes _after_ starting wal writer */ if (cfg.memcached != 0) { diff --git a/mod/silverbox/box_cfg.cfg_tmpl b/mod/silverbox/box_cfg.cfg_tmpl index c8d8d2544d0dfc60c51507e60fcfed8caa0b7ba5..1bc8a9bf9d4a9b1a7d8b035dd898384057fa5551 100644 --- a/mod/silverbox/box_cfg.cfg_tmpl +++ b/mod/silverbox/box_cfg.cfg_tmpl @@ -32,7 +32,6 @@ memcached_expire_full_sweep=3600 # do not write snapshot faster then snap_io_rate_limit MBytes/sec snap_io_rate_limit=0.0 - # Write no more rows in WAL rows_per_wal=500000 @@ -49,6 +48,12 @@ local_hot_standby=0 # delay in fractional seconds between successive re-readings of wal_dir wal_dir_rescan_delay=0.1 + +# panic if there is an error reading snap or wal +# by default panic on any snapshot reading error and ignore errors when reading wals +panic_on_snap_error=1 +panic_on_wal_error=0 + # Remote hot standby (if enabled server will run in hot standby mode # continuously fetching WAL records from wal_feeder_ipaddr:wal_feeder_port remote_hot_standby=0