diff --git a/changelogs/unreleased/gh-7204-replication-greeting-timeout.md b/changelogs/unreleased/gh-7204-replication-greeting-timeout.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b0ab6b18a36ac4fca099339ee54bb48ab2e11b4
--- /dev/null
+++ b/changelogs/unreleased/gh-7204-replication-greeting-timeout.md
@@ -0,0 +1,4 @@
+## bugfix/replication
+
+* Fixed a bug due to which a replica did not log the error reason when the
+  master failed to send a greeting message (gh-7204).
diff --git a/src/box/applier.cc b/src/box/applier.cc
index f30d9f2f77158a7ac30dad7058850d4727b7f7ab..1cd5b92fb3d99243e470aa2624c9a598bd50bf0f 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -357,7 +357,15 @@ applier_connect(struct applier *applier)
 		close(fd);
 		diag_raise();
 	}
-	if (coio_readn(io, greetingbuf, IPROTO_GREETING_SIZE) < 0)
+	/*
+	 * Abort if the master doesn't send a greeting within the configured
+	 * timeout so as not to block forever if we connect to a wrong
+	 * instance, which doesn't send anything to accepted clients.
+	 * No timeouts after this point, because if we receive a proper
+	 * greeting, the server is likely to be fine.
+	 */
+	if (coio_readn_timeout(io, greetingbuf, IPROTO_GREETING_SIZE,
+			       replication_disconnect_timeout()) < 0)
 		diag_raise();
 	applier->last_row_time = ev_monotonic_now(loop());
 
diff --git a/test/replication-luatest/gh_7204_greeting_timeout_test.lua b/test/replication-luatest/gh_7204_greeting_timeout_test.lua
new file mode 100644
index 0000000000000000000000000000000000000000..2caca5dcd8ddde821f025c2da5c48e71be369a51
--- /dev/null
+++ b/test/replication-luatest/gh_7204_greeting_timeout_test.lua
@@ -0,0 +1,48 @@
+local fiber = require('fiber')
+local server = require('test.luatest_helpers.server')
+local socket = require('socket')
+local t = require('luatest')
+
+local g = t.group()
+
+g.before_all(function(g)
+    g.server = server:new({
+        alias = 'master',
+        box_cfg = {
+            replication_timeout = 0.1,
+            replication_connect_timeout = 0.5,
+        },
+    })
+    g.server:start()
+end)
+
+g.after_all(function(g)
+    g.server:drop()
+end)
+
+-- Close the fake master even if the test body failed, so that the listening
+-- socket isn't leaked.
+g.after_each(function(g)
+    if g.listener ~= nil then
+        g.listener:close()
+        g.listener = nil
+    end
+end)
+
+-- Connect to a peer that accepts connections but never sends a greeting and
+-- check that the server log reports both the timeout and the retry.
+g.test_greeting_timeout = function(g)
+    local uri = server.build_instance_uri('server')
+    -- The handler never writes anything, so no greeting is ever sent.
+    g.listener = socket.tcp_server('unix/', uri, {
+        handler = function() fiber.sleep(9000) end
+    })
+    t.assert(g.listener)
+    g.server:exec(function(uri)
+        box.cfg{replication = uri}
+    end, {uri})
+    t.helpers.retrying({}, function()
+        t.assert(g.server:grep_log('timed out'))
+        t.assert(g.server:grep_log('will retry'))
+    end)
+end