From 6754ddcb104f5186153563d081e485cad2bf1502 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <sd@picodata.io>
Date: Wed, 22 May 2024 13:14:54 +0700
Subject: [PATCH] fix: refactor vshard timeouts on dql ref-map-unref

---
 sbroad-core/src/core-router.lua | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/sbroad-core/src/core-router.lua b/sbroad-core/src/core-router.lua
index 439f2d651..889afcb2f 100644
--- a/sbroad-core/src/core-router.lua
+++ b/sbroad-core/src/core-router.lua
@@ -7,7 +7,6 @@ local fiber = require('fiber')
 local table = require('table')
 local ref_id = 0
 local DQL_MIN_TIMEOUT = 10
-local REF_MIN_TIMEOUT = 5
 
 local function future_wait(cond, timeout)
     local f = function(cond, timeout)
@@ -49,10 +48,8 @@ end
 -- @param vtable_max_rows Maximum number of rows to receive from storages.
 -- If the number is exceeded, the results are discarded and error is returned.
 -- @param opts Table which may have the following options:
--- 1. ref_timeout - number of seconds to keep the reference alive. Shouldn't
--- be too big, because alive reference blocks vshard's rebalancing.
--- 2. timeout - timeout for the whole function execution.
--- 3. check_bucket_count - whether or not to check that ref stage has covered
+-- 1. timeout - timeout for the whole function execution.
+-- 2. check_bucket_count - whether or not to check that ref stage has covered
 -- all buckets. If this is true, the error will be returned if router's bucket
 -- count != bucket count covered by ref stage. It may mean that router has
 -- outdated configuration (some buckets were added/deleted on storages that router
@@ -61,14 +58,12 @@ end
 --
 local function multi_storage_dql(uuid_to_args, func, vtable_max_rows, opts)
     local replicasets = vshard.router.routeall()
-    local timeout, ref_timeout, check_bucket_count
+    local timeout, check_bucket_count
     if opts then
-        timeout = opts.map_timeout or DQL_MIN_TIMEOUT
-        ref_timeout = opts.ref_timeout or REF_MIN_TIMEOUT
+        timeout = opts.timeout or DQL_MIN_TIMEOUT
         check_bucket_count = opts.check_bucket_count or false
     else
         timeout = DQL_MIN_TIMEOUT
-        ref_timeout = REF_MIN_TIMEOUT
         check_bucket_count = false
     end
 
@@ -91,7 +86,7 @@ local function multi_storage_dql(uuid_to_args, func, vtable_max_rows, opts)
     for uuid, _ in pairs(uuid_to_args) do
         local rs = replicasets[uuid]
         res, err = rs:callrw('vshard.storage._call',
-            {'storage_ref', rid, ref_timeout}, opts_ref)
+            {'storage_ref', rid, timeout}, opts_ref)
         if res == nil then
             err_uuid = uuid
             goto fail
@@ -126,8 +121,9 @@ local function multi_storage_dql(uuid_to_args, func, vtable_max_rows, opts)
     -- if the user does a maintenance of some kind by creating/deleting buckets.
     -- In both cases can't guarantee all the data would be covered by Map calls.
     if check_bucket_count and bucket_count ~= vshard.router.bucket_count() then
-        err = "All refs are done but not all buckets are covered"
-        goto fail
+        err = string.format("Expected %d buckets in the cluster, got %d",
+            vshard.router.bucket_count(), bucket_count)
+        goto fail
     end
 
     -- Map stage: send.
@@ -251,7 +247,7 @@ _G.dql_on_some = function(uuid_to_args, is_readonly, waiting_timeout, vtable_max
         result = helper.unwrap_execute_result(res[1][1])
     else
         local err, err_uuid
-        local opts = { map_timeout = waiting_timeout, ref_timeout = waiting_timeout }
+        local opts = { timeout = waiting_timeout }
         result, err, err_uuid = multi_storage_dql(uuid_to_args, exec_fn, vtable_max_rows, opts)
         if err ~= nil then
             helper.dql_error(err, err_uuid)
@@ -269,8 +265,7 @@ _G.dql_on_all = function(required, optional, waiting_timeout, vtable_max_rows)
         uuid_to_args[uuid] = { required = required, optional = optional }
    end
     local opts = {
-        map_timeout = waiting_timeout,
-        ref_timeout = waiting_timeout,
+        timeout = waiting_timeout,
         check_bucket_count = true
     }
     local result, err, err_uuid = multi_storage_dql(uuid_to_args, exec_fn, vtable_max_rows, opts)
-- 
GitLab
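
Illustration (not part of the patch): a minimal caller-side sketch of the
option contract after this change. Only the opts shape and the
multi_storage_dql/helper.dql_error names come from the patch itself; the
concrete timeout value and the uuid_to_args, exec_fn, and vtable_max_rows
variables are hypothetical placeholders.

    -- A single `timeout` now bounds the whole ref-map-unref round trip:
    -- the same deadline is forwarded to the storage_ref stage, so the
    -- separate map_timeout/ref_timeout knobs are gone.
    local opts = {
        timeout = 10,              -- seconds for ref + map + unref (hypothetical)
        check_bucket_count = true, -- error out unless refs covered every bucket
    }
    local result, err, err_uuid =
        multi_storage_dql(uuid_to_args, exec_fn, vtable_max_rows, opts)
    if err ~= nil then
        helper.dql_error(err, err_uuid)
    end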