Commit ccb7a649 authored by Sergey Kaplun's avatar Sergey Kaplun Committed by Sergey Ostanevich

perf: standardize gh-7089-vclock-copy benchmark

The output now contains items per second instead of the mean time in
seconds. The number of iterations is reduced to 40 to avoid running the
test for too long. The `wal_mode` option (default is "none") is now set
via a command-line flag, as is the number of nodes (default is 10).
Also, the master nodes are set up via `popen()` without using any
Makefile.

In addition, two new options are introduced:
* The `--output` option allows you to specify the output file.
* The `--output_format` option specifies the format of the printed
  output. The default is "console", which prints items processed per
  second to stdout. The "json" format contains all the information about
  the benchmark in a format similar to Google Benchmark's.

Usually, these options should be used together to dump machine-readable
results for the benchmarks.
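
For example, a machine-readable run might look like this (the exact flag
spelling is assumed from the option names above, not quoted from the
test's help output):

    $ tarantool gh-7089-vclock-copy.lua --nodes 10 --wal_mode write \
          --output results.json --output_format json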

NO_DOC=perf test
NO_CHANGELOG=perf test
NO_TEST=perf test
parent 72dd9324
SHELL := /bin/bash

test: test_none stop
test_wal: test_write stop

dirs:
	for i in {1..11}; do mkdir $${i} || true; done

start_swarm:
	for i in {2..11}; do ./swarm.lua $${i}; done

test_none: clean dirs start_swarm
	./speedtest.lua none 1

test_write: clean dirs start_swarm
	./speedtest.lua write 1

stop stop1:
	for i in {2..11}; do pkill -F $${i}/$${i}.pid || true; done

clean: stop1
	for i in {1..11}; do rm -rf $${i}; done
gh-7089: extra vclock copy for each ack test.
Issue `make test` to run.
#!/usr/bin/env tarantool
-- An instance file for the node which tests applier thread ack speed.
-- There are 10 threads, one per each replication source, so each WAL write
-- results in an ack message for each thread. This magnifies the possible
-- performance drawbacks of copying vclocks for each thread.
local mode = arg[1] or 'none'
assert(mode == 'write' or mode == 'none',
       "mode should be either 'write' or 'none'")
local id = tonumber(arg[2]) or 1
assert(id < 2 or id > 11,
       'The id should be outside of the occupied range [2, 11]')
local fiber = require('fiber')
box.cfg{
    listen = 3300 + id,
    replication = {
        3302,
        3303,
        3304,
        3305,
        3306,
        3307,
        3308,
        3309,
        3310,
        3311,
    },
    replication_threads = 10,
    -- Disable WAL on a node to notice slightest differences in TX thread
    -- performance. It's okay to replicate TO a node with disabled WAL. You
    -- only can't replicate FROM it.
    wal_mode = mode,
    work_dir = tostring(id),
    log = id..'.log',
}
box.schema.space.create('test', {if_not_exists = true})
box.space.test:create_index('pk', {if_not_exists = true})
box.snapshot()
local function replace_func(num_iters)
    for i = 1, num_iters do
        box.space.test:replace{i, i}
    end
end
local function test(num_fibers)
    local fibers = {}
    local num_replaces = 1e6
    local num_iters = num_replaces / num_fibers
    local start = fiber.time()
    for _ = 1, num_fibers do
        local fib = fiber.new(replace_func, num_iters)
        fib:set_joinable(true)
        table.insert(fibers, fib)
    end
    assert(#fibers == num_fibers, "Fibers created successfully")
    for _, fib in pairs(fibers) do
        fib:join()
    end
    -- Update fiber.time() if there were no yields.
    fiber.yield()
    local dt = fiber.time() - start
    return dt, num_replaces / dt
end
local mean_time = 0
local mean_rps = 0
local num_iters = 100
-- Fiber count > 1 makes no sense for wal_mode = 'none'. There are no yields
-- on replace when there are no wal writes.
local num_fibers = mode == 'none' and 1 or 100
for test_iter = 1, num_iters do
    local time, rps = test(num_fibers)
    print(('Iteration #%d finished in %f seconds. RPS: %f'):format(test_iter,
           time, rps))
    mean_time = mean_time + time / num_iters
    mean_rps = mean_rps + rps / num_iters
end
print(('Mean iteration time: %f, mean RPS: %f'):format(mean_time, mean_rps))
os.exit()
#!/usr/bin/env tarantool
-- Instance file for one of the 10 swarm nodes. They bootstrap the cluster, each
-- bump their vclock component and then do nothing and serve as replication
-- masters for the eleventh node.
local id = tonumber(arg[1])
assert(id ~= nil, 'Please pass a numeric instance id')
assert(id >= 2 and id <= 11, 'The id should be in range [2, 11]')
box.cfg{
    listen = 3300 + id,
    replication = {
        3302,
        3303,
        3304,
        3305,
        3306,
        3307,
        3308,
        3309,
        3310,
        3311,
    },
    background = true,
    work_dir = tostring(id),
    pid_file = id..'.pid',
    log = id..'.log',
}
box.once('bootstrap', function()
    box.schema.user.grant('guest', 'replication')
end)
-- This is executed on every instance so that vclock is non-empty in each
-- component. This will make the testing instance copy a larger portion of data
-- on each write and make the performance degradation, if any, more obvious.
box.space._schema:replace{'Something to bump vclock '..id}
@@ -33,6 +33,7 @@ endfunction()
create_perf_lua_test(NAME 1mops_write)
create_perf_lua_test(NAME box_select)
create_perf_lua_test(NAME column_scan)
create_perf_lua_test(NAME gh-7089-vclock-copy)
create_perf_lua_test(NAME uri_escape_unescape)
include_directories(${MSGPUCK_INCLUDE_DIRS})
local fiber = require('fiber')
local fio = require('fio')
local popen = require('popen')
local clock = require('clock')
local benchmark = require('benchmark')
local USAGE = [[
nodes <number, 10> - number of nodes as replication sources
wal_mode <string, 'none'> - WAL mode for tested replica ('none', 'write')
Being run without options, this benchmark tests applier thread ACK speed.
There are 10 threads, one per replication source, so each WAL write results in
an ACK message for each thread. This magnifies the possible performance
drawbacks of copying vclocks for each thread. The test performs 1000000
replaces, which are repeated 40 times, and measures the average RPS.
]]
local params = benchmark.argparse(arg, {
    {'nodes', 'number'},
    {'wal_mode', 'string'},
}, USAGE)
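-- A typical invocation, with the flag spelling assumed from the
-- parameter names above (an illustrative sketch):
--   $ tarantool gh-7089-vclock-copy.lua --nodes 10 --wal_mode write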
local bench = benchmark.new(params)
local wal_mode = params.wal_mode or 'none'
assert(wal_mode == 'write' or wal_mode == 'none',
       "mode should be either 'write' or 'none'")
-- Number of nodes.
local nodes = params.nodes or 10
assert(nodes > 0 and nodes < 32, 'incorrect nodes number')
local test_dir = fio.tempdir()
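-- Recursively remove the file tree rooted at `s` (an `rm -rf`
-- equivalent built on the `fio` module).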
local function rmtree(s)
    if fio.path.is_file(s) or fio.path.is_link(s) then
        fio.unlink(s)
        return
    end
    if fio.path.is_dir(s) then
        for _, file in pairs(fio.listdir(s)) do
            rmtree(s .. '/' .. file)
        end
        fio.rmdir(s)
    end
end
-- Storage for the popen handles of the master nodes, keyed by port.
local nodes_ph = {}
local function exit(res, details)
    for listen, master in pairs(nodes_ph) do
        print(('# killing node on %d'):format(listen))
        master:kill()
        master:wait()
    end
    if details ~= nil then
        print(details)
    end
    if test_dir ~= nil then
        rmtree(test_dir)
        test_dir = nil
    end
    os.exit(res)
end
-- The port of the replica under test.
local LISTEN_PORT = 3301
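-- The master nodes listen on consecutive ports starting at 3302.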
local master_nodes = {}
for i = 3302, 3301 + nodes do
    table.insert(master_nodes, ('%d'):format(i))
end
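-- Start one master node: spawn a separate Tarantool process via popen()
-- that bootstraps itself, waits for the full mesh to connect, and bumps
-- its own vclock component.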
local function bootstrap_node(listen)
    local work_dir = ('%s/%d'):format(test_dir, listen)
    -- Subdirectory for the node's data.
    os.execute('mkdir ' .. work_dir)
    local cmd = {arg[-1], '-e', string.format([[
        local fiber = require('fiber')
        box.cfg{
            listen = %d,
            work_dir = '%s',
            read_only = false,
            replication = {%s},
            log = 'log.log',
        }
        box.once('bootstrap', function()
            box.schema.user.grant('guest', 'replication')
        end)
        repeat
            fiber.sleep(0.1)
        until not (#box.info.replication < %d or box.info().status ~= 'running')
        -- This is executed on every instance so that vclock is
        -- non-empty in each component. This will make the testing
        -- instance copy a larger portion of data on each write
        -- and make the performance degradation, if any, more obvious.
        box.space._schema:replace({'Something to bump vclock ' .. %d})
    ]], listen, work_dir, table.concat(master_nodes, ','), nodes, listen)}
    local res, err = popen.new(cmd)
    if not res then
        exit(1, 'error running replica: ' .. err)
    end
    nodes_ph[listen] = res
end
if nodes ~= nil and nodes > 0 and nodes < 32 then
    print('# starting ' .. nodes .. ' masters')
    for listen = 3302, 3301 + nodes do
        bootstrap_node(listen)
    end
else
    exit(1, 'Incorrect number of nodes: "' .. tostring(params.nodes) ..
         '" must be 1..31')
end
box.cfg{
    listen = LISTEN_PORT,
    replication_threads = nodes,
    -- Disable WAL on a node to notice slightest differences in TX
    -- thread performance. It's okay to replicate _to_ a node with
    -- disabled WAL. You only can't replicate _from_ it.
    wal_mode = wal_mode,
    replication = master_nodes,
    read_only = false,
    log = 'test.log',
    work_dir = test_dir,
}
-- Wait for all nodes to connect.
repeat
    fiber.sleep(0.1)
    print('# replication', #box.info.replication,
          'status ', box.info().status)
until not (#box.info.replication < nodes or box.info().status ~= 'running')
box.schema.space.create('test', {if_not_exists = true})
box.space.test:create_index('pk', {if_not_exists = true})
box.snapshot()
local function replace_func(num_iters)
    for i = 1, num_iters do
        box.space.test:replace({i, i})
    end
end
local num_replaces = 1e6
local function test(num_fibers)
    local fibers = {}
    local num_iters = num_replaces / num_fibers
    local start_realtime = clock.time()
    local start_cputime = clock.proc()
    for _ = 1, num_fibers do
        local fib = fiber.new(replace_func, num_iters)
        fib:set_joinable(true)
        table.insert(fibers, fib)
    end
    assert(#fibers == num_fibers, 'Fibers created successfully')
    for _, fib in pairs(fibers) do
        fib:join()
    end
    local dt_realtime = clock.time() - start_realtime
    local dt_cputime = clock.proc() - start_cputime
    return dt_realtime, dt_cputime, num_replaces / dt_realtime
end
local num_iters = 40
-- Fiber count > 1 makes no sense for `wal_mode = 'none'`. There
-- are no yields on replace when there are no WAL writes.
local num_fibers = wal_mode == 'none' and 1 or 100
local total_realtime = 0
local total_cputime = 0
for test_iter = 1, num_iters do
    local realtime, cputime, rps = test(num_fibers)
    print(('# Iteration #%d finished in %f seconds. RPS: %f'):format(
          test_iter, realtime, rps))
    total_realtime = total_realtime + realtime
    total_cputime = total_cputime + cputime
end
bench:add_result('walmode_' .. wal_mode, {
    real_time = total_realtime,
    cpu_time = total_cputime,
    items = num_iters * num_replaces,
})
bench:dump_results()
exit(0)