From 95257919a002303e72febf0e14686e8524793786 Mon Sep 17 00:00:00 2001
From: Sergey Kaplun <skaplun@tarantool.org>
Date: Thu, 23 May 2024 13:26:29 +0300
Subject: [PATCH] perf: add aggregator helper for bench statistics

This patch adds a helper script that aggregates benchmark results
from JSON files into the InfluxDB line protocol format [1]. All JSON
files from the <perf/output> directory are treated as benchmark
results and aggregated into the <perf/output/summary.txt> file, which
can then be posted to InfluxDB.

The results are aggregated via the new test-perf-aggregate target,
which re-runs the benchmarks only for the JSON result files that are
missing.

[1]: https://docs.influxdata.com/influxdb/v2/reference/syntax/line-protocol/

NO_DOC=perf test
NO_CHANGELOG=perf test
NO_TEST=perf test
---
 perf/CMakeLists.txt      |  46 ++++++++++++++---
 perf/lua/CMakeLists.txt  |  46 ++++++++++++-----
 perf/tools/aggregate.lua | 104 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 176 insertions(+), 20 deletions(-)
 create mode 100755 perf/tools/aggregate.lua

diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
index 1a64760e6c..d80c03f018 100644
--- a/perf/CMakeLists.txt
+++ b/perf/CMakeLists.txt
@@ -3,8 +3,12 @@ set(CMAKE_CXX_STANDARD 14)
 set(PERF_OUTPUT_DIR ${PROJECT_BINARY_DIR}/perf/output)
 file(MAKE_DIRECTORY ${PERF_OUTPUT_DIR})
 
+set(BENCH_RESULTS "")
+
 add_subdirectory(lua)
 
+set(TARANTOOL_BIN $<TARGET_FILE:tarantool>)
+
 find_package(benchmark QUIET)
 if (NOT ${benchmark_FOUND})
   message(AUTHOR_WARNING "Google Benchmark library was not found")
@@ -41,14 +45,30 @@ function(create_perf_test_target)
                         "${multiValues}"
                         ${ARGN})
   message(STATUS "Creating C performance test ${PERF_TARGET}_perftest")
-  add_custom_target(${PERF_TARGET}_perftest
-                    COMMAND "$<TARGET_FILE:${PERF_TARGET}.perftest>"
-                    "--benchmark_out_format=json"
-                    "--benchmark_out=${PERF_OUTPUT_DIR}/${PERF_TARGET}.json"
-                    DEPENDS ${PERF_TARGET}.perftest
-                    COMMENT Running ${PERF_TARGET}_perftest
+
+  set(BENCH_RESULT ${PERF_OUTPUT_DIR}/${PERF_TARGET}.json)
+  set(BENCH_TARGET ${PERF_TARGET}_perftest)
+  set(BENCH_RESULT_TARGET ${BENCH_TARGET}_result)
+
+  # XXX: We need to provide two different targets with the same
+  # command: the first one (BENCH_TARGET) is run unconditionally,
+  # regardless of whether benchmark result files already exist,
+  # and the second one (BENCH_RESULT_TARGET) is run only if the
+  # corresponding result file is missing. The COMMAND_LIST
+  # variable contains the same command for both targets.
+  set(COMMAND_LIST
+      COMMAND "$<TARGET_FILE:${PERF_TARGET}.perftest>"
+              "--benchmark_out_format=json"
+              "--benchmark_out=${BENCH_RESULT}"
+      DEPENDS ${PERF_TARGET}.perftest
+      COMMENT Running ${BENCH_TARGET}
   )
-  set(RUN_PERF_C_TESTS_LIST ${RUN_PERF_C_TESTS_LIST} ${PERF_TARGET}_perftest PARENT_SCOPE)
+  add_custom_command(OUTPUT ${BENCH_RESULT} ${COMMAND_LIST})
+  add_custom_target(${BENCH_RESULT_TARGET} DEPENDS ${BENCH_RESULT})
+  add_custom_target(${BENCH_TARGET} ${COMMAND_LIST})
+
+  set(RUN_PERF_C_TESTS_LIST ${RUN_PERF_C_TESTS_LIST} ${BENCH_TARGET} PARENT_SCOPE)
+  set(BENCH_RESULTS ${BENCH_RESULT_TARGET} ${BENCH_RESULTS} PARENT_SCOPE)
 endfunction()
 
 function(create_perf_test)
@@ -103,3 +123,15 @@ add_custom_target(test-perf
                   DEPENDS test-c-perf test-lua-perf
                   COMMENT "Running performance tests"
 )
+
+
+set(PERF_SUMMARY ${PERF_OUTPUT_DIR}/summary.txt)
+add_custom_target(test-perf-aggregate
+  DEPENDS ${BENCH_RESULTS}
+  BYPRODUCTS ${PERF_SUMMARY}
+  COMMENT "Aggregate performance test results into ${PERF_SUMMARY}"
+  COMMAND ${TARANTOOL_BIN} ${CMAKE_CURRENT_SOURCE_DIR}/tools/aggregate.lua
+          --output=${PERF_SUMMARY}
+          --input_dir=${PERF_OUTPUT_DIR}
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/perf/lua/CMakeLists.txt b/perf/lua/CMakeLists.txt
index 55d523f92e..1419b96e1a 100644
--- a/perf/lua/CMakeLists.txt
+++ b/perf/lua/CMakeLists.txt
@@ -7,7 +7,7 @@ function(create_perf_lua_test)
   set(prefix PERF)
   set(noValues)
   set(singleValues NAME)
-  set(multiValues)
+  set(multiValues DEPENDS)
 
   # FIXME: if we update to CMake >= 3.5, can remove this line.
   include(CMakeParseArguments)
@@ -18,23 +18,38 @@ function(create_perf_lua_test)
                         ${ARGN})
   message(STATUS "Creating Lua performance test ${PERF_NAME}_perftest")
+
   set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${PERF_NAME}.lua)
-  add_custom_target(${PERF_NAME}_perftest
-                    COMMAND ${CMAKE_COMMAND} -E env
-                    LUA_PATH="${LUA_PATH}"
-                    ${TARANTOOL_BIN} ${TEST_PATH}
-                    --output="${PERF_OUTPUT_DIR}/${PERF_NAME}.json"
-                    --output_format=json
-                    COMMENT Running ${PERF_NAME}_perftest
-                    DEPENDS tarantool ${TEST_PATH}
-                    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  set(BENCH_RESULT ${PERF_OUTPUT_DIR}/${PERF_NAME}.json)
+  set(BENCH_TARGET ${PERF_NAME}_perftest)
+  set(BENCH_RESULT_TARGET ${BENCH_TARGET}_result)
+
+  # XXX: We need to provide two different targets with the same
+  # command: the first one (BENCH_TARGET) is run unconditionally,
+  # regardless of whether benchmark result files already exist,
+  # and the second one (BENCH_RESULT_TARGET) is run only if the
+  # corresponding result file is missing. The COMMAND_LIST
+  # variable contains the same command for both targets.
+  set(COMMAND_LIST
+      COMMENT Running ${BENCH_TARGET}
+      COMMAND ${CMAKE_COMMAND} -E env
+              LUA_PATH="${LUA_PATH}"
+              ${TARANTOOL_BIN} ${TEST_PATH}
+              --output="${BENCH_RESULT}"
+              --output_format=json
+      DEPENDS tarantool ${PERF_DEPENDS} ${TEST_PATH}
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
   )
-  set(RUN_PERF_LUA_TESTS_LIST ${RUN_PERF_LUA_TESTS_LIST} ${PERF_NAME}_perftest PARENT_SCOPE)
+  add_custom_command(OUTPUT ${BENCH_RESULT} ${COMMAND_LIST})
+  add_custom_target(${BENCH_RESULT_TARGET} DEPENDS ${BENCH_RESULT})
+  add_custom_target(${BENCH_TARGET} ${COMMAND_LIST})
+
+  set(RUN_PERF_LUA_TESTS_LIST ${RUN_PERF_LUA_TESTS_LIST} ${BENCH_TARGET} PARENT_SCOPE)
+  set(BENCH_RESULTS ${BENCH_RESULT_TARGET} ${BENCH_RESULTS} PARENT_SCOPE)
 endfunction()
 
 create_perf_lua_test(NAME 1mops_write)
 create_perf_lua_test(NAME box_select)
-create_perf_lua_test(NAME column_scan)
 create_perf_lua_test(NAME gh-7089-vclock-copy)
 create_perf_lua_test(NAME uri_escape_unescape)
 
@@ -42,9 +57,14 @@ include_directories(${MSGPUCK_INCLUDE_DIRS})
 build_module(column_scan_module column_scan_module.c)
 target_link_libraries(column_scan_module msgpuck)
 
-add_dependencies(column_scan_perftest column_scan_module)
+create_perf_lua_test(NAME column_scan
+  DEPENDS column_scan_module
+)
 
 add_custom_target(test-lua-perf
                   DEPENDS "${RUN_PERF_LUA_TESTS_LIST}"
                   COMMENT "Running Lua performance tests"
 )
+
+# Propagate the list to the parent scope.
+set(BENCH_RESULTS "${BENCH_RESULTS}" PARENT_SCOPE)
diff --git a/perf/tools/aggregate.lua b/perf/tools/aggregate.lua
new file mode 100755
index 0000000000..525cc9acd8
--- /dev/null
+++ b/perf/tools/aggregate.lua
@@ -0,0 +1,104 @@
+#!/usr/bin/env tarantool
+
+-- A script to aggregate benchmark results from JSON files into the
+-- InfluxDB line protocol format [1]:
+-- <measurement>,<tag_set> <field_set> <timestamp>
+--
+-- <tag_set> and <field_set> have the following format:
+-- <key1>=<value1>,<key2>=<value2>
+--
+-- The reported tag set contains values that can be used for
+-- filtering data (e.g., the branch or benchmark name).
+--
+-- The script accepts the following parameters:
+--
+-- <input_dir> -- the directory from which the .json files are
+--                taken.
+-- <output> -- the filename where the results are saved.
+--
+-- [1]: https://docs.influxdata.com/influxdb/v2/reference/syntax/line-protocol/
+
+local json = require('json')
+local fio = require('fio')
+
+local params = require('internal.argparse').parse(arg, {
+    {'input_dir', 'string'},
+    {'output', 'string'},
+})
+
+local input_dir = params.input_dir
+assert(input_dir and fio.path.is_dir(input_dir),
+       'given input_dir is not a directory')
+
+local output = params.output
+local out_fh = assert(fio.open(output, {'O_WRONLY', 'O_CREAT', 'O_TRUNC'}))
+
+local function exec(cmd)
+    return io.popen(cmd):read('*all'):strip()
+end
+
+local commit = os.getenv('PERF_COMMIT') or exec('git rev-parse --short HEAD')
+assert(commit, 'can not determine the commit')
+
+local branch = os.getenv('PERF_BRANCH') or
+               exec('git rev-parse --abbrev-ref HEAD')
+assert(branch, 'can not determine the branch')
+
+local tag_set = {branch = branch}
+
+local function read_all(file)
+    local fh = assert(io.open(file, 'rb'))
+    local content = fh:read('*all')
+    fh:close()
+    return content
+end
+
+local REPORTED_FIELDS = {
+    'cpu_time',
+    'items_per_second',
+    'iterations',
+    'real_time',
+}
+
+local time = os.time()
+
+local function influx_kv(tab)
+    local kv_string = {}
+    for k, v in pairs(tab) do
+        table.insert(kv_string, ('%s=%s'):format(k, v))
+    end
+    return table.concat(kv_string, ',')
+end
+
+local function influx_line(measurement, tags, fields)
+    return ('%s,%s %s %d\n'):format(measurement, influx_kv(tags),
+                                    influx_kv(fields), time)
+end
+
+for _, file in pairs(fio.listdir(input_dir)) do
+    -- Skip files in which we are not interested.
+    if not file:match('%.json$') then goto continue end
+
+    local data = read_all(('%s/%s'):format(input_dir, file))
+    local bench_name = fio.basename(file, '.json')
+    local benchmarks = json.decode(data).benchmarks
+
+    for _, bench in ipairs(benchmarks) do
+        local full_tag_set = table.deepcopy(tag_set)
+        full_tag_set.name = bench.name
+
+        -- Save commit as a field, since we don't want to filter
+        -- benchmarks by the commit (one point of data).
+        local field_set = {commit = ('"%s"'):format(commit)}
+
+        for _, field in ipairs(REPORTED_FIELDS) do
+            field_set[field] = bench[field]
+        end
+
+        local line = influx_line(bench_name, full_tag_set, field_set)
+        out_fh:write(line)
+    end
+    ::continue::
+end
+
+out_fh:close()
-- 
GitLab
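
For illustration, the test-perf-aggregate target boils down to an invocation like the sketch below. The paths are illustrative, the benchmark name, branch, metric values, and timestamp in the sample output line are hypothetical, and the key order within the tag and field sets may vary because they are built from unordered Lua tables:

    $ tarantool perf/tools/aggregate.lua --input_dir=perf/output --output=perf/output/summary.txt
    $ cat perf/output/summary.txt
    column_scan,branch=master,name=column_scan.sum_all cpu_time=6.9e+08,real_time=7.0e+08,iterations=100,items_per_second=1.4e+06,commit="9525791" 1716459989

Here the measurement is the basename of the JSON result file, the tags are the branch and the benchmark name, the fields are the commit plus the four reported metrics, and the trailing value is the aggregation timestamp.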