From 95257919a002303e72febf0e14686e8524793786 Mon Sep 17 00:00:00 2001
From: Sergey Kaplun <skaplun@tarantool.org>
Date: Thu, 23 May 2024 13:26:29 +0300
Subject: [PATCH] perf: add aggregator helper for bench statistics

This patch adds a helper script that aggregates benchmark results from
JSON files into the InfluxDB line protocol format [1].
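
Each line of the protocol has the following shape, where the tag set
and the field set are comma-separated <key>=<value> pairs:

  <measurement>,<tag_set> <field_set> <timestamp>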

All JSON files from the <perf/output> directory are treated as
benchmark results and aggregated into the <perf/output/summary.txt>
file, which can then be posted to InfluxDB. The aggregation is done by
the new test-perf-aggregate target, which re-runs a benchmark only if
its JSON result file is missing.
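
For illustration, a record in <perf/output/summary.txt> may look as
follows (the benchmark name, values, and timestamp below are made up):

  column_scan,branch=master,name=tree_scan commit="9525791",cpu_time=1250,items_per_second=800000,iterations=100,real_time=1260 1716460800

The summary can then be produced from the CMake build directory, e.g.:

  $ cmake --build . --target test-perf-aggregate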

[1]: https://docs.influxdata.com/influxdb/v2/reference/syntax/line-protocol/

NO_DOC=perf test
NO_CHANGELOG=perf test
NO_TEST=perf test
---
 perf/CMakeLists.txt      |  46 ++++++++++++++---
 perf/lua/CMakeLists.txt  |  46 ++++++++++++-----
 perf/tools/aggregate.lua | 104 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 176 insertions(+), 20 deletions(-)
 create mode 100755 perf/tools/aggregate.lua

diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
index 1a64760e6c..d80c03f018 100644
--- a/perf/CMakeLists.txt
+++ b/perf/CMakeLists.txt
@@ -3,8 +3,12 @@ set(CMAKE_CXX_STANDARD 14)
 set(PERF_OUTPUT_DIR ${PROJECT_BINARY_DIR}/perf/output)
 file(MAKE_DIRECTORY ${PERF_OUTPUT_DIR})
 
+set(BENCH_RESULTS "")
+
 add_subdirectory(lua)
 
+set(TARANTOOL_BIN $<TARGET_FILE:tarantool>)
+
 find_package(benchmark QUIET)
 if (NOT ${benchmark_FOUND})
     message(AUTHOR_WARNING "Google Benchmark library was not found")
@@ -41,14 +45,30 @@ function(create_perf_test_target)
                         "${multiValues}"
                         ${ARGN})
   message(STATUS "Creating C performance test ${PERF_TARGET}_perftest")
-  add_custom_target(${PERF_TARGET}_perftest
-                    COMMAND "$<TARGET_FILE:${PERF_TARGET}.perftest>"
-                            "--benchmark_out_format=json"
-                            "--benchmark_out=${PERF_OUTPUT_DIR}/${PERF_TARGET}.json"
-                    DEPENDS ${PERF_TARGET}.perftest
-                    COMMENT Running ${PERF_TARGET}_perftest
+
+  set(BENCH_RESULT ${PERF_OUTPUT_DIR}/${PERF_TARGET}.json)
+  set(BENCH_TARGET ${PERF_TARGET}_perftest)
+  set(BENCH_RESULT_TARGET ${BENCH_TARGET}_result)
+
+  # XXX: We need to provide two different targets with the same
+  # command: the first (BENCH_TARGET) is always run, regardless
+  # of whether a file with benchmark results already exists, and
+  # the second target (BENCH_RESULT_TARGET) is run only if the
+  # corresponding result file is missing. The COMMAND_LIST
+  # variable holds the same command for both targets.
+  set(COMMAND_LIST
+        COMMAND "$<TARGET_FILE:${PERF_TARGET}.perftest>"
+                "--benchmark_out_format=json"
+                "--benchmark_out=${BENCH_RESULT}"
+        DEPENDS ${PERF_TARGET}.perftest
+        COMMENT Running ${BENCH_TARGET}
   )
-  set(RUN_PERF_C_TESTS_LIST ${RUN_PERF_C_TESTS_LIST} ${PERF_TARGET}_perftest PARENT_SCOPE)
+  add_custom_command(OUTPUT ${BENCH_RESULT} ${COMMAND_LIST})
+  add_custom_target(${BENCH_RESULT_TARGET} DEPENDS ${BENCH_RESULT})
+  add_custom_target(${BENCH_TARGET} ${COMMAND_LIST})
+
+  set(RUN_PERF_C_TESTS_LIST ${RUN_PERF_C_TESTS_LIST} ${BENCH_TARGET} PARENT_SCOPE)
+  set(BENCH_RESULTS ${BENCH_RESULT_TARGET} ${BENCH_RESULTS} PARENT_SCOPE)
 endfunction()
 
 function(create_perf_test)
@@ -103,3 +123,15 @@ add_custom_target(test-perf
                   DEPENDS test-c-perf test-lua-perf
                   COMMENT "Running performance tests"
 )
+
+
+set(PERF_SUMMARY ${PERF_OUTPUT_DIR}/summary.txt)
+add_custom_target(test-perf-aggregate
+                  DEPENDS ${BENCH_RESULTS}
+                  BYPRODUCTS ${PERF_SUMMARY}
+                  COMMENT "Aggregate performance test results into ${PERF_SUMMARY}"
+                  COMMAND ${TARANTOOL_BIN} ${CMAKE_CURRENT_SOURCE_DIR}/tools/aggregate.lua
+                    --output=${PERF_SUMMARY}
+                    --input_dir=${PERF_OUTPUT_DIR}
+                  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/perf/lua/CMakeLists.txt b/perf/lua/CMakeLists.txt
index 55d523f92e..1419b96e1a 100644
--- a/perf/lua/CMakeLists.txt
+++ b/perf/lua/CMakeLists.txt
@@ -7,7 +7,7 @@ function(create_perf_lua_test)
   set(prefix PERF)
   set(noValues)
   set(singleValues NAME)
-  set(multiValues)
+  set(multiValues DEPENDS)
 
   # FIXME: if we update to CMake >= 3.5, can remove this line.
   include(CMakeParseArguments)
@@ -18,23 +18,38 @@ function(create_perf_lua_test)
                         ${ARGN})
 
   message(STATUS "Creating Lua performance test ${PERF_NAME}_perftest")
+
   set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${PERF_NAME}.lua)
-  add_custom_target(${PERF_NAME}_perftest
-                    COMMAND ${CMAKE_COMMAND} -E env
-                      LUA_PATH="${LUA_PATH}"
-                      ${TARANTOOL_BIN} ${TEST_PATH}
-                        --output="${PERF_OUTPUT_DIR}/${PERF_NAME}.json"
-                        --output_format=json
-                    COMMENT Running ${PERF_NAME}_perftest
-                    DEPENDS tarantool ${TEST_PATH}
-                    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  set(BENCH_RESULT ${PERF_OUTPUT_DIR}/${PERF_NAME}.json)
+  set(BENCH_TARGET ${PERF_NAME}_perftest)
+  set(BENCH_RESULT_TARGET ${BENCH_TARGET}_result)
+
+  # XXX: We need to provide two different targets with the same
+  # command: the first (BENCH_TARGET) is always run, regardless
+  # of whether a file with benchmark results already exists, and
+  # the second target (BENCH_RESULT_TARGET) is run only if the
+  # corresponding result file is missing. The COMMAND_LIST
+  # variable holds the same command for both targets.
+  set(COMMAND_LIST
+        COMMENT Running ${BENCH_TARGET}
+        COMMAND ${CMAKE_COMMAND} -E env
+          LUA_PATH="${LUA_PATH}"
+          ${TARANTOOL_BIN} ${TEST_PATH}
+            --output="${BENCH_RESULT}"
+            --output_format=json
+        DEPENDS tarantool ${PERF_DEPENDS} ${TEST_PATH}
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
   )
-  set(RUN_PERF_LUA_TESTS_LIST ${RUN_PERF_LUA_TESTS_LIST} ${PERF_NAME}_perftest PARENT_SCOPE)
+  add_custom_command(OUTPUT ${BENCH_RESULT} ${COMMAND_LIST})
+  add_custom_target(${BENCH_RESULT_TARGET} DEPENDS ${BENCH_RESULT})
+  add_custom_target(${BENCH_TARGET} ${COMMAND_LIST})
+
+  set(RUN_PERF_LUA_TESTS_LIST ${RUN_PERF_LUA_TESTS_LIST} ${BENCH_TARGET} PARENT_SCOPE)
+  set(BENCH_RESULTS ${BENCH_RESULT_TARGET} ${BENCH_RESULTS} PARENT_SCOPE)
 endfunction()
 
 create_perf_lua_test(NAME 1mops_write)
 create_perf_lua_test(NAME box_select)
-create_perf_lua_test(NAME column_scan)
 create_perf_lua_test(NAME gh-7089-vclock-copy)
 create_perf_lua_test(NAME uri_escape_unescape)
 
@@ -42,9 +57,14 @@ include_directories(${MSGPUCK_INCLUDE_DIRS})
 
 build_module(column_scan_module column_scan_module.c)
 target_link_libraries(column_scan_module msgpuck)
-add_dependencies(column_scan_perftest column_scan_module)
+create_perf_lua_test(NAME column_scan
+                     DEPENDS column_scan_module
+)
 
 add_custom_target(test-lua-perf
                   DEPENDS "${RUN_PERF_LUA_TESTS_LIST}"
                   COMMENT "Running Lua performance tests"
 )
+
+# Propagate the list to the parent scope.
+set(BENCH_RESULTS "${BENCH_RESULTS}" PARENT_SCOPE)
diff --git a/perf/tools/aggregate.lua b/perf/tools/aggregate.lua
new file mode 100755
index 0000000000..525cc9acd8
--- /dev/null
+++ b/perf/tools/aggregate.lua
@@ -0,0 +1,104 @@
+#!/usr/bin/env tarantool
+
+-- Script to aggregate the benchmark results from JSON files into
+-- the InfluxDB line protocol format [1]:
+-- <measurement>,<tag_set> <field_set> <timestamp>
+--
+-- <tag_set> and <field_set> have the following format:
+-- <key1>=<value1>,<key2>=<value2>
+--
+-- The reported tag set is a set of values that can be used for
+-- filtering data (i.e., branch or benchmark name).
+--
+-- The script accepts the following parameters:
+--
+-- <input_dir> -- the directory from which the .json files are
+--                taken.
+-- <output>    -- the filename where the results are saved.
+--
+-- [1]: https://docs.influxdata.com/influxdb/v2/reference/syntax/line-protocol/
+
+local json = require('json')
+local fio = require('fio')
+
+local params = require('internal.argparse').parse(arg, {
+    {'input_dir', 'string'},
+    {'output', 'string'},
+})
+
+local input_dir = params.input_dir
+assert(input_dir and fio.path.is_dir(input_dir),
+       'given input_dir is not a directory')
+
+local output = params.output
+local out_fh = assert(fio.open(output, {'O_WRONLY', 'O_CREAT', 'O_TRUNC'}))
+
+local function exec(cmd)
+    return io.popen(cmd):read('*all'):strip()
+end
+
+local commit = os.getenv('PERF_COMMIT') or exec('git rev-parse --short HEAD')
+assert(commit ~= '', 'cannot determine the commit')
+
+local branch = os.getenv('PERF_BRANCH') or
+    exec('git rev-parse --abbrev-ref HEAD')
+assert(branch ~= '', 'cannot determine the branch')
+
+local tag_set = {branch = branch}
+
+local function read_all(file)
+    local fh = assert(io.open(file, 'rb'))
+    local content = fh:read('*all')
+    fh:close()
+    return content
+end
+
+local REPORTED_FIELDS = {
+    'cpu_time',
+    'items_per_second',
+    'iterations',
+    'real_time',
+}
+
+local time = os.time()
+
+local function influx_kv(tab)
+    local kv_string = {}
+    for k, v in pairs(tab) do
+        table.insert(kv_string, ('%s=%s'):format(k, v))
+    end
+    return table.concat(kv_string, ',')
+end
+
+local function influx_line(measurement, tags, fields)
+    return ('%s,%s %s %d\n'):format(measurement, influx_kv(tags),
+            influx_kv(fields), time)
+end
+
+for _, file in pairs(fio.listdir(input_dir)) do
+    -- Skip files in which we are not interested.
+    if not file:match('%.json$') then goto continue end
+
+    local data = read_all(('%s/%s'):format(input_dir, file))
+    local bench_name = fio.basename(file, '.json')
+    local benchmarks = json.decode(data).benchmarks
+
+    for _, bench in ipairs(benchmarks) do
+        local full_tag_set = table.deepcopy(tag_set)
+        full_tag_set.name = bench.name
+
+        -- Save the commit as a field rather than a tag: we don't
+        -- filter benchmarks by commit (it marks a single data point).
+        local field_set = {commit = ('"%s"'):format(commit)}
+
+        for _, field in ipairs(REPORTED_FIELDS) do
+            field_set[field] = bench[field]
+        end
+
+        local line = influx_line(bench_name, full_tag_set, field_set)
+        out_fh:write(line)
+    end
+    ::continue::
+end
+
+out_fh:close()
-- 
GitLab