From 6c100924afe06bda1bab2c6e29f6103312864006 Mon Sep 17 00:00:00 2001 From: Nikolay Shirokovskiy <nshirokovskiy@tarantool.org> Date: Tue, 11 Apr 2023 17:37:14 +0300 Subject: [PATCH] box: introduce memtx_sort_threads config parameter Closes #3389 Closes #7689 Closes #4646 @TarantoolBot document Title: new box.cfg parameter memtx_sort_threads The parameter sets the number of threads used to sort keys of secondary indexes on loading memtx database. The parameter cannot be changed dynamically (as it does not make sense). Maximum value is 256, minimum is 1. Default is to use all available cores. Usage example: ``` box.cfg{memtx_sort_threads=4} ``` --- .../gh-3389-cfg-option-memtx-sort-threads.md | 5 + cmake/BuildMisc.cmake | 5 - cmake/compiler.cmake | 16 -- debian/copyright | 1 - src/CMakeLists.txt | 7 - src/box/box.cc | 21 ++ src/box/lua/load_cfg.lua | 2 + src/box/memtx_engine.cc | 27 +- src/box/memtx_engine.h | 10 +- src/box/memtx_tree.cc | 9 +- src/lib/core/tt_sort.c | 18 +- src/trivia/config.h.cmake | 4 - ...389_cfg_option_memtx_sort_threads_test.lua | 58 ++++ test/box-tap/cfg.test.lua | 6 +- third_party/qsort_arg.c | 43 +-- third_party/qsort_arg.h | 11 +- third_party/qsort_arg_mt.c | 261 ------------------ 17 files changed, 144 insertions(+), 360 deletions(-) create mode 100644 changelogs/unreleased/gh-3389-cfg-option-memtx-sort-threads.md create mode 100644 test/box-luatest/gh_3389_cfg_option_memtx_sort_threads_test.lua delete mode 100644 third_party/qsort_arg_mt.c diff --git a/changelogs/unreleased/gh-3389-cfg-option-memtx-sort-threads.md b/changelogs/unreleased/gh-3389-cfg-option-memtx-sort-threads.md new file mode 100644 index 0000000000..9174ec4905 --- /dev/null +++ b/changelogs/unreleased/gh-3389-cfg-option-memtx-sort-threads.md @@ -0,0 +1,5 @@ +## feature/box + +* Added the `box.cfg.memtx_sort_threads` parameter that specifies the number of + threads used to sort indexes keys on loading a memtx database. OpenMP is + not used to sort keys anymore (gh-3389). diff --git a/cmake/BuildMisc.cmake b/cmake/BuildMisc.cmake index 1a99f946bf..e6905204de 100644 --- a/cmake/BuildMisc.cmake +++ b/cmake/BuildMisc.cmake @@ -25,11 +25,6 @@ macro(libmisc_build) ) endif() - if (HAVE_OPENMP) - list(APPEND misc_src - ${PROJECT_SOURCE_DIR}/third_party/qsort_arg_mt.c) - endif() - add_library(misc STATIC ${misc_src}) set_target_properties(misc PROPERTIES COMPILE_FLAGS "${DEPENDENCY_CFLAGS}") diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index c43ca41d25..14652a95a4 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -67,18 +67,6 @@ if((NOT HAVE_STD_C11 AND NOT HAVE_STD_GNU99) OR "Please consider upgrade to gcc 4.5+ or clang 3.2+.") endif() -# -# Check for an omp support -# -set(CMAKE_REQUIRED_FLAGS "-fopenmp -Werror") -check_cxx_source_compiles("int main(void) { -#pragma omp parallel - { - } - return 0; -}" HAVE_OPENMP) -set(CMAKE_REQUIRED_FLAGS "") - # # GCC started to warn for unused result starting from 4.2, and # this is when it introduced -Wno-unused-result @@ -327,10 +315,6 @@ macro(enable_tnt_compile_flags) endif() endmacro(enable_tnt_compile_flags) -if (HAVE_OPENMP) - add_compile_flags("C;CXX" "-fopenmp") -endif() - if (CMAKE_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_GNUCC) set(HAVE_BUILTIN_CTZ 1) set(HAVE_BUILTIN_CTZLL 1) diff --git a/debian/copyright b/debian/copyright index 93892bae56..646042b13e 100644 --- a/debian/copyright +++ b/debian/copyright @@ -181,7 +181,6 @@ Copyright: 2013, Red Hat, Inc License: GPL Files: third_party/qsort_arg.c - third_party/qsort_arg_mt.c third_party/compat/sys/bsd_time.h third_party/queue.h third_party/rb.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84f1bf1980..77f89121e7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -349,13 +349,6 @@ include_directories(${EXTRA_BOX_INCLUDE_DIRS}) set(TARANTOOL_C_FLAGS ${CMAKE_C_FLAGS} PARENT_SCOPE) set(TARANTOOL_CXX_FLAGS ${CMAKE_CXX_FLAGS} PARENT_SCOPE) -if(BUILD_STATIC AND HAVE_OPENMP) - # Link libgomp explicitly to make it static. Avoid linking - # against DSO version of libgomp, which implied by -fopenmp - set (common_libraries ${common_libraries} "libgomp.a") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fno-openmp") -endif() - set(exports_file_sources ${PROJECT_SOURCE_DIR}/extra/exports ${EXTRA_EXPORTS_FILE_SOURCES}) diff --git a/src/box/box.cc b/src/box/box.cc index 84f1ba1e05..b3c71ac1fd 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -97,6 +97,7 @@ #include "small/static.h" #include "memory.h" #include "sqlLimit.h" +#include "tt_sort.h" static char status[64] = "unconfigured"; @@ -1779,6 +1780,24 @@ box_init_say() return 0; } +/** + * Checks whether memtx_sort_threads configuration parameter is correct. + */ +static void +box_check_memtx_sort_threads(void) +{ + int num = cfg_geti("memtx_sort_threads"); + /* + * After high level checks this parameter is either nil or has + * type 'number'. + */ + if (cfg_isnumber("memtx_sort_threads") && + (num <= 0 || num > TT_SORT_THREADS_MAX)) + tnt_raise(ClientError, ER_CFG, "memtx_sort_threads", + tt_sprintf("must be greater than 0 and less than or" + " equal to %d", TT_SORT_THREADS_MAX)); +} + void box_check_config(void) { @@ -1841,6 +1860,7 @@ box_check_config(void) diag_raise(); if (box_check_txn_isolation() == txn_isolation_level_MAX) diag_raise(); + box_check_memtx_sort_threads(); } int @@ -4558,6 +4578,7 @@ engine_init() cfg_geti("slab_alloc_granularity"), cfg_gets("memtx_allocator"), cfg_getd("slab_alloc_factor"), + cfg_geti("memtx_sort_threads"), box_on_indexes_built); engine_register((struct engine *)memtx); box_set_memtx_max_tuple_size(); diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index 672031cf89..ca5aa9eae7 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -201,6 +201,7 @@ local default_cfg = { sql_vdbe_max_steps = 45000, txn_timeout = 365 * 100 * 86400, txn_isolation = "best-effort", + memtx_sort_threads = nil, metrics = { include = 'all', @@ -393,6 +394,7 @@ local template_cfg = { sql_cache_size = 'number', sql_vdbe_max_steps = 'number', txn_timeout = 'number', + memtx_sort_threads = 'number', metrics = 'table', } diff --git a/src/box/memtx_engine.cc b/src/box/memtx_engine.cc index 8d9ded8789..24926f5ca7 100644 --- a/src/box/memtx_engine.cc +++ b/src/box/memtx_engine.cc @@ -59,6 +59,7 @@ #include "memtx_space.h" #include "memtx_space_upgrade.h" #include "wal.h" +#include "tt_sort.h" #include <type_traits> @@ -1632,7 +1633,7 @@ struct memtx_engine * memtx_engine_new(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, bool dontdump, unsigned granularity, - const char *allocator, float alloc_factor, + const char *allocator, float alloc_factor, int sort_threads, memtx_on_indexes_built_cb on_indexes_built) { int64_t snap_signature; @@ -1751,6 +1752,30 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery, memtx->state = MEMTX_INITIALIZED; memtx->max_tuple_size = MAX_TUPLE_SIZE; memtx->force_recovery = force_recovery; + if (sort_threads == 0) { + char *ompnum_str = getenv_safe("OMP_NUM_THREADS", NULL, 0); + if (ompnum_str != NULL) { + long ompnum = strtol(ompnum_str, NULL, 10); + if (ompnum > 0 && ompnum <= TT_SORT_THREADS_MAX) { + say_warn("OMP_NUM_THREADS is used to set number" + " of sorting threads. Use cfg option" + " 'memtx_sort_threads' instead."); + sort_threads = ompnum; + } + free(ompnum_str); + } + if (sort_threads == 0) { + sort_threads = sysconf(_SC_NPROCESSORS_ONLN); + if (sort_threads < 1) { + say_warn("Cannot get number of processors. " + "Fallback to single processor."); + sort_threads = 1; + } else if (sort_threads > TT_SORT_THREADS_MAX) { + sort_threads = TT_SORT_THREADS_MAX; + } + } + } + memtx->sort_threads = sort_threads; memtx->replica_join_cord = NULL; diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h index 25ad39f533..00b633c4f8 100644 --- a/src/box/memtx_engine.h +++ b/src/box/memtx_engine.h @@ -188,6 +188,11 @@ struct memtx_engine { struct tuple_format *func_key_format; /** Set of extents allocated using malloc. */ struct mh_ptr_t *malloc_extents; + /** + * Number of threads used to sort keys of secondary indexes on engine + * start. + */ + int sort_threads; }; struct memtx_gc_task; @@ -223,7 +228,7 @@ struct memtx_engine * memtx_engine_new(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, bool dontdump, unsigned granularity, - const char *allocator, float alloc_factor, + const char *allocator, float alloc_factor, int threads_num, memtx_on_indexes_built_cb on_indexes_built); /** @@ -364,13 +369,14 @@ memtx_engine_new_xc(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, bool dontdump, unsigned granularity, const char *allocator, float alloc_factor, + int sort_threads, memtx_on_indexes_built_cb on_indexes_built) { struct memtx_engine *memtx; memtx = memtx_engine_new(snap_dirname, force_recovery, tuple_arena_max_size, objsize_min, dontdump, granularity, allocator, alloc_factor, - on_indexes_built); + sort_threads, on_indexes_built); if (memtx == NULL) diag_raise(); return memtx; diff --git a/src/box/memtx_tree.cc b/src/box/memtx_tree.cc index dd15275a36..a77c642f67 100644 --- a/src/box/memtx_tree.cc +++ b/src/box/memtx_tree.cc @@ -42,7 +42,7 @@ #include "memtx_tx.h" #include "trivia/config.h" #include "trivia/util.h" -#include <qsort_arg.h> +#include "tt_sort.h" #include <small/mempool.h> /** @@ -1850,9 +1850,10 @@ memtx_tree_index_end_build(struct index *base) struct memtx_tree_index<USE_HINT> *index = (struct memtx_tree_index<USE_HINT> *)base; struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); - qsort_arg(index->build_array, index->build_array_size, - sizeof(index->build_array[0]), - memtx_tree_qcompare<USE_HINT>, cmp_def); + struct memtx_engine *memtx = (struct memtx_engine *)base->engine; + tt_sort(index->build_array, index->build_array_size, + sizeof(index->build_array[0]), memtx_tree_qcompare<USE_HINT>, + cmp_def, memtx->sort_threads); if (cmp_def->is_multikey || cmp_def->for_func_index) { /* * Multikey index may have equal(in terms of diff --git a/src/lib/core/tt_sort.c b/src/lib/core/tt_sort.c index 177daa0bfa..474dc94423 100644 --- a/src/lib/core/tt_sort.c +++ b/src/lib/core/tt_sort.c @@ -99,7 +99,7 @@ static int find_bucket(struct sort_data *sort, void *elem) { /* - * Bucket count is `thread_count`, thus bucket boundraries (splitters) + * Bucket count is `thread_count`, thus bucket boundaries (splitters) * count is `thread_count - 1` omitting most left and most right * boundaries. Let's place most left and most right boundaries at * imaginary indexes `-1` and `size of splitters` respectively. @@ -175,9 +175,9 @@ sort_bucket(va_list ap) struct sort_data *sort = worker->sort; /* Sort this worker bucket. */ - qsort_arg_st(sort->buffer + worker->bucket_begin * sort->elem_size, - worker->bucket_size, sort->elem_size, - sort->cmp, sort->cmp_arg); + qsort_arg(sort->buffer + worker->bucket_begin * sort->elem_size, + worker->bucket_size, sort->elem_size, + sort->cmp, sort->cmp_arg); /* Move sorted data back from temporary space. */ memcpy(sort->data + worker->bucket_begin * sort->elem_size, @@ -254,8 +254,8 @@ find_splitters(struct sort_data *sort) sort->data + i * sample_step * sort->elem_size, sort->elem_size); - qsort_arg_st(samples, samples_num, sort->elem_size, sort->cmp, - sort->cmp_arg); + qsort_arg(samples, samples_num, sort->elem_size, sort->cmp, + sort->cmp_arg); /* Take splitters from samples. */ for (int i = 0; i < sort->thread_count - 1; i++) { @@ -293,8 +293,8 @@ sort_all(va_list ap) { struct sort_data *sort = va_arg(ap, typeof(sort)); - qsort_arg_st(sort->data, sort->elem_count, sort->elem_size, sort->cmp, - sort->cmp_arg); + qsort_arg(sort->data, sort->elem_count, sort->elem_size, sort->cmp, + sort->cmp_arg); return 0; } @@ -353,7 +353,7 @@ tt_sort(void *data, size_t elem_count, size_t elem_size, if (elem_count < NOSPAWN_SIZE_THESHOLD) { say_verbose("data size is less than threshold %d," " sort in caller thread", NOSPAWN_SIZE_THESHOLD); - qsort_arg_st(data, elem_count, elem_size, cmp, cmp_arg); + qsort_arg(data, elem_count, elem_size, cmp, cmp_arg); return; } diff --git a/src/trivia/config.h.cmake b/src/trivia/config.h.cmake index 0c7c285749..9dabd29b9b 100644 --- a/src/trivia/config.h.cmake +++ b/src/trivia/config.h.cmake @@ -146,10 +146,6 @@ * Defined if this is a big-endian system. */ #cmakedefine HAVE_BYTE_ORDER_BIG_ENDIAN 1 -/* - * Defined if this platform supports openmp and it is enabled - */ -#cmakedefine HAVE_OPENMP 1 /* * Defined if compatible with GNU readline installed. */ diff --git a/test/box-luatest/gh_3389_cfg_option_memtx_sort_threads_test.lua b/test/box-luatest/gh_3389_cfg_option_memtx_sort_threads_test.lua new file mode 100644 index 0000000000..4b9bb8c082 --- /dev/null +++ b/test/box-luatest/gh_3389_cfg_option_memtx_sort_threads_test.lua @@ -0,0 +1,58 @@ +local server = require('luatest.server') +local t = require('luatest') + +local g = t.group() + +g.before_all(function(cg) + -- test setting memtx_sort_threads to non default value + cg.server = server:new{} + cg.server:start() + cg.server:exec(function() + -- this will trigger calling tt_sort on box.cfg{} + local s = box.schema.space.create('test') + s:create_index('pri') + s:replace{2} + s:replace{1} + box.snapshot() + end) +end) + +g.after_all(function(cg) + if cg.server ~= nil then + cg.server:drop() + end +end) + +g.after_each(function(cg) + if cg.server ~= nil then + cg.server:stop() + end +end) + +g.test_setting_cfg_option = function(cg) + cg.server = server:new{box_cfg = {memtx_sort_threads = 3}} + cg.server:start() + cg.server:exec(function() + t.assert_error_msg_equals( + "Can't set option 'memtx_sort_threads' dynamically", + box.cfg, {memtx_sort_threads = 5}) + end) +end + +g.test_setting_openmp_env_var = function(cg) + cg.server = server:new{box_cfg = {log_level = 'warn'}, + env = {OMP_NUM_THREADS = ' 3'}} + cg.server:start() + t.helpers.retrying({}, function() + local p = "OMP_NUM_THREADS is used to set number" .. + " of sorting threads. Use cfg option" .. + " 'memtx_sort_threads' instead." + t.assert_not_equals(cg.server:grep_log(p), nil) + end) + cg.server:stop() +end + +g.test_setting_openmp_env_var_bad = function(cg) + cg.server = server:new{env= {OMP_NUM_THREADS = '300'}} + cg.server:start() +end diff --git a/test/box-tap/cfg.test.lua b/test/box-tap/cfg.test.lua index 2d3e3a5222..ff42fa747f 100755 --- a/test/box-tap/cfg.test.lua +++ b/test/box-tap/cfg.test.lua @@ -6,7 +6,7 @@ local socket = require('socket') local fio = require('fio') local uuid = require('uuid') local msgpack = require('msgpack') -test:plan(108) +test:plan(112) -------------------------------------------------------------------------------- -- Invalid values @@ -50,6 +50,10 @@ invalid('vinyl_bloom_fpr', 0) invalid('vinyl_bloom_fpr', 1.1) invalid('wal_queue_max_size', -1) invalid('sql_vdbe_max_steps', -1) +invalid('memtx_sort_threads', 'all') +invalid('memtx_sort_threads', -1) +invalid('memtx_sort_threads', 0) +invalid('memtx_sort_threads', 257) local function invalid_combinations(name, val) local status, result = pcall(box.cfg, val) diff --git a/third_party/qsort_arg.c b/third_party/qsort_arg.c index 921e8c31e3..d27f7e51d3 100644 --- a/third_party/qsort_arg.c +++ b/third_party/qsort_arg.c @@ -50,16 +50,6 @@ #include <qsort_arg.h> #include <stdint.h> -enum { - /* - * The size of an array from which it is reasonable to start - * multithread sort. - * If the array size is less than the size below, a single-threaded - * version of qsort will be started.S - */ - MULTITHREAD_SIZE_THRESHOLD = 128 * 1024, -}; - #define min(a, b) (a) < (b) ? a : b static char *med3(char *a, char *b, char *c, @@ -119,7 +109,7 @@ med3(char *a, char *b, char *c, int (*cmp)(const void *a, const void *b, void *a * Single-thread version of qsort. */ void -qsort_arg_st(void *a, size_t n, size_t es, int (*cmp)(const void *a, const void *b, void *arg), void *arg) +qsort_arg(void *a, size_t n, size_t es, int (*cmp)(const void *a, const void *b, void *arg), void *arg) { char *pa, *pb, @@ -208,7 +198,7 @@ loop:SWAPINIT(a, es); { /* Recurse on left partition, then iterate on right partition */ if (d1 > es) - qsort_arg_st(a, d1 / es, es, cmp, arg); + qsort_arg(a, d1 / es, es, cmp, arg); if (d2 > es) { /* Iterate rather than recurse to save stack space */ @@ -222,7 +212,7 @@ loop:SWAPINIT(a, es); { /* Recurse on right partition, then iterate on left partition */ if (d2 > es) - qsort_arg_st(pn - d2, d2 / es, es, cmp, arg); + qsort_arg(pn - d2, d2 / es, es, cmp, arg); if (d1 > es) { /* Iterate rather than recurse to save stack space */ @@ -232,30 +222,3 @@ loop:SWAPINIT(a, es); } } } - -#ifdef HAVE_OPENMP -/** - * Multi-thread version of qsort. Only present when target machine supports - * open MP. - */ -void qsort_arg_mt(void *a, size_t n, size_t es, - int (*cmp)(const void *, const void *, void *), void *arg); -#endif - -/** - * General version of qsort that calls single-threaded of multi-threaded - * qsort depending on open MP availability and given array size. - */ -void -qsort_arg(void *a, size_t n, size_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg) -{ -#ifdef HAVE_OPENMP - if (n >= MULTITHREAD_SIZE_THRESHOLD) - qsort_arg_mt(a, n, es, cmp, arg); - else - qsort_arg_st(a, n, es, cmp, arg); -#else - qsort_arg_st(a, n, es, cmp, arg); -#endif -} diff --git a/third_party/qsort_arg.h b/third_party/qsort_arg.h index 918d90ef13..875a21029d 100644 --- a/third_party/qsort_arg.h +++ b/third_party/qsort_arg.h @@ -12,15 +12,8 @@ extern "C" { * Single-thread version of qsort. */ void -qsort_arg_st(void *a, size_t n, size_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg); - -/** - * General version of qsort that calls single-threaded of multi-threaded - * qsort depending on open MP availability and given array size. - */ -void qsort_arg(void *a, size_t n, size_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg); +qsort_arg(void *a, size_t n, size_t es, + int (*cmp)(const void *a, const void *b, void *arg), void *arg); #if defined(__cplusplus) } diff --git a/third_party/qsort_arg_mt.c b/third_party/qsort_arg_mt.c deleted file mode 100644 index afb261cf06..0000000000 --- a/third_party/qsort_arg_mt.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Imported from PostgreSQL sources by Teodor Sigaev <teodor@sigaev.ru>, <sigaev@corp.mail.ru> - */ - -/* - * qsort_arg.c: qsort with a passthrough "void *" argument - * - * Modifications from vanilla NetBSD source: - * Add do ... while() macro fix - * Remove __inline, _DIAGASSERTs, __P - * Remove ill-considered "swap_cnt" switch to insertion sort, - * in favor of a simple check for presorted input. - * - * CAUTION: if you change this file, see also qsort.c - * - * $PostgreSQL: pgsql/src/port/qsort_arg.c,v 1.4 2007/03/18 05:36:50 neilc Exp $ - */ - -/* $NetBSD: qsort.c,v 1.13 2003/08/07 16:43:42 agc Exp $ */ - -/*- - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <qsort_arg.h> -#include <stdint.h> - -#if defined(__cplusplus) -extern "C" { -#endif /* defined(__cplusplus) */ - -#ifndef HAVE_OPENMP -#error "HAVE_OPENMP macro is not defined" -#endif - -#define min(a, b) (a) < (b) ? a : b - -static char *med3(char *a, char *b, char *c, - int (*cmp)(const void *a, const void *b, void *arg), void *arg); -static void swapfunc(char *, char *, size_t, int); - -/** - * @brief Reduce the current number of threads in the thread pool to the - * bare minimum. Doesn't prevent the pool from spawning new threads later - * if demand mounts. - */ -static void -thread_pool_trim() -{ - /* - * Trim OpenMP thread pool. - * Though we lack the direct control the workaround below works for - * GNU OpenMP library. The library stops surplus threads on entering - * a parallel region. Can't go below 2 threads due to the - * implementation quirk. - */ -#pragma omp parallel num_threads(2) - ; -} - - -/* - * Qsort routine based on J. L. Bentley and M. D. McIlroy, - * "Engineering a sort function", - * Software--Practice and Experience 23 (1993) 1249-1265. - * We have modified their original by adding a check for already-sorted input, - * which seems to be a win per discussions on pgsql-hackers around 2006-03-21. - */ -#define swapcode(TYPE, parmi, parmj, n) \ -do { \ - size_t i = (n) / sizeof (TYPE); \ - TYPE *pi = (TYPE *)(void *)(parmi); \ - TYPE *pj = (TYPE *)(void *)(parmj); \ - do { \ - TYPE t = *pi; \ - *pi++ = *pj; \ - *pj++ = t; \ - } while (--i > 0); \ -} while (0) - -#define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \ - (es) % sizeof(long) ? 2 : (es) == sizeof(long)? 0 : 1; - -static void -swapfunc(char *a, char *b, size_t n, int swaptype) -{ - if (swaptype <= 1) - swapcode(long, a, b, n); - else - swapcode(char, a, b, n); -} - -#define swap(a, b) \ - if (swaptype == 0) { \ - long t = *(long *)(void *)(a); \ - *(long *)(void *)(a) = *(long *)(void *)(b); \ - *(long *)(void *)(b) = t; \ - } else \ - swapfunc(a, b, es, swaptype) - -#define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n), swaptype) - -static char * -med3(char *a, char *b, char *c, int (*cmp)(const void *a, const void *b, void *arg), void *arg) -{ - return cmp(a, b, arg) < 0 ? - (cmp(b, c, arg) < 0 ? b : (cmp(a, c, arg) < 0 ? c : a)) - : (cmp(b, c, arg) > 0 ? b : (cmp(a, c, arg) < 0 ? a : c)); -} - -static void -qsort_arg_mt_internal(void *a, size_t n, intptr_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg) -{ - char *pa, *pb, *pc, *pd, *pl, *pm, *pn; - size_t d1, d2; - intptr_t r, swaptype, presorted; - - loop:SWAPINIT(a, es); - if (n < 7) - { - for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) - for (pl = pm; pl > (char *) a && cmp(pl - es, pl, arg) > 0; - pl -= es) - swap(pl, pl - es); - return; - } - presorted = 1; - for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) - { - if (cmp(pm - es, pm, arg) > 0) - { - presorted = 0; - break; - } - } - if (presorted) - return; - pm = (char *) a + (n / 2) * es; - if (n > 7) - { - pl = (char *) a; - pn = (char *) a + (n - 1) * es; - if (n > 40) - { - size_t d = (n / 8) * es; - pl = med3(pl, pl + d, pl + 2 * d, cmp, arg); - pm = med3(pm - d, pm, pm + d, cmp, arg); - pn = med3(pn - 2 * d, pn - d, pn, cmp, arg); - } - pm = med3(pl, pm, pn, cmp, arg); - } - swap((char*)a, pm); - pa = pb = (char *) a + es; - pc = pd = (char *) a + (n - 1) * es; - for (;;) - { - while (pb <= pc && (r = cmp(pb, a, arg)) <= 0) - { - if (r == 0) - { - swap(pa, pb); - pa += es; - } - pb += es; - } - while (pb <= pc && (r = cmp(pc, a, arg)) >= 0) - { - if (r == 0) - { - swap(pc, pd); - pd -= es; - } - pc -= es; - } - if (pb > pc) - break; - swap(pb, pc); - pb += es; - pc -= es; - } - pn = (char *) a + n * es; - d1 = min(pa - (char *) a, pb - pa); - vecswap(a, pb - d1, d1); - d1 = min(pd - pc, pn - pd - es); - vecswap(pb, pn - d1, d1); - d1 = pb - pa; - d2 = pd - pc; - if (d1 <= d2) - { - /* Recurse on left partition, then iterate on right partition */ - if (d1 > es) { -#pragma omp task - qsort_arg_mt_internal(a, d1 / es, es, cmp, arg); - } - if (d2 > es) - { - /* Iterate rather than recurse to save stack space */ - /* qsort(pn - d2, d2 / es, es, cmp); */ - a = pn - d2; - n = d2 / es; - goto loop; - } - } - else - { - /* Recurse on right partition, then iterate on left partition */ - if (d2 > es) { -#pragma omp task - qsort_arg_mt_internal(pn - d2, d2 / es, es, cmp, arg); - } - if (d1 > es) - { - /* Iterate rather than recurse to save stack space */ - /* qsort(a, d1 / es, es, cmp); */ - n = d1 / es; - goto loop; - } - } -} - -void -qsort_arg_mt(void *a, size_t n, size_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg) -{ -#pragma omp parallel - { -#pragma omp single - qsort_arg_mt_internal(a, n, es, cmp, arg); - } - thread_pool_trim(); -} - -#if defined(__cplusplus) -} -#endif /* defined(__cplusplus) */ -- GitLab