diff --git a/changelogs/unreleased/gh-3389-cfg-option-memtx-sort-threads.md b/changelogs/unreleased/gh-3389-cfg-option-memtx-sort-threads.md new file mode 100644 index 0000000000000000000000000000000000000000..9174ec4905238ba3bd37c3d7843a2c6089418dd3 --- /dev/null +++ b/changelogs/unreleased/gh-3389-cfg-option-memtx-sort-threads.md @@ -0,0 +1,5 @@ +## feature/box + +* Added the `box.cfg.memtx_sort_threads` parameter that specifies the number of + threads used to sort indexes keys on loading a memtx database. OpenMP is + not used to sort keys anymore (gh-3389). diff --git a/cmake/BuildMisc.cmake b/cmake/BuildMisc.cmake index 1a99f946bf876659c2de37b45566c9b8976ddeee..e6905204dec2099035703252ebf39cbdf3f97512 100644 --- a/cmake/BuildMisc.cmake +++ b/cmake/BuildMisc.cmake @@ -25,11 +25,6 @@ macro(libmisc_build) ) endif() - if (HAVE_OPENMP) - list(APPEND misc_src - ${PROJECT_SOURCE_DIR}/third_party/qsort_arg_mt.c) - endif() - add_library(misc STATIC ${misc_src}) set_target_properties(misc PROPERTIES COMPILE_FLAGS "${DEPENDENCY_CFLAGS}") diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index c43ca41d25b469caf0a31cd17bb329f0bd1c29b7..14652a95a42027a2901237ddc7a8894de91f119e 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -67,18 +67,6 @@ if((NOT HAVE_STD_C11 AND NOT HAVE_STD_GNU99) OR "Please consider upgrade to gcc 4.5+ or clang 3.2+.") endif() -# -# Check for an omp support -# -set(CMAKE_REQUIRED_FLAGS "-fopenmp -Werror") -check_cxx_source_compiles("int main(void) { -#pragma omp parallel - { - } - return 0; -}" HAVE_OPENMP) -set(CMAKE_REQUIRED_FLAGS "") - # # GCC started to warn for unused result starting from 4.2, and # this is when it introduced -Wno-unused-result @@ -327,10 +315,6 @@ macro(enable_tnt_compile_flags) endif() endmacro(enable_tnt_compile_flags) -if (HAVE_OPENMP) - add_compile_flags("C;CXX" "-fopenmp") -endif() - if (CMAKE_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_GNUCC) set(HAVE_BUILTIN_CTZ 1) set(HAVE_BUILTIN_CTZLL 1) diff --git a/debian/copyright b/debian/copyright index 93892bae56794721d05f5d900b219fc0c5e64a7a..646042b13e725127528228db59ebd27eb9427de2 100644 --- a/debian/copyright +++ b/debian/copyright @@ -181,7 +181,6 @@ Copyright: 2013, Red Hat, Inc License: GPL Files: third_party/qsort_arg.c - third_party/qsort_arg_mt.c third_party/compat/sys/bsd_time.h third_party/queue.h third_party/rb.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84f1bf19808ceb5c70d9fe0d0be05c1cf4d7333e..77f89121e72622f5c3d9480aaab55aa86b64d748 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -349,13 +349,6 @@ include_directories(${EXTRA_BOX_INCLUDE_DIRS}) set(TARANTOOL_C_FLAGS ${CMAKE_C_FLAGS} PARENT_SCOPE) set(TARANTOOL_CXX_FLAGS ${CMAKE_CXX_FLAGS} PARENT_SCOPE) -if(BUILD_STATIC AND HAVE_OPENMP) - # Link libgomp explicitly to make it static. Avoid linking - # against DSO version of libgomp, which implied by -fopenmp - set (common_libraries ${common_libraries} "libgomp.a") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fno-openmp") -endif() - set(exports_file_sources ${PROJECT_SOURCE_DIR}/extra/exports ${EXTRA_EXPORTS_FILE_SOURCES}) diff --git a/src/box/box.cc b/src/box/box.cc index 84f1ba1e054ad5f9f57f0c3194d51cd026963776..b3c71ac1fdc887f3b0cf0f9c0658eac6e125ee22 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -97,6 +97,7 @@ #include "small/static.h" #include "memory.h" #include "sqlLimit.h" +#include "tt_sort.h" static char status[64] = "unconfigured"; @@ -1779,6 +1780,24 @@ box_init_say() return 0; } +/** + * Checks whether memtx_sort_threads configuration parameter is correct. + */ +static void +box_check_memtx_sort_threads(void) +{ + int num = cfg_geti("memtx_sort_threads"); + /* + * After high level checks this parameter is either nil or has + * type 'number'. + */ + if (cfg_isnumber("memtx_sort_threads") && + (num <= 0 || num > TT_SORT_THREADS_MAX)) + tnt_raise(ClientError, ER_CFG, "memtx_sort_threads", + tt_sprintf("must be greater than 0 and less than or" + " equal to %d", TT_SORT_THREADS_MAX)); +} + void box_check_config(void) { @@ -1841,6 +1860,7 @@ box_check_config(void) diag_raise(); if (box_check_txn_isolation() == txn_isolation_level_MAX) diag_raise(); + box_check_memtx_sort_threads(); } int @@ -4558,6 +4578,7 @@ engine_init() cfg_geti("slab_alloc_granularity"), cfg_gets("memtx_allocator"), cfg_getd("slab_alloc_factor"), + cfg_geti("memtx_sort_threads"), box_on_indexes_built); engine_register((struct engine *)memtx); box_set_memtx_max_tuple_size(); diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index 672031cf894ae88767f88a72e7c5ca062aca3a84..ca5aa9eae760b0dead9a58c384dd05a23235a862 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -201,6 +201,7 @@ local default_cfg = { sql_vdbe_max_steps = 45000, txn_timeout = 365 * 100 * 86400, txn_isolation = "best-effort", + memtx_sort_threads = nil, metrics = { include = 'all', @@ -393,6 +394,7 @@ local template_cfg = { sql_cache_size = 'number', sql_vdbe_max_steps = 'number', txn_timeout = 'number', + memtx_sort_threads = 'number', metrics = 'table', } diff --git a/src/box/memtx_engine.cc b/src/box/memtx_engine.cc index 8d9ded8789243d8a8b77761f1e7b94680df92a38..24926f5ca7e94504e91ecea4104c7cbf6a17cb85 100644 --- a/src/box/memtx_engine.cc +++ b/src/box/memtx_engine.cc @@ -59,6 +59,7 @@ #include "memtx_space.h" #include "memtx_space_upgrade.h" #include "wal.h" +#include "tt_sort.h" #include <type_traits> @@ -1632,7 +1633,7 @@ struct memtx_engine * memtx_engine_new(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, bool dontdump, unsigned granularity, - const char *allocator, float alloc_factor, + const char *allocator, float alloc_factor, int sort_threads, memtx_on_indexes_built_cb on_indexes_built) { int64_t snap_signature; @@ -1751,6 +1752,30 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery, memtx->state = MEMTX_INITIALIZED; memtx->max_tuple_size = MAX_TUPLE_SIZE; memtx->force_recovery = force_recovery; + if (sort_threads == 0) { + char *ompnum_str = getenv_safe("OMP_NUM_THREADS", NULL, 0); + if (ompnum_str != NULL) { + long ompnum = strtol(ompnum_str, NULL, 10); + if (ompnum > 0 && ompnum <= TT_SORT_THREADS_MAX) { + say_warn("OMP_NUM_THREADS is used to set number" + " of sorting threads. Use cfg option" + " 'memtx_sort_threads' instead."); + sort_threads = ompnum; + } + free(ompnum_str); + } + if (sort_threads == 0) { + sort_threads = sysconf(_SC_NPROCESSORS_ONLN); + if (sort_threads < 1) { + say_warn("Cannot get number of processors. " + "Fallback to single processor."); + sort_threads = 1; + } else if (sort_threads > TT_SORT_THREADS_MAX) { + sort_threads = TT_SORT_THREADS_MAX; + } + } + } + memtx->sort_threads = sort_threads; memtx->replica_join_cord = NULL; diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h index 25ad39f533d25369161a04ba78a75b3080f9033e..00b633c4f88817dbc85f4ce62482ae0bf7e1ddc8 100644 --- a/src/box/memtx_engine.h +++ b/src/box/memtx_engine.h @@ -188,6 +188,11 @@ struct memtx_engine { struct tuple_format *func_key_format; /** Set of extents allocated using malloc. */ struct mh_ptr_t *malloc_extents; + /** + * Number of threads used to sort keys of secondary indexes on engine + * start. + */ + int sort_threads; }; struct memtx_gc_task; @@ -223,7 +228,7 @@ struct memtx_engine * memtx_engine_new(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, bool dontdump, unsigned granularity, - const char *allocator, float alloc_factor, + const char *allocator, float alloc_factor, int threads_num, memtx_on_indexes_built_cb on_indexes_built); /** @@ -364,13 +369,14 @@ memtx_engine_new_xc(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, bool dontdump, unsigned granularity, const char *allocator, float alloc_factor, + int sort_threads, memtx_on_indexes_built_cb on_indexes_built) { struct memtx_engine *memtx; memtx = memtx_engine_new(snap_dirname, force_recovery, tuple_arena_max_size, objsize_min, dontdump, granularity, allocator, alloc_factor, - on_indexes_built); + sort_threads, on_indexes_built); if (memtx == NULL) diag_raise(); return memtx; diff --git a/src/box/memtx_tree.cc b/src/box/memtx_tree.cc index dd15275a36e3df08f2468c624669be4f5505bbb1..a77c642f67b794911cae352e125e3da8a808bb4f 100644 --- a/src/box/memtx_tree.cc +++ b/src/box/memtx_tree.cc @@ -42,7 +42,7 @@ #include "memtx_tx.h" #include "trivia/config.h" #include "trivia/util.h" -#include <qsort_arg.h> +#include "tt_sort.h" #include <small/mempool.h> /** @@ -1850,9 +1850,10 @@ memtx_tree_index_end_build(struct index *base) struct memtx_tree_index<USE_HINT> *index = (struct memtx_tree_index<USE_HINT> *)base; struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree); - qsort_arg(index->build_array, index->build_array_size, - sizeof(index->build_array[0]), - memtx_tree_qcompare<USE_HINT>, cmp_def); + struct memtx_engine *memtx = (struct memtx_engine *)base->engine; + tt_sort(index->build_array, index->build_array_size, + sizeof(index->build_array[0]), memtx_tree_qcompare<USE_HINT>, + cmp_def, memtx->sort_threads); if (cmp_def->is_multikey || cmp_def->for_func_index) { /* * Multikey index may have equal(in terms of diff --git a/src/lib/core/tt_sort.c b/src/lib/core/tt_sort.c index 177daa0bfa84d6cac936eadb4643a2ab983fba25..474dc9442371e7a3c34dde16e45adb11e13c8574 100644 --- a/src/lib/core/tt_sort.c +++ b/src/lib/core/tt_sort.c @@ -99,7 +99,7 @@ static int find_bucket(struct sort_data *sort, void *elem) { /* - * Bucket count is `thread_count`, thus bucket boundraries (splitters) + * Bucket count is `thread_count`, thus bucket boundaries (splitters) * count is `thread_count - 1` omitting most left and most right * boundaries. Let's place most left and most right boundaries at * imaginary indexes `-1` and `size of splitters` respectively. @@ -175,9 +175,9 @@ sort_bucket(va_list ap) struct sort_data *sort = worker->sort; /* Sort this worker bucket. */ - qsort_arg_st(sort->buffer + worker->bucket_begin * sort->elem_size, - worker->bucket_size, sort->elem_size, - sort->cmp, sort->cmp_arg); + qsort_arg(sort->buffer + worker->bucket_begin * sort->elem_size, + worker->bucket_size, sort->elem_size, + sort->cmp, sort->cmp_arg); /* Move sorted data back from temporary space. */ memcpy(sort->data + worker->bucket_begin * sort->elem_size, @@ -254,8 +254,8 @@ find_splitters(struct sort_data *sort) sort->data + i * sample_step * sort->elem_size, sort->elem_size); - qsort_arg_st(samples, samples_num, sort->elem_size, sort->cmp, - sort->cmp_arg); + qsort_arg(samples, samples_num, sort->elem_size, sort->cmp, + sort->cmp_arg); /* Take splitters from samples. */ for (int i = 0; i < sort->thread_count - 1; i++) { @@ -293,8 +293,8 @@ sort_all(va_list ap) { struct sort_data *sort = va_arg(ap, typeof(sort)); - qsort_arg_st(sort->data, sort->elem_count, sort->elem_size, sort->cmp, - sort->cmp_arg); + qsort_arg(sort->data, sort->elem_count, sort->elem_size, sort->cmp, + sort->cmp_arg); return 0; } @@ -353,7 +353,7 @@ tt_sort(void *data, size_t elem_count, size_t elem_size, if (elem_count < NOSPAWN_SIZE_THESHOLD) { say_verbose("data size is less than threshold %d," " sort in caller thread", NOSPAWN_SIZE_THESHOLD); - qsort_arg_st(data, elem_count, elem_size, cmp, cmp_arg); + qsort_arg(data, elem_count, elem_size, cmp, cmp_arg); return; } diff --git a/src/trivia/config.h.cmake b/src/trivia/config.h.cmake index 0c7c285749bfc8085dfeecec6c187922d3f9b089..9dabd29b9bb1668218281c93f3ce85d3af11e87e 100644 --- a/src/trivia/config.h.cmake +++ b/src/trivia/config.h.cmake @@ -146,10 +146,6 @@ * Defined if this is a big-endian system. */ #cmakedefine HAVE_BYTE_ORDER_BIG_ENDIAN 1 -/* - * Defined if this platform supports openmp and it is enabled - */ -#cmakedefine HAVE_OPENMP 1 /* * Defined if compatible with GNU readline installed. */ diff --git a/test/box-luatest/gh_3389_cfg_option_memtx_sort_threads_test.lua b/test/box-luatest/gh_3389_cfg_option_memtx_sort_threads_test.lua new file mode 100644 index 0000000000000000000000000000000000000000..4b9bb8c0826b35e72a52731606f3aef5857edc3f --- /dev/null +++ b/test/box-luatest/gh_3389_cfg_option_memtx_sort_threads_test.lua @@ -0,0 +1,58 @@ +local server = require('luatest.server') +local t = require('luatest') + +local g = t.group() + +g.before_all(function(cg) + -- test setting memtx_sort_threads to non default value + cg.server = server:new{} + cg.server:start() + cg.server:exec(function() + -- this will trigger calling tt_sort on box.cfg{} + local s = box.schema.space.create('test') + s:create_index('pri') + s:replace{2} + s:replace{1} + box.snapshot() + end) +end) + +g.after_all(function(cg) + if cg.server ~= nil then + cg.server:drop() + end +end) + +g.after_each(function(cg) + if cg.server ~= nil then + cg.server:stop() + end +end) + +g.test_setting_cfg_option = function(cg) + cg.server = server:new{box_cfg = {memtx_sort_threads = 3}} + cg.server:start() + cg.server:exec(function() + t.assert_error_msg_equals( + "Can't set option 'memtx_sort_threads' dynamically", + box.cfg, {memtx_sort_threads = 5}) + end) +end + +g.test_setting_openmp_env_var = function(cg) + cg.server = server:new{box_cfg = {log_level = 'warn'}, + env = {OMP_NUM_THREADS = ' 3'}} + cg.server:start() + t.helpers.retrying({}, function() + local p = "OMP_NUM_THREADS is used to set number" .. + " of sorting threads. Use cfg option" .. + " 'memtx_sort_threads' instead." + t.assert_not_equals(cg.server:grep_log(p), nil) + end) + cg.server:stop() +end + +g.test_setting_openmp_env_var_bad = function(cg) + cg.server = server:new{env= {OMP_NUM_THREADS = '300'}} + cg.server:start() +end diff --git a/test/box-tap/cfg.test.lua b/test/box-tap/cfg.test.lua index 2d3e3a5222460dbd23f72c6e6284784da202b53e..ff42fa747ff8a079a8d8f744c3273c3c91b25fc7 100755 --- a/test/box-tap/cfg.test.lua +++ b/test/box-tap/cfg.test.lua @@ -6,7 +6,7 @@ local socket = require('socket') local fio = require('fio') local uuid = require('uuid') local msgpack = require('msgpack') -test:plan(108) +test:plan(112) -------------------------------------------------------------------------------- -- Invalid values @@ -50,6 +50,10 @@ invalid('vinyl_bloom_fpr', 0) invalid('vinyl_bloom_fpr', 1.1) invalid('wal_queue_max_size', -1) invalid('sql_vdbe_max_steps', -1) +invalid('memtx_sort_threads', 'all') +invalid('memtx_sort_threads', -1) +invalid('memtx_sort_threads', 0) +invalid('memtx_sort_threads', 257) local function invalid_combinations(name, val) local status, result = pcall(box.cfg, val) diff --git a/third_party/qsort_arg.c b/third_party/qsort_arg.c index 921e8c31e338f396bdae838bc2733c6669cd6c91..d27f7e51d34158cc7bdaa564f4f7c241af60662e 100644 --- a/third_party/qsort_arg.c +++ b/third_party/qsort_arg.c @@ -50,16 +50,6 @@ #include <qsort_arg.h> #include <stdint.h> -enum { - /* - * The size of an array from which it is reasonable to start - * multithread sort. - * If the array size is less than the size below, a single-threaded - * version of qsort will be started.S - */ - MULTITHREAD_SIZE_THRESHOLD = 128 * 1024, -}; - #define min(a, b) (a) < (b) ? a : b static char *med3(char *a, char *b, char *c, @@ -119,7 +109,7 @@ med3(char *a, char *b, char *c, int (*cmp)(const void *a, const void *b, void *a * Single-thread version of qsort. */ void -qsort_arg_st(void *a, size_t n, size_t es, int (*cmp)(const void *a, const void *b, void *arg), void *arg) +qsort_arg(void *a, size_t n, size_t es, int (*cmp)(const void *a, const void *b, void *arg), void *arg) { char *pa, *pb, @@ -208,7 +198,7 @@ loop:SWAPINIT(a, es); { /* Recurse on left partition, then iterate on right partition */ if (d1 > es) - qsort_arg_st(a, d1 / es, es, cmp, arg); + qsort_arg(a, d1 / es, es, cmp, arg); if (d2 > es) { /* Iterate rather than recurse to save stack space */ @@ -222,7 +212,7 @@ loop:SWAPINIT(a, es); { /* Recurse on right partition, then iterate on left partition */ if (d2 > es) - qsort_arg_st(pn - d2, d2 / es, es, cmp, arg); + qsort_arg(pn - d2, d2 / es, es, cmp, arg); if (d1 > es) { /* Iterate rather than recurse to save stack space */ @@ -232,30 +222,3 @@ loop:SWAPINIT(a, es); } } } - -#ifdef HAVE_OPENMP -/** - * Multi-thread version of qsort. Only present when target machine supports - * open MP. - */ -void qsort_arg_mt(void *a, size_t n, size_t es, - int (*cmp)(const void *, const void *, void *), void *arg); -#endif - -/** - * General version of qsort that calls single-threaded of multi-threaded - * qsort depending on open MP availability and given array size. - */ -void -qsort_arg(void *a, size_t n, size_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg) -{ -#ifdef HAVE_OPENMP - if (n >= MULTITHREAD_SIZE_THRESHOLD) - qsort_arg_mt(a, n, es, cmp, arg); - else - qsort_arg_st(a, n, es, cmp, arg); -#else - qsort_arg_st(a, n, es, cmp, arg); -#endif -} diff --git a/third_party/qsort_arg.h b/third_party/qsort_arg.h index 918d90ef1392b0298bed2a255484e1d74d0d5d0b..875a21029d8d3e67b8a3693cc20d6622abb2dc73 100644 --- a/third_party/qsort_arg.h +++ b/third_party/qsort_arg.h @@ -12,15 +12,8 @@ extern "C" { * Single-thread version of qsort. */ void -qsort_arg_st(void *a, size_t n, size_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg); - -/** - * General version of qsort that calls single-threaded of multi-threaded - * qsort depending on open MP availability and given array size. - */ -void qsort_arg(void *a, size_t n, size_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg); +qsort_arg(void *a, size_t n, size_t es, + int (*cmp)(const void *a, const void *b, void *arg), void *arg); #if defined(__cplusplus) } diff --git a/third_party/qsort_arg_mt.c b/third_party/qsort_arg_mt.c deleted file mode 100644 index afb261cf06f429cd175ea8721b06c51edb5cea8d..0000000000000000000000000000000000000000 --- a/third_party/qsort_arg_mt.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Imported from PostgreSQL sources by Teodor Sigaev <teodor@sigaev.ru>, <sigaev@corp.mail.ru> - */ - -/* - * qsort_arg.c: qsort with a passthrough "void *" argument - * - * Modifications from vanilla NetBSD source: - * Add do ... while() macro fix - * Remove __inline, _DIAGASSERTs, __P - * Remove ill-considered "swap_cnt" switch to insertion sort, - * in favor of a simple check for presorted input. - * - * CAUTION: if you change this file, see also qsort.c - * - * $PostgreSQL: pgsql/src/port/qsort_arg.c,v 1.4 2007/03/18 05:36:50 neilc Exp $ - */ - -/* $NetBSD: qsort.c,v 1.13 2003/08/07 16:43:42 agc Exp $ */ - -/*- - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <qsort_arg.h> -#include <stdint.h> - -#if defined(__cplusplus) -extern "C" { -#endif /* defined(__cplusplus) */ - -#ifndef HAVE_OPENMP -#error "HAVE_OPENMP macro is not defined" -#endif - -#define min(a, b) (a) < (b) ? a : b - -static char *med3(char *a, char *b, char *c, - int (*cmp)(const void *a, const void *b, void *arg), void *arg); -static void swapfunc(char *, char *, size_t, int); - -/** - * @brief Reduce the current number of threads in the thread pool to the - * bare minimum. Doesn't prevent the pool from spawning new threads later - * if demand mounts. - */ -static void -thread_pool_trim() -{ - /* - * Trim OpenMP thread pool. - * Though we lack the direct control the workaround below works for - * GNU OpenMP library. The library stops surplus threads on entering - * a parallel region. Can't go below 2 threads due to the - * implementation quirk. - */ -#pragma omp parallel num_threads(2) - ; -} - - -/* - * Qsort routine based on J. L. Bentley and M. D. McIlroy, - * "Engineering a sort function", - * Software--Practice and Experience 23 (1993) 1249-1265. - * We have modified their original by adding a check for already-sorted input, - * which seems to be a win per discussions on pgsql-hackers around 2006-03-21. - */ -#define swapcode(TYPE, parmi, parmj, n) \ -do { \ - size_t i = (n) / sizeof (TYPE); \ - TYPE *pi = (TYPE *)(void *)(parmi); \ - TYPE *pj = (TYPE *)(void *)(parmj); \ - do { \ - TYPE t = *pi; \ - *pi++ = *pj; \ - *pj++ = t; \ - } while (--i > 0); \ -} while (0) - -#define SWAPINIT(a, es) swaptype = ((char *)(a) - (char *)0) % sizeof(long) || \ - (es) % sizeof(long) ? 2 : (es) == sizeof(long)? 0 : 1; - -static void -swapfunc(char *a, char *b, size_t n, int swaptype) -{ - if (swaptype <= 1) - swapcode(long, a, b, n); - else - swapcode(char, a, b, n); -} - -#define swap(a, b) \ - if (swaptype == 0) { \ - long t = *(long *)(void *)(a); \ - *(long *)(void *)(a) = *(long *)(void *)(b); \ - *(long *)(void *)(b) = t; \ - } else \ - swapfunc(a, b, es, swaptype) - -#define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n), swaptype) - -static char * -med3(char *a, char *b, char *c, int (*cmp)(const void *a, const void *b, void *arg), void *arg) -{ - return cmp(a, b, arg) < 0 ? - (cmp(b, c, arg) < 0 ? b : (cmp(a, c, arg) < 0 ? c : a)) - : (cmp(b, c, arg) > 0 ? b : (cmp(a, c, arg) < 0 ? a : c)); -} - -static void -qsort_arg_mt_internal(void *a, size_t n, intptr_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg) -{ - char *pa, *pb, *pc, *pd, *pl, *pm, *pn; - size_t d1, d2; - intptr_t r, swaptype, presorted; - - loop:SWAPINIT(a, es); - if (n < 7) - { - for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) - for (pl = pm; pl > (char *) a && cmp(pl - es, pl, arg) > 0; - pl -= es) - swap(pl, pl - es); - return; - } - presorted = 1; - for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es) - { - if (cmp(pm - es, pm, arg) > 0) - { - presorted = 0; - break; - } - } - if (presorted) - return; - pm = (char *) a + (n / 2) * es; - if (n > 7) - { - pl = (char *) a; - pn = (char *) a + (n - 1) * es; - if (n > 40) - { - size_t d = (n / 8) * es; - pl = med3(pl, pl + d, pl + 2 * d, cmp, arg); - pm = med3(pm - d, pm, pm + d, cmp, arg); - pn = med3(pn - 2 * d, pn - d, pn, cmp, arg); - } - pm = med3(pl, pm, pn, cmp, arg); - } - swap((char*)a, pm); - pa = pb = (char *) a + es; - pc = pd = (char *) a + (n - 1) * es; - for (;;) - { - while (pb <= pc && (r = cmp(pb, a, arg)) <= 0) - { - if (r == 0) - { - swap(pa, pb); - pa += es; - } - pb += es; - } - while (pb <= pc && (r = cmp(pc, a, arg)) >= 0) - { - if (r == 0) - { - swap(pc, pd); - pd -= es; - } - pc -= es; - } - if (pb > pc) - break; - swap(pb, pc); - pb += es; - pc -= es; - } - pn = (char *) a + n * es; - d1 = min(pa - (char *) a, pb - pa); - vecswap(a, pb - d1, d1); - d1 = min(pd - pc, pn - pd - es); - vecswap(pb, pn - d1, d1); - d1 = pb - pa; - d2 = pd - pc; - if (d1 <= d2) - { - /* Recurse on left partition, then iterate on right partition */ - if (d1 > es) { -#pragma omp task - qsort_arg_mt_internal(a, d1 / es, es, cmp, arg); - } - if (d2 > es) - { - /* Iterate rather than recurse to save stack space */ - /* qsort(pn - d2, d2 / es, es, cmp); */ - a = pn - d2; - n = d2 / es; - goto loop; - } - } - else - { - /* Recurse on right partition, then iterate on left partition */ - if (d2 > es) { -#pragma omp task - qsort_arg_mt_internal(pn - d2, d2 / es, es, cmp, arg); - } - if (d1 > es) - { - /* Iterate rather than recurse to save stack space */ - /* qsort(a, d1 / es, es, cmp); */ - n = d1 / es; - goto loop; - } - } -} - -void -qsort_arg_mt(void *a, size_t n, size_t es, - int (*cmp)(const void *a, const void *b, void *arg), void *arg) -{ -#pragma omp parallel - { -#pragma omp single - qsort_arg_mt_internal(a, n, es, cmp, arg); - } - thread_pool_trim(); -} - -#if defined(__cplusplus) -} -#endif /* defined(__cplusplus) */