diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index 8cdaedb7757f0452f83232b206ed33c58bdccb55..6c7ac2b2f11a7f2803d35e8e4289f46ef26f3a9c 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -86,6 +86,8 @@ add_library(box STATIC sequence.c func.c func_def.c + coll_def.c + coll.c alter.cc schema.cc schema_def.c diff --git a/src/box/coll.c b/src/box/coll.c new file mode 100644 index 0000000000000000000000000000000000000000..f366c64073a5622d4f9302a7d74e193deb86523b --- /dev/null +++ b/src/box/coll.c @@ -0,0 +1,267 @@ +/* + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "coll.h"
+#include "third_party/PMurHash.h"
+#include "error.h"
+#include "diag.h"
+#include <unicode/ucol.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum {
+	/** Size of the stack buffer a sort key is fetched through. */
+	MAX_HASH_BUFFER = 1024,
+	/** Size of the stack buffer a locale name is copied into. */
+	MAX_LOCALE = 1024,
+};
+
+/**
+ * Compare two strings using ICU collation.
+ * @param s, slen - first UTF-8 string and its length in bytes.
+ * @param t, tlen - second UTF-8 string and its length in bytes.
+ * @param coll - collation with an opened ICU collator.
+ * @return the UCollationResult of ucol_strcollUTF8() cast to int:
+ *         negative if s < t, zero if equal, positive if s > t.
+ */
+static int
+coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen,
+	     struct coll *coll)
+{
+	assert(coll->icu.collator != NULL);
+
+	UErrorCode status = U_ZERO_ERROR;
+	UCollationResult result = ucol_strcollUTF8(coll->icu.collator,
+						   s, slen, t, tlen, &status);
+	assert(!U_FAILURE(status));
+	return (int)result;
+}
+
+/**
+ * Get a hash of a string using ICU collation.
+ * The sort key of the string is produced piece by piece and every
+ * piece is fed to PMurHash32_Process().
+ * @param s, s_len - UTF-8 string and its length in bytes.
+ * @param ph, pcarry - PMurHash state, updated in place.
+ * @param coll - collation with an opened ICU collator.
+ * @return total number of sort key bytes that were hashed.
+ */
+static uint32_t
+coll_icu_hash(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry,
+	      struct coll *coll)
+{
+	assert(coll->icu.collator != NULL);
+
+	uint32_t total_size = 0;
+	UCharIterator itr;
+	uiter_setUTF8(&itr, s, s_len);
+	uint8_t buf[MAX_HASH_BUFFER];
+	uint32_t state[2] = {0, 0};
+	UErrorCode status = U_ZERO_ERROR;
+	while (true) {
+		int32_t got = ucol_nextSortKeyPart(coll->icu.collator,
+						   &itr, state, buf,
+						   MAX_HASH_BUFFER, &status);
+		/* Same policy as in coll_icu_cmp(): ICU must not fail here. */
+		assert(!U_FAILURE(status));
+		PMurHash32_Process(ph, pcarry, buf, got);
+		total_size += got;
+		/* A short read means the sort key is exhausted. */
+		if (got < MAX_HASH_BUFFER)
+			break;
+	}
+	return total_size;
+}
+
+/**
+ * Set up ICU collator and init cmp and hash members of collation.
+ * @param coll - collation to set up.
+ * @param def - collation definition.
+ * @return 0 on success, -1 on error.
+ */
+static int
+coll_icu_init_cmp(struct coll *coll, const struct coll_def *def)
+{
+	/* Re-initialization: release the collator opened earlier. */
+	if (coll->icu.collator != NULL) {
+		ucol_close(coll->icu.collator);
+		coll->icu.collator = NULL;
+	}
+
+	if (def->locale_len >= MAX_LOCALE) {
+		diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+			 "too long locale");
+		return -1;
+	}
+	/*
+	 * The locale in the definition is length-delimited while
+	 * ucol_open() takes a NUL-terminated string: copy it and
+	 * terminate the copy explicitly.
+	 */
+	char locale[MAX_LOCALE];
+	memcpy(locale, def->locale, def->locale_len);
+	locale[def->locale_len] = '\0';
+	UErrorCode status = U_ZERO_ERROR;
+	struct UCollator *collator = ucol_open(locale, &status);
+	if (U_FAILURE(status)) {
+		diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+			 u_errorName(status));
+		return -1;
+	}
+
+	/*
+	 * Apply only the options that differ from the default. The
+	 * collator is attached to the collation after all of them are
+	 * set, so any failure below closes it via the fail label.
+	 */
+	if (def->icu.french_collation != COLL_ICU_DEFAULT) {
+		enum coll_icu_on_off w = def->icu.french_collation;
+		UColAttributeValue v =
+			w == COLL_ICU_ON ? UCOL_ON :
+			w == COLL_ICU_OFF ? UCOL_OFF :
+			UCOL_DEFAULT;
+		ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, v, &status);
+		if (U_FAILURE(status)) {
+			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+				 "failed to set french_collation");
+			goto fail;
+		}
+	}
+	if (def->icu.alternate_handling != COLL_ICU_AH_DEFAULT) {
+		enum coll_icu_alternate_handling w = def->icu.alternate_handling;
+		UColAttributeValue v =
+			w == COLL_ICU_AH_NON_IGNORABLE ? UCOL_NON_IGNORABLE :
+			w == COLL_ICU_AH_SHIFTED ? UCOL_SHIFTED :
+			UCOL_DEFAULT;
+		ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, v, &status);
+		if (U_FAILURE(status)) {
+			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+				 "failed to set alternate_handling");
+			goto fail;
+		}
+	}
+	if (def->icu.case_first != COLL_ICU_CF_DEFAULT) {
+		enum coll_icu_case_first w = def->icu.case_first;
+		UColAttributeValue v =
+			w == COLL_ICU_CF_OFF ? UCOL_OFF :
+			w == COLL_ICU_CF_UPPER_FIRST ? UCOL_UPPER_FIRST :
+			w == COLL_ICU_CF_LOWER_FIRST ? UCOL_LOWER_FIRST :
+			UCOL_DEFAULT;
+		ucol_setAttribute(collator, UCOL_CASE_FIRST, v, &status);
+		if (U_FAILURE(status)) {
+			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+				 "failed to set case_first");
+			goto fail;
+		}
+	}
+	if (def->icu.case_level != COLL_ICU_DEFAULT) {
+		enum coll_icu_on_off w = def->icu.case_level;
+		UColAttributeValue v =
+			w == COLL_ICU_ON ? UCOL_ON :
+			w == COLL_ICU_OFF ? UCOL_OFF :
+			UCOL_DEFAULT;
+		ucol_setAttribute(collator, UCOL_CASE_LEVEL, v, &status);
+		if (U_FAILURE(status)) {
+			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+				 "failed to set case_level");
+			goto fail;
+		}
+	}
+	if (def->icu.normalization_mode != COLL_ICU_DEFAULT) {
+		enum coll_icu_on_off w = def->icu.normalization_mode;
+		UColAttributeValue v =
+			w == COLL_ICU_ON ? UCOL_ON :
+			w == COLL_ICU_OFF ? UCOL_OFF :
+			UCOL_DEFAULT;
+		ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, v, &status);
+		if (U_FAILURE(status)) {
+			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+				 "failed to set normalization_mode");
+			goto fail;
+		}
+	}
+	if (def->icu.strength != COLL_ICU_STRENGTH_DEFAULT) {
+		enum coll_icu_strength w = def->icu.strength;
+		UColAttributeValue v =
+			w == COLL_ICU_STRENGTH_PRIMARY ? UCOL_PRIMARY :
+			w == COLL_ICU_STRENGTH_SECONDARY ? UCOL_SECONDARY :
+			w == COLL_ICU_STRENGTH_TERTIARY ? UCOL_TERTIARY :
+			w == COLL_ICU_STRENGTH_QUATERNARY ? UCOL_QUATERNARY :
+			w == COLL_ICU_STRENGTH_IDENTICAL ? UCOL_IDENTICAL :
+			UCOL_DEFAULT;
+		ucol_setAttribute(collator, UCOL_STRENGTH, v, &status);
+		if (U_FAILURE(status)) {
+			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+				 "failed to set strength");
+			goto fail;
+		}
+	}
+	if (def->icu.numeric_collation != COLL_ICU_DEFAULT) {
+		enum coll_icu_on_off w = def->icu.numeric_collation;
+		UColAttributeValue v =
+			w == COLL_ICU_ON ? UCOL_ON :
+			w == COLL_ICU_OFF ? UCOL_OFF :
+			UCOL_DEFAULT;
+		ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, v, &status);
+		if (U_FAILURE(status)) {
+			diag_set(ClientError, ER_CANT_CREATE_COLLATION,
+				 "failed to set numeric_collation");
+			goto fail;
+		}
+	}
+
+	coll->icu.collator = collator;
+	coll->cmp = coll_icu_cmp;
+	coll->hash = coll_icu_hash;
+	return 0;
+
+fail:
+	/* The diag is already set; do not leak the opened collator. */
+	ucol_close(collator);
+	return -1;
+}
+
+/**
+ * Destroy ICU collation: close the ICU collator if one is opened.
+ */
+static void
+coll_icu_destroy(struct coll *coll)
+{
+	if (coll->icu.collator != NULL)
+		ucol_close(coll->icu.collator);
+}
+
+/**
+ * Create a collation by definition.
+ * The name is copied into the collation and NUL-terminated.
+ * @param def - collation definition.
+ * @return - the collation OR NULL on error: out of memory or
+ *           a failure to set up the ICU collator (diag is set).
+ */
+struct coll *
+coll_new(const struct coll_def *def)
+{
+	assert(def->type == COLL_TYPE_ICU); /* no more types are implemented yet */
+
+	/* The name is stored inline, right after the structure. */
+	size_t total_len = sizeof(struct coll) + def->name_len + 1;
+	struct coll *coll = calloc(1, total_len);
+	if (coll == NULL) {
+		diag_set(OutOfMemory, total_len, "malloc", "struct coll");
+		return NULL;
+	}
+
+	coll->id = def->id;
+	coll->owner_id = def->owner_id;
+	coll->type = def->type;
+	coll->name_len = def->name_len;
+	memcpy(coll->name, def->name, def->name_len);
+	coll->name[coll->name_len] = 0;
+
+	if (coll_icu_init_cmp(coll, def) != 0) {
+		/* No collator is attached on failure, free() is enough. */
+		free(coll);
+		return NULL;
+	}
+
+	return coll;
+}
+
+/**
+ * Delete a collation.
+ * @param coll - collation to delete.
+ */
+void
+coll_delete(struct coll *coll)
+{
+	assert(coll->type == COLL_TYPE_ICU); /* no more types are implemented yet */
+	coll_icu_destroy(coll);
+	free(coll);
+}
diff --git a/src/box/coll.h b/src/box/coll.h
new file mode 100644
index 0000000000000000000000000000000000000000..fd99967591dd094d97a39d833917346cf308b026
--- /dev/null
+++ b/src/box/coll.h
@@ -0,0 +1,100 @@
+#ifndef TARANTOOL_BOX_COLL_H_INCLUDED
+#define TARANTOOL_BOX_COLL_H_INCLUDED
+/*
+ * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "coll_def.h"
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+struct coll;
+
+/**
+ * String comparator of a collation.
+ * Takes two strings with their lengths and the collation itself;
+ * the result is negative, zero or positive when s is less than,
+ * equal to or greater than t.
+ */
+typedef int (*coll_cmp_f)(const char *s, size_t s_len,
+			  const char *t, size_t t_len,
+			  struct coll *coll);
+
+/**
+ * String hash function of a collation.
+ * Updates the PMurHash state (*ph, *pcarry) with the string and
+ * returns the number of bytes that were hashed; the caller passes
+ * that number to PMurHash32_Result().
+ */
+typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len,
+				uint32_t *ph, uint32_t *pcarry,
+				struct coll *coll);
+
+/**
+ * ICU collation specific data.
+ */
+struct UCollator;
+
+struct coll_icu {
+	/** ICU collator handle; NULL when no collator is opened. */
+	struct UCollator *collator;
+};
+
+/**
+ * A collation.
+ */
+struct coll {
+	/** Personal ID */
+	uint32_t id;
+	/** Owner ID */
+	uint32_t owner_id;
+	/** Collation type. */
+	enum coll_type type;
+	/** Type specific data. */
+	struct coll_icu icu;
+	/** String comparator. */
+	coll_cmp_f cmp;
+	/** String hash function. */
+	coll_hash_f hash;
+	/** Collation name. */
+	size_t name_len;
+	/** NUL-terminated name, stored inline after the structure. */
+	char name[0];
+};
+
+/**
+ * Create a collation by definition.
+ * @param def - collation definition.
+ * @return - the collation OR NULL on error (diag is set).
+ */
+struct coll *
+coll_new(const struct coll_def *def);
+
+/**
+ * Delete a collation.
+ * @param coll - collation to delete.
+ */
+void
+coll_delete(struct coll *coll);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_BOX_COLL_H_INCLUDED */
diff --git a/src/box/coll_def.c b/src/box/coll_def.c
new file mode 100644
index 0000000000000000000000000000000000000000..1a469b70d5b37787b059c9ca3b13cb6803f1844b
--- /dev/null
+++ b/src/box/coll_def.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "coll_def.h"
+
+/** Names of collation types, indexed by enum coll_type. */
+const char *coll_type_strs[] = {
+	[COLL_TYPE_ICU] = "ICU",
+};
+
+/** Names of on/off option values, indexed by enum coll_icu_on_off. */
+const char *coll_icu_on_off_strs[] = {
+	[COLL_ICU_DEFAULT] = "DEFAULT",
+	[COLL_ICU_ON] = "ON",
+	[COLL_ICU_OFF] = "OFF",
+};
+
+/**
+ * Names of alternate handling values, indexed by
+ * enum coll_icu_alternate_handling.
+ */
+const char *coll_icu_alternate_handling_strs[] = {
+	[COLL_ICU_AH_DEFAULT] = "DEFAULT",
+	[COLL_ICU_AH_NON_IGNORABLE] = "NON_IGNORABLE",
+	[COLL_ICU_AH_SHIFTED] = "SHIFTED",
+};
+
+/** Names of case first values, indexed by enum coll_icu_case_first. */
+const char *coll_icu_case_first_strs[] = {
+	[COLL_ICU_CF_DEFAULT] = "DEFAULT",
+	[COLL_ICU_CF_OFF] = "OFF",
+	[COLL_ICU_CF_UPPER_FIRST] = "UPPER_FIRST",
+	[COLL_ICU_CF_LOWER_FIRST] = "LOWER_FIRST",
+};
+
+/** Names of strength values, indexed by enum coll_icu_strength. */
+const char *coll_icu_strength_strs[] = {
+	[COLL_ICU_STRENGTH_DEFAULT] = "DEFAULT",
+	[COLL_ICU_STRENGTH_PRIMARY] = "PRIMARY",
+	[COLL_ICU_STRENGTH_SECONDARY] = "SECONDARY",
+	[COLL_ICU_STRENGTH_TERTIARY] = "TERTIARY",
+	[COLL_ICU_STRENGTH_QUATERNARY] = "QUATERNARY",
+	[COLL_ICU_STRENGTH_IDENTICAL] = "IDENTICAL",
+};
+
diff --git a/src/box/coll_def.h b/src/box/coll_def.h
new file mode 100644
index 0000000000000000000000000000000000000000..f62e794033241cd80f3dbcd9fb507e0b13c7b099
--- /dev/null
+++ b/src/box/coll_def.h
@@ -0,0 +1,135 @@
+#ifndef TARANTOOL_BOX_COLL_DEF_H_INCLUDED
+#define TARANTOOL_BOX_COLL_DEF_H_INCLUDED
+/*
+ * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/**
+ * The supported collation types
+ */
+enum coll_type {
+	COLL_TYPE_ICU = 0,
+	coll_type_MAX,
+};
+
+/** Names of collation types, indexed by enum coll_type. */
+extern const char *coll_type_strs[];
+
+/*
+ * ICU collation options. See
+ * http://icu-project.org/apiref/icu4c/ucol_8h.html#a583fbe7fc4a850e2fcc692e766d2826c
+ *
+ * In every enum below the zero value means "keep the default of
+ * the collator": such an option is not passed to ICU at all.
+ */
+
+/** Settings for simple ICU on/off options */
+enum coll_icu_on_off {
+	COLL_ICU_DEFAULT = 0,
+	COLL_ICU_ON,
+	COLL_ICU_OFF,
+	coll_icu_on_off_MAX
+};
+
+/** Names of on/off settings, indexed by enum coll_icu_on_off. */
+extern const char *coll_icu_on_off_strs[];
+
+/** Alternate handling ICU settings */
+enum coll_icu_alternate_handling {
+	COLL_ICU_AH_DEFAULT = 0,
+	COLL_ICU_AH_NON_IGNORABLE,
+	COLL_ICU_AH_SHIFTED,
+	coll_icu_alternate_handling_MAX
+};
+
+/**
+ * Names of alternate handling settings, indexed by
+ * enum coll_icu_alternate_handling.
+ */
+extern const char *coll_icu_alternate_handling_strs[];
+
+/** Case first ICU settings */
+enum coll_icu_case_first {
+	COLL_ICU_CF_DEFAULT = 0,
+	COLL_ICU_CF_OFF,
+	COLL_ICU_CF_UPPER_FIRST,
+	COLL_ICU_CF_LOWER_FIRST,
+	coll_icu_case_first_MAX
+};
+
+/** Names of case first settings, indexed by enum coll_icu_case_first. */
+extern const char *coll_icu_case_first_strs[];
+
+/** Strength ICU settings */
+enum coll_icu_strength {
+	COLL_ICU_STRENGTH_DEFAULT = 0,
+	COLL_ICU_STRENGTH_PRIMARY,
+	COLL_ICU_STRENGTH_SECONDARY,
+	COLL_ICU_STRENGTH_TERTIARY,
+	COLL_ICU_STRENGTH_QUATERNARY,
+	COLL_ICU_STRENGTH_IDENTICAL,
+	coll_icu_strength_MAX
+};
+
+/** Names of strength settings, indexed by enum coll_icu_strength. */
+extern const char *coll_icu_strength_strs[];
+
+/** Collection of ICU settings */
+struct coll_icu_def {
+	/** Value for the UCOL_FRENCH_COLLATION attribute. */
+	enum coll_icu_on_off french_collation;
+	/** Value for the UCOL_ALTERNATE_HANDLING attribute. */
+	enum coll_icu_alternate_handling alternate_handling;
+	/** Value for the UCOL_CASE_FIRST attribute. */
+	enum coll_icu_case_first case_first;
+	/** Value for the UCOL_CASE_LEVEL attribute. */
+	enum coll_icu_on_off case_level;
+	/** Value for the UCOL_NORMALIZATION_MODE attribute. */
+	enum coll_icu_on_off normalization_mode;
+	/** Value for the UCOL_STRENGTH attribute. */
+	enum coll_icu_strength strength;
+	/** Value for the UCOL_NUMERIC_COLLATION attribute. */
+	enum coll_icu_on_off numeric_collation;
+};
+
+/**
+ * Definition of a collation.
+ */
+struct coll_def {
+	/** Personal ID */
+	uint32_t id;
+	/** Owner ID */
+	uint32_t owner_id;
+	/** Collation name: length-delimited, a terminator is not required. */
+	size_t name_len;
+	const char *name;
+	/** Locale: length-delimited, a terminator is not required. */
+	size_t locale_len;
+	const char *locale;
+	/** Collation type. */
+	enum coll_type type;
+	/** Type specific options. */
+	struct coll_icu_def icu;
+};
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
+
+#endif /* TARANTOOL_BOX_COLL_DEF_H_INCLUDED */
diff --git a/src/box/errcode.h b/src/box/errcode.h
index 7c42300929ba8dc49d1f7ce3ca5b9cc030a55b3f..aaaa9127c546fd3b4b77fabba802a1ea13bb227b 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -202,6 +202,7 @@ struct errcode_record {
 	/*147 */_(ER_SEQUENCE_OVERFLOW, "Sequence '%s' has overflowed") \
 	/*148 */_(ER_SEQUENCE_ACCESS_DENIED, "%s access is denied for user '%s' to sequence '%s'") \
 	/*149 */_(ER_SPACE_FIELD_IS_DUPLICATE, "Space field '%s' is duplicate") \
+	/*150 */_(ER_CANT_CREATE_COLLATION, "Failed to initialize collation: %s.") \
 
 /*
  * !IMPORTANT! Please follow instructions at start of the file
diff --git a/test/box/misc.result b/test/box/misc.result
index dc525ed69f5f6f007c05233e55636f0d90fc7111..f332fc156c796d3cd0b3ad503dbfbbd7b0255c7e 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -327,6 +327,7 @@ t;
   - 'box.error.PASSWORD_MISMATCH : 47'
   - 'box.error.UNSUPPORTED_ROLE_PRIV : 98'
   - 'box.error.ACCESS_DENIED : 42'
+  - 'box.error.CANT_CREATE_COLLATION : 150'
   - 'box.error.USER_EXISTS : 46'
   - 'box.error.WAL_IO : 40'
   - 'box.error.RTREE_RECT : 101'
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 837530ed5a93bf6b8b27f2a9b0fd0eb47b96fafd..a8d339d7806e8bb4c12eecc7f4deb27d5f56d1fa 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -181,3 +181,6 @@ target_link_libraries(vy_write_iterator.test xlog ${ITERATOR_TEST_LIBS})
 
 add_executable(vy_cache.test vy_cache.c ${ITERATOR_TEST_SOURCES})
 target_link_libraries(vy_cache.test ${ITERATOR_TEST_LIBS})
+
+add_executable(coll.test coll.cpp)
+target_link_libraries(coll.test box)
diff --git a/test/unit/coll.cpp b/test/unit/coll.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..edf7ec2101f1a200382779e936de60de65bf1f61
--- /dev/null
+++ b/test/unit/coll.cpp
@@ -0,0 +1,175 @@
+#include "box/coll.h"
+#include <iostream>
+#include <vector>
+#include <algorithm>
+#include <string.h>
+#include <box/coll_def.h>
+#include <assert.h>
+#include <msgpuck.h>
+#include <diag.h>
+#include <fiber.h>
+#include <memory.h>
+#include "third_party/PMurHash.h"
+
+using namespace std;
+
+enum { HASH_SEED = 13 };
+
+/** "Less than" predicate over C strings backed by a collation. */
+struct comp {
+	struct coll *coll;
+	comp(struct coll *coll_) : coll(coll_) {}
+	bool operator()(const char *a, const char *b) const
+	{
+		return coll->cmp(a, strlen(a), b, strlen(b), coll) < 0;
+	}
+};
+
+/**
+ * Sort the strings with the collation and print them one per line.
+ * Every string but the first is followed by the result of its
+ * comparison with the previous one.
+ */
+void
+test_sort_strings(vector<const char *> &strings, struct coll *coll)
+{
+	sort(strings.begin(), strings.end(), comp(coll));
+	const char *prev = strings[0];
+	cout << prev << endl;
+	for (size_t i = 1; i < strings.size(); i++) {
+		const char *cur = strings[i];
+		int cmp = coll->cmp(cur, strlen(cur), prev, strlen(prev),
+				    coll);
+		cout << cur
+		     << (cmp < 0 ? " LESS" : cmp > 0 ? " GREATER " : " EQUAL")
+		     << endl;
+		prev = cur;
+	}
+}
+
+/**
+ * Create a collation by the definition, sort and print a private
+ * copy of the strings with it and delete the collation.
+ */
+static void
+sort_and_print(const struct coll_def *def, vector<const char *> strings)
+{
+	struct coll *coll = coll_new(def);
+	assert(coll != NULL);
+	test_sort_strings(strings, coll);
+	coll_delete(coll);
+}
+
+/**
+ * Print sorting results for several collation definitions.
+ * The options set for one case stay in effect for the next ones.
+ */
+void
+manual_test()
+{
+	cout << "\t*** " << __func__ << " ***" << endl;
+
+	/* Every case starts from the same initial order of strings. */
+	const vector<const char *> mixed = {"S", "z", "Б", "бб", "е", "ЕЕЕЕ", "ё", "Ё", "и", "И", "123", "45", "Z" };
+	const vector<const char *> latin = {"aa", "bb", "cc", "ch", "dd", "gg", "hh", "ii" };
+
+	struct coll_def def;
+	memset(&def, 0, sizeof(def));
+	def.locale = "ru_RU";
+	def.locale_len = strlen(def.locale);
+	def.type = COLL_TYPE_ICU;
+	def.name = "test";
+	def.name_len = strlen(def.name);
+
+	cout << " -- default ru_RU -- " << endl;
+	sort_and_print(&def, mixed);
+
+	cout << " -- --||-- + upper first -- " << endl;
+	def.icu.case_first = COLL_ICU_CF_UPPER_FIRST;
+	sort_and_print(&def, mixed);
+
+	cout << " -- --||-- + lower first -- " << endl;
+	def.icu.case_first = COLL_ICU_CF_LOWER_FIRST;
+	sort_and_print(&def, mixed);
+
+	cout << " -- --||-- + secondary strength + numeric -- " << endl;
+	def.icu.strength = COLL_ICU_STRENGTH_SECONDARY;
+	def.icu.numeric_collation = COLL_ICU_ON;
+	sort_and_print(&def, mixed);
+
+	cout << " -- --||-- + case level -- " << endl;
+	def.icu.case_level = COLL_ICU_ON;
+	sort_and_print(&def, mixed);
+
+	cout << " -- en_EN -- " << endl;
+	def.locale = "en_EN-EN";
+	def.locale_len = strlen(def.locale);
+	sort_and_print(&def, latin);
+
+	cout << " -- cs_CZ -- " << endl;
+	def.locale = "cs_CZ";
+	def.locale_len = strlen(def.locale);
+	sort_and_print(&def, latin);
+
+	cout << "\t*** " << __func__ << ": done ***" << endl;
+}
+
+/** Hash a NUL-terminated string with the hash function of the collation. */
+unsigned calc_hash(const char *str, struct coll *coll)
+{
+	size_t str_len = strlen(str);
+	uint32_t h = HASH_SEED;
+	uint32_t carry = 0;
+	uint32_t actual_len = coll->hash(str, str_len, &h, &carry, coll);
+	return PMurHash32_Result(h, carry, actual_len);
+}
+
+/**
+ * Check that hashes follow the collation: strings that differ only
+ * in case have different hashes at the default strength and equal
+ * hashes at the secondary strength.
+ */
+void
+hash_test()
+{
+	cout << "\t*** " << __func__ << " ***" << endl;
+
+	struct coll_def def;
+	memset(&def, 0, sizeof(def));
+	def.locale = "ru_RU";
+	def.locale_len = strlen(def.locale);
+	def.type = COLL_TYPE_ICU;
+	def.name = "test";
+	def.name_len = strlen(def.name);
+	struct coll *coll;
+
+	/* Case sensitive */
+	coll = coll_new(&def);
+	assert(coll != NULL);
+	cout << "Case sensitive" << endl;
+	cout << (calc_hash("ае", coll) != calc_hash("аё", coll) ? "OK" : "Fail") << endl;
+	cout << (calc_hash("ае", coll) != calc_hash("аЕ", coll) ? "OK" : "Fail") << endl;
+	cout << (calc_hash("аЕ", coll) != calc_hash("аё", coll) ? "OK" : "Fail") << endl;
+	coll_delete(coll);
+
+	/* Case insensitive */
+	def.icu.strength = COLL_ICU_STRENGTH_SECONDARY;
+	coll = coll_new(&def);
+	assert(coll != NULL);
+	cout << "Case insensitive" << endl;
+	cout << (calc_hash("ае", coll) != calc_hash("аё", coll) ? "OK" : "Fail") << endl;
+	cout << (calc_hash("ае", coll) == calc_hash("аЕ", coll) ? "OK" : "Fail") << endl;
+	cout << (calc_hash("аЕ", coll) != calc_hash("аё", coll) ? "OK" : "Fail") << endl;
+	coll_delete(coll);
+
+	cout << "\t*** " << __func__ << ": done ***" << endl;
+}
+
+
+int
+main(int, const char**)
+{
+	memory_init();
+	fiber_init(fiber_c_invoke);
+	manual_test();
+	hash_test();
+	fiber_free();
+	memory_free();
+}
\ No newline at end of file
diff --git a/test/unit/coll.result b/test/unit/coll.result
new file mode 100644
index 0000000000000000000000000000000000000000..937579aa3f6011ceac71dde5efe06f696350ac3b
--- /dev/null
+++ b/test/unit/coll.result
@@ -0,0 +1,100 @@
+	*** manual_test ***
+ -- default ru_RU -- 
+123
+45 GREATER 
+Б GREATER 
+бб GREATER 
+е GREATER 
+ё GREATER 
+Ё GREATER 
+ЕЕЕЕ GREATER 
+и GREATER 
+И GREATER 
+S GREATER 
+z GREATER 
+Z GREATER 
+ -- --||-- + upper first -- 
+123
+45 GREATER 
+Б GREATER 
+бб GREATER 
+е GREATER 
+Ё GREATER 
+ё GREATER 
+ЕЕЕЕ GREATER 
+И GREATER 
+и GREATER 
+S GREATER 
+Z GREATER 
+z GREATER 
+ -- --||-- + lower first -- 
+123
+45 GREATER 
+Б GREATER 
+бб GREATER 
+е GREATER 
+ё GREATER 
+Ё GREATER 
+ЕЕЕЕ GREATER 
+и GREATER 
+И GREATER 
+S GREATER 
+z GREATER 
+Z GREATER 
+ -- --||-- + secondary strength + numeric -- 
+45
+123 GREATER 
+Б GREATER 
+бб GREATER 
+е GREATER 
+ё GREATER 
+Ё EQUAL
+ЕЕЕЕ GREATER 
+и GREATER 
+И EQUAL
+S GREATER 
+z GREATER 
+Z EQUAL
+ -- --||-- + case level -- 
+45
+123 GREATER 
+Б GREATER 
+бб GREATER 
+е GREATER 
+ё GREATER 
+Ё GREATER 
+ЕЕЕЕ GREATER 
+и GREATER 
+И GREATER 
+S GREATER 
+z GREATER 
+Z GREATER 
+ -- en_EN -- 
+aa
+bb GREATER 
+cc GREATER 
+ch GREATER 
+dd GREATER 
+gg GREATER 
+hh GREATER 
+ii GREATER 
+ -- cs_CZ -- 
+aa
+bb GREATER 
+cc GREATER 
+dd GREATER 
+gg GREATER 
+hh GREATER 
+ch GREATER 
+ii GREATER 
+	*** manual_test: done ***
+	*** hash_test ***
+Case sensitive
+OK
+OK
+OK
+Case insensitive
+OK
+OK
+OK
+	*** hash_test: done ***