diff --git a/.gitignore b/.gitignore index 3ce574a60c09f884642f3ea37bdf543b2521cdba..b532180bdc800798065db25c0b63b7469ba026a7 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ test/unit/rope_stress test/unit/rlist test/unit/bit_test test/unit/bitset_basic_test +test/unit/bitset_iterator_test Makefile CMakeFiles CMakeCache.txt diff --git a/include/lib/bitset/expr.h b/include/lib/bitset/expr.h new file mode 100644 index 0000000000000000000000000000000000000000..8cc2b863e2e5167cab844ab6824826233769192c --- /dev/null +++ b/include/lib/bitset/expr.h @@ -0,0 +1,137 @@ +#ifndef TARANTOOL_LIB_BITSET_EXPR_H_INCLUDED +#define TARANTOOL_LIB_BITSET_EXPR_H_INCLUDED + +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+/**
+ * @file
+ * @brief Expressions on bitsets.
+ *
+ * The library has full-featured support for evaluating logical expressions
+ * on @link bitset bitsets @endlink. You can prepare an arbitrary logical
+ * expression in Disjunctive normal form (DNF) using @link bitset_expr @endlink
+ * methods and then evaluate the expression on the set of @link bitset @endlink
+ * objects. Currently only @link bitset_iterator @endlink supports expressions.
+ * It can be used for performing iteration over the expression result on the
+ * fly, without producing temporary bitsets.
+ *
+ * @link bitset_expr @endlink holds any expression that can be represented
+ * in DNF form. Since every propositional formula can be represented using DNF,
+ * you can construct any such logical expression using methods from this module.
+ *
+ * A DNF example: (~b0 & b1 & ~b2) | (b2 & ~b3 & b4) | (b3 & b6)
+ * where b[0-9] is an arbitrary bitset.
+ *
+ * @link bitset_expr @endlink does not operate directly on @link bitset @endlink
+ * objects. Instead, you should use placeholders (identifiers)
+ * which will be bound to the actual objects by the selected evaluator
+ * (e.g. bitset_iterator).
+ *
+ * @link http://en.wikipedia.org/wiki/Disjunctive_normal_form @endlink
+ * @note Reduce operations in both cases are left-associative.
+ *
+ * @see bitset_iterator_init
+ */
+
+#include "bitset.h"
+
+/** @cond false **/
+struct bitset_expr_conj {
+	size_t size; /* number of parameters in use */
+	size_t capacity; /* number of allocated parameter slots */
+	size_t *bitset_ids; /* placeholder id of each parameter */
+	bool *pre_nots; /* true if the parameter is negated */
+};
+/** @endcond **/
+
+/**
+ * @brief Bitset Expression
+ */
+struct bitset_expr {
+	/** @cond false **/
+	/** Size of \a conjs array **/
+	size_t size;
+	/** Capacity of \a conjs array **/
+	size_t capacity;
+	/** Array of conjunctions **/
+	struct bitset_expr_conj *conjs;
+	/** Memory allocator **/
+	void *(*realloc)(void *ptr, size_t size);
+	/** @endcond **/
+};
+
+/**
+ * @brief Construct bitset expression \a expr
+ * @param expr bitset expression
+ * @param realloc memory allocator to use
+ */
+void
+bitset_expr_create(struct bitset_expr *expr,
+		   void *(*realloc)(void *ptr, size_t size));
+
+/**
+ * @brief Destruct bitset expression \a expr
+ * @param expr bitset expression
+ */
+void
+bitset_expr_destroy(struct bitset_expr *expr);
+
+/**
+ * @brief Clear @a expr (remove all conjunctions from it)
+ * @param expr bitset expression
+ * @note Allocated memory is not freed. You can continue to use the object
+ * after this operation. Use @link bitset_expr_destroy @endlink to destroy
+ * the object completely.
+ */
+void
+bitset_expr_clear(struct bitset_expr *expr);
+
+/**
+ * @brief Add a new conjunction to \a expr.
+ * @param expr bitset expression
+ * @retval 0 on success
+ * @retval -1 on memory error
+ */
+int
+bitset_expr_add_conj(struct bitset_expr *expr);
+
+/**
+ * @brief Add a new placeholder for a bitset to the current conjunction.
+ * @param expr bitset expression
+ * @param bitset_id identifier of the bitset (placeholder)
+ * @param pre_not if set to true then logical NOT will be applied to the
+ * bitset during evaluation.
+ * @retval 0 on success + * @retval -1 on memory error + */ +int +bitset_expr_add_param(struct bitset_expr *expr, size_t bitset_id, bool pre_not); + +#endif /* TARANTOOL_LIB_BITSET_EXPR_H_INCLUDED */ diff --git a/include/lib/bitset/iterator.h b/include/lib/bitset/iterator.h new file mode 100644 index 0000000000000000000000000000000000000000..b47ea2acface7c8ae6f516a6fa0df6a6dbaf5dab --- /dev/null +++ b/include/lib/bitset/iterator.h @@ -0,0 +1,130 @@ +#ifndef TARANTOOL_LIB_BITSET_ITERATOR_H_INCLUDED +#define TARANTOOL_LIB_BITSET_ITERATOR_H_INCLUDED + +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * @file + * @brief Iterator for @link bitset @endlink objects with expressions support. 
+ *
+ * @link bitset_iterator @endlink is used to iterate over a result of the
+ * evaluation of a @link bitset_expr logical expression @endlink on the set
+ * of bitsets. The iterator evaluates expressions on the fly, without producing
+ * temporary bitsets. Each iteration (@link bitset_iterator_next @endlink)
+ * returns the next position where a given expression evaluates to true on a
+ * given set of bitsets.
+ *
+ * @see expr.h
+ */
+
+#include <lib/bitset/bitset.h>
+#include <lib/bitset/expr.h>
+
+/** @cond false **/
+struct bitset_iterator_conj;
+/** @endcond **/
+
+/**
+ * @brief Bitset Iterator
+ */
+struct bitset_iterator {
+	/** @cond false **/
+	size_t size; /* number of conjunctions in use */
+	size_t capacity; /* number of allocated conjunction slots */
+	struct bitset_iterator_conj *conjs; /* conjunctions bound to bitsets */
+	struct bitset_page *page; /* current result page */
+	struct bitset_page *page_tmp; /* scratch page for one conjunction */
+	void *(*realloc)(void *ptr, size_t size); /* memory allocator */
+	struct bit_iterator page_it; /* bit iterator over page */
+	/** @endcond **/
+};
+
+/**
+ * @brief Construct \a it.
+ *
+ * The created iterator must be initialized by
+ * @link bitset_iterator_init @endlink method before first usage.
+ * @param it bitset iterator
+ * @param realloc memory allocator to use
+ */
+void
+bitset_iterator_create(struct bitset_iterator *it,
+		       void *(*realloc)(void *ptr, size_t size));
+
+/**
+ * @brief Destruct \a it.
+ * @param it bitset iterator
+ */
+void
+bitset_iterator_destroy(struct bitset_iterator *it);
+
+/**
+ * @brief Initialize the \a it using \a expr and \a bitsets and rewind the
+ * iterator to the start position.
+ *
+ * @note It is safe to reinitialize an iterator with a new expression and new
+ * bitsets. All internal buffers are safely reused in this case with minimal
+ * number of new allocations.
+ *
+ * @note @a expr object is only used during initialization time and can be
+ * safely reused or destroyed just after this call.
+ *
+ * @param it bitset iterator
+ * @param expr bitset expression
+ * @param bitsets array of pointers to bitsets that should be used to bind
+ * the expression parameters.
+ * @param bitsets_size size of @a bitsets array
+ * @retval 0 on success
+ * @retval -1 on memory error
+ * @see expr.h
+ */
+int
+bitset_iterator_init(struct bitset_iterator *it, struct bitset_expr *expr,
+		     struct bitset **bitsets, size_t bitsets_size);
+
+/**
+ * @brief Rewind the \a it to the start position.
+ * @param it bitset iterator
+ * @see @link bitset_iterator_init @endlink
+ */
+void
+bitset_iterator_rewind(struct bitset_iterator *it);
+
+/**
+ * @brief Move \a it to a next position
+ * @param it bitset iterator
+ * @return a next offset where the expression evaluates to true or SIZE_MAX
+ * if there are no more bits in the result set.
+ * @see @link bitset_iterator_init @endlink
+ */
+size_t
+bitset_iterator_next(struct bitset_iterator *it);
+
+#endif /* TARANTOOL_LIB_BITSET_ITERATOR_H_INCLUDED */
diff --git a/src/lib/bitset/CMakeLists.txt b/src/lib/bitset/CMakeLists.txt
index 530fda2bdb9a03be64929235dd46b742915439ab..0b1316d002e0118f8103918146c4e8e8acb622e5 100644
--- a/src/lib/bitset/CMakeLists.txt
+++ b/src/lib/bitset/CMakeLists.txt
@@ -1,6 +1,8 @@ set(lib_sources bitset.c page.c + expr.c + iterator.c ) set_source_files_compile_flags(${lib_sources})
diff --git a/src/lib/bitset/expr.c b/src/lib/bitset/expr.c
new file mode 100644
index 0000000000000000000000000000000000000000..860066899fb7e53011cd1c876bf66aa4a746cc1f
--- /dev/null
+++ b/src/lib/bitset/expr.c
@@ -0,0 +1,209 @@
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <lib/bitset/expr.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* Initial number of conjunction slots allocated for an expression. */
+static const size_t EXPR_DEFAULT_CAPACITY = 2;
+/* Initial number of parameter slots allocated for a conjunction. */
+static const size_t EXPR_CONJ_DEFAULT_CAPACITY = 32;
+
+/*
+ * Release ptr through the expression's allocator. NULL is skipped because
+ * a libc-style realloc(NULL, 0) may hand out a fresh block instead of
+ * doing nothing.
+ */
+static void
+bitset_expr_free(struct bitset_expr *expr, void *ptr)
+{
+	if (ptr != NULL)
+		expr->realloc(ptr, 0);
+}
+
+/* Zero the expression and remember the allocator; nothing is allocated. */
+void
+bitset_expr_create(struct bitset_expr *expr,
+		   void *(*realloc)(void *ptr, size_t size))
+{
+	memset(expr, 0, sizeof(*expr));
+	expr->realloc = realloc;
+}
+
+/* Free every buffer owned by the expression and zero the structure. */
+void
+bitset_expr_destroy(struct bitset_expr *expr)
+{
+	/*
+	 * Walk all allocated slots, not only the first expr->size ones:
+	 * bitset_expr_clear() resets the sizes but keeps the per-conjunction
+	 * buffers, so slots past expr->size can still own memory.
+	 */
+	for (size_t c = 0; c < expr->capacity; c++) {
+		bitset_expr_free(expr, expr->conjs[c].bitset_ids);
+		bitset_expr_free(expr, expr->conjs[c].pre_nots);
+	}
+	bitset_expr_free(expr, expr->conjs);
+	memset(expr, 0, sizeof(*expr));
+}
+
+/* Drop all conjunctions but keep the allocated buffers for reuse. */
+void
+bitset_expr_clear(struct bitset_expr *expr)
+{
+	for (size_t c = 0; c < expr->size; c++) {
+		struct bitset_expr_conj *conj = &expr->conjs[c];
+
+		/* the arrays are still NULL if no parameter was ever added */
+		if (conj->size > 0) {
+			memset(conj->bitset_ids, 0,
+			       conj->size * sizeof(*conj->bitset_ids));
+			memset(conj->pre_nots, 0,
+			       conj->size * sizeof(*conj->pre_nots));
+		}
+		conj->size = 0;
+	}
+
+	expr->size = 0;
+}
+
+/*
+ * Make sure expr->conjs has room for at least size conjunctions.
+ * New slots are zero-filled. Returns 0 on success, -1 if the allocator
+ * fails; in that case the expression is left unchanged.
+ */
+static int
+bitset_expr_reserve(struct bitset_expr *expr, size_t size)
+{
+	if (size <= expr->capacity)
+		return 0;
+
+	size_t capacity = (expr->capacity > 0)
+		? expr->capacity
+		: EXPR_DEFAULT_CAPACITY;
+
+	/* Grow geometrically until the requested size fits */
+	while (capacity < size) {
+		capacity *= 2;
+	}
+
+	struct bitset_expr_conj *conjs =
+		expr->realloc(expr->conjs, capacity * sizeof(*expr->conjs));
+
+	if (conjs == NULL)
+		return -1;
+
+	memset(conjs + expr->capacity, 0, (capacity - expr->capacity) *
+		sizeof(*expr->conjs));
+	expr->conjs = conjs;
+	expr->capacity = capacity;
+
+	return 0;
+}
+
+/* Append an empty conjunction; it becomes the current one. */
+int
+bitset_expr_add_conj(struct bitset_expr *expr)
+{
+	if (bitset_expr_reserve(expr, expr->size + 1) != 0)
+		return -1;
+
+	expr->size++;
+
+	return 0;
+}
+
+/*
+ * Make sure conj can hold at least size parameters; new entries are
+ * zero-filled. Returns 0 on success, -1 if the allocator fails.
+ */
+static int
+bitset_expr_conj_reserve(struct bitset_expr *expr,
+			 struct bitset_expr_conj *conj, size_t size)
+{
+	if (size <= conj->capacity)
+		return 0;
+
+	size_t capacity = (conj->capacity > 0)
+		? conj->capacity
+		: EXPR_CONJ_DEFAULT_CAPACITY;
+
+	while (capacity < size) {
+		capacity *= 2;
+	}
+
+	/*
+	 * Store each array as soon as it has been grown: a successful
+	 * realloc may move the data, so conj must never keep the stale
+	 * pointer even if the second allocation fails. conj->capacity is
+	 * raised only after both arrays have grown, so a failure leaves
+	 * the conjunction consistent with its old capacity.
+	 */
+	size_t *bitset_ids = expr->realloc(conj->bitset_ids,
+					capacity * sizeof(*conj->bitset_ids));
+	if (bitset_ids == NULL)
+		return -1;
+	conj->bitset_ids = bitset_ids;
+
+	bool *pre_nots = expr->realloc(conj->pre_nots,
+					capacity * sizeof(*conj->pre_nots));
+	if (pre_nots == NULL)
+		return -1;
+	conj->pre_nots = pre_nots;
+
+	memset(bitset_ids + conj->capacity, 0,
+		(capacity - conj->capacity) * sizeof(*conj->bitset_ids));
+	memset(pre_nots + conj->capacity, 0,
+		(capacity - conj->capacity) * sizeof(*conj->pre_nots));
+
+	conj->capacity = capacity;
+
+	return 0;
+}
+
+/* Append a parameter to the current (most recently added) conjunction. */
+int
+bitset_expr_add_param(struct bitset_expr *expr, size_t bitset_id,
+		      bool pre_not)
+{
+	assert (expr->size > 0);
+	struct bitset_expr_conj *conj = &expr->conjs[expr->size - 1];
+
+	if (bitset_expr_conj_reserve(expr, conj, conj->size + 1) != 0)
+		return -1;
+
+	conj->bitset_ids[conj->size] = bitset_id;
+	conj->pre_nots[conj->size] = pre_not;
+	conj->size++;
+
+	return 0;
+}
diff --git a/src/lib/bitset/iterator.c b/src/lib/bitset/iterator.c
new file mode 100644
index 
0000000000000000000000000000000000000000..6e21f670484c738a52f9542b16a7bf3d48a51ce8
--- /dev/null
+++ b/src/lib/bitset/iterator.c
@@ -0,0 +1,470 @@
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <lib/bitset/iterator.h>
+#include <lib/bitset/expr.h>
+#include "page.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Initial number of conjunction slots allocated by an iterator. */
+static const size_t ITERATOR_DEFAULT_CAPACITY = 2;
+/* Initial number of bitset slots allocated per conjunction. */
+static const size_t ITERATOR_CONJ_DEFAULT_CAPACITY = 32;
+
+/*
+ * One conjunction (AND group) of the expression bound to real bitsets.
+ * bitsets, pre_nots and pages are parallel arrays with size used entries.
+ */
+struct bitset_iterator_conj {
+	/* first position of the current page, SIZE_MAX if exhausted */
+	size_t page_first_pos;
+	size_t size;
+	size_t capacity;
+	struct bitset **bitsets;
+	bool *pre_nots;
+	struct bitset_page **pages;
+};
+
+/*
+ * Release ptr through the iterator's allocator. NULL is skipped because
+ * a libc-style realloc(NULL, 0) may hand out a fresh block instead of
+ * doing nothing.
+ */
+static void
+bitset_iterator_free(struct bitset_iterator *it, void *ptr)
+{
+	if (ptr != NULL)
+		it->realloc(ptr, 0);
+}
+
+/**
+ * @brief Construct iterator
+ * @param it iterator
+ * @param realloc memory allocator to use
+ */
+void
+bitset_iterator_create(struct bitset_iterator *it,
+		       void *(*realloc)(void *ptr, size_t size))
+{
+	memset(it, 0, sizeof(*it));
+	it->realloc = realloc;
+}
+
+/**
+ * @brief Destroys the @a it object
+ * @param it object
+ * @see bitset_iterator_create
+ */
+void
+bitset_iterator_destroy(struct bitset_iterator *it)
+{
+	/*
+	 * Walk all allocated slots, not only the first it->size ones: after
+	 * a re-init with a shorter expression the slots past it->size still
+	 * own their buffers.
+	 */
+	for (size_t c = 0; c < it->capacity; c++) {
+		bitset_iterator_free(it, it->conjs[c].bitsets);
+		bitset_iterator_free(it, it->conjs[c].pre_nots);
+		bitset_iterator_free(it, it->conjs[c].pages);
+	}
+	bitset_iterator_free(it, it->conjs);
+
+	/* The pages are allocated lazily by bitset_iterator_init() */
+	if (it->page != NULL) {
+		bitset_page_destroy(it->page);
+		it->realloc(it->page, 0);
+	}
+	if (it->page_tmp != NULL) {
+		bitset_page_destroy(it->page_tmp);
+		it->realloc(it->page_tmp, 0);
+	}
+	memset(it, 0, sizeof(*it));
+}
+
+/*
+ * Make sure it->conjs has room for size conjunctions; new slots are
+ * zero-filled. Returns 0 on success, -1 if the allocator fails.
+ */
+static int
+bitset_iterator_reserve(struct bitset_iterator *it, size_t size)
+{
+	if (size <= it->capacity)
+		return 0;
+
+	size_t capacity = (it->capacity > 0)
+		? it->capacity
+		: ITERATOR_DEFAULT_CAPACITY;
+
+	while (capacity <= size) {
+		capacity *= 2;
+	}
+
+	struct bitset_iterator_conj *conjs =
+		it->realloc(it->conjs, capacity * sizeof(*it->conjs));
+	if (conjs == NULL)
+		return -1;
+
+	memset(conjs + it->capacity, 0,
+	       (capacity - it->capacity) * sizeof(*it->conjs));
+
+	it->conjs = conjs;
+	it->capacity = capacity;
+
+	return 0;
+}
+
+/*
+ * Make sure the parallel arrays of conj can hold size entries; new entries
+ * are zero-filled. Returns 0 on success, -1 if the allocator fails.
+ */
+static int
+bitset_iterator_conj_reserve(struct bitset_iterator *it,
+			     struct bitset_iterator_conj *conj, size_t size)
+{
+	if (size <= conj->capacity)
+		return 0;
+
+	size_t capacity = (conj->capacity > 0)
+		? conj->capacity
+		: ITERATOR_CONJ_DEFAULT_CAPACITY;
+
+	while (capacity <= size) {
+		capacity *= 2;
+	}
+
+	/*
+	 * Store each array as soon as it has been grown: a successful
+	 * realloc may move the data, so conj must never keep the stale
+	 * pointer even if a later allocation fails. conj->capacity is
+	 * raised only once all three arrays have grown, so a failure
+	 * leaves the conjunction consistent with its old capacity.
+	 */
+	struct bitset **bitsets = it->realloc(conj->bitsets,
+					capacity * sizeof(*conj->bitsets));
+	if (bitsets == NULL)
+		return -1;
+	conj->bitsets = bitsets;
+
+	bool *pre_nots = it->realloc(conj->pre_nots,
+					capacity * sizeof(*conj->pre_nots));
+	if (pre_nots == NULL)
+		return -1;
+	conj->pre_nots = pre_nots;
+
+	struct bitset_page **pages = it->realloc(conj->pages,
+					capacity * sizeof(*conj->pages));
+	if (pages == NULL)
+		return -1;
+	conj->pages = pages;
+
+	memset(bitsets + conj->capacity, 0,
+	       (capacity - conj->capacity) * sizeof(*conj->bitsets));
+	memset(pre_nots + conj->capacity, 0,
+	       (capacity - conj->capacity) * sizeof(*conj->pre_nots));
+	memset(pages + conj->capacity, 0,
+	       (capacity - conj->capacity) * sizeof(*conj->pages));
+
+	conj->capacity = capacity;
+
+	return 0;
+}
+
+/*
+ * Bind expr to the bitsets in p_bitsets and rewind to the first result.
+ * The result pages are allocated on the first call and reused afterwards.
+ */
+int
+bitset_iterator_init(struct bitset_iterator *it, struct bitset_expr *expr,
+		     struct bitset **p_bitsets, size_t bitsets_size)
+{
+	assert(it != NULL);
+	assert(expr != NULL);
+	assert(p_bitsets != NULL);
+	assert(expr->size > 0);
+
+	/* bitsets_size is referenced only by assert() below */
+	(void) bitsets_size;
+
+	/*
+	 * Allocate the pages only once. Doing it on every call would leak
+	 * the previous pair each time the iterator is re-initialized.
+	 */
+	size_t page_alloc_size = bitset_page_alloc_size(it->realloc);
+	if (it->page == NULL) {
+		it->page = it->realloc(NULL, page_alloc_size);
+		if (it->page == NULL)
+			return -1;
+		bitset_page_create(it->page);
+	}
+	if (it->page_tmp == NULL) {
+		it->page_tmp = it->realloc(NULL, page_alloc_size);
+		if (it->page_tmp == NULL)
+			return -1;
+		bitset_page_create(it->page_tmp);
+	}
+
+	if (bitset_iterator_reserve(it, expr->size) != 0)
+		return -1;
+
+	for (size_t c = 0; c < expr->size; c++) {
+		struct bitset_expr_conj *exconj = &expr->conjs[c];
+		struct bitset_iterator_conj *itconj = &it->conjs[c];
+		assert(exconj->size > 0);
+
+		if (bitset_iterator_conj_reserve(it, itconj, exconj->size) != 0)
+			return -1;
+
+		itconj->page_first_pos = 0;
+		for (size_t b = 0; b < exconj->size; b++) {
+			assert(exconj->bitset_ids[b] < bitsets_size);
+			assert(p_bitsets[exconj->bitset_ids[b]] != NULL);
+			itconj->bitsets[b] = p_bitsets[exconj->bitset_ids[b]];
+			itconj->pre_nots[b] = exconj->pre_nots[b];
+			itconj->pages[b] = NULL;
+		}
+
+		itconj->size = exconj->size;
+	}
+
+	it->size = expr->size;
+
+	bitset_iterator_rewind(it);
+
+	return 0;
+}
+
+/*
+ * Move conj to the first position >= pos at which every non-negated bitset
+ * of the conjunction has a page. conj->page_first_pos becomes SIZE_MAX if
+ * some non-negated bitset has no page at or after the candidate position.
+ */
+static void
+bitset_iterator_conj_rewind(struct bitset_iterator_conj *conj, size_t pos)
+{
+	assert(conj != NULL);
+	assert(pos % (BITSET_PAGE_DATA_SIZE * CHAR_BIT) == 0);
+	assert(conj->size > 0);
+	assert(conj->page_first_pos <= pos);
+
+	struct bitset_page key;
+	key.first_pos = pos;
+
+restart:
+	for (size_t b = 0; b < conj->size; b++) {
+		conj->pages[b] = bitset_pages_nsearch(&conj->bitsets[b]->pages,
+						      &key);
+		/* a negated bitset never limits where the result can be */
+		if (conj->pre_nots[b])
+			continue;
+
+		/* bitset b does not have more pages */
+		if (conj->pages[b] == NULL) {
+			conj->page_first_pos = SIZE_MAX;
+			return;
+		}
+
+		assert(conj->pages[b]->first_pos >= key.first_pos);
+
+		/*
+		 * bitset b has a next page, but it is beyond the candidate
+		 * position: retry all bitsets from that page's position
+		 */
+		if (conj->pages[b]->first_pos > key.first_pos) {
+			key.first_pos = conj->pages[b]->first_pos;
+			goto restart;
+		}
+	}
+
+	conj->page_first_pos = key.first_pos;
+}
+
+/* qsort() comparator: orders conjunctions by ascending page_first_pos. */
+static int
+bitset_iterator_conj_cmp(const void *p1, const void *p2)
+{
+	assert(p1 != NULL && p2 != NULL);
+
+	const struct bitset_iterator_conj *conj1 = p1;
+	const struct bitset_iterator_conj *conj2 = p2;
+
+	if (conj1->page_first_pos < conj2->page_first_pos) {
+		return -1;
+	} else if (conj1->page_first_pos > conj2->page_first_pos) {
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+/*
+ * Compute into dst the result page of conj at conj->page_first_pos:
+ * the AND of its non-negated pages with the negated pages masked out.
+ */
+static void
+bitset_iterator_conj_prepare_page(struct bitset_iterator_conj *conj,
+				  struct bitset_page *dst)
+{
+	assert(conj != NULL);
+	assert(dst != NULL);
+	assert(conj->size > 0);
+	assert(conj->page_first_pos != SIZE_MAX);
+
+	bitset_page_set_ones(dst);
+	for (size_t b = 0; b < conj->size; b++) {
+		if (!conj->pre_nots[b]) {
+			/* conj->pages[b] is rewound to conj->page_first_pos */
+			assert(conj->pages[b]->first_pos == conj->page_first_pos);
+			bitset_page_and(dst, conj->pages[b]);
+		} else {
+			/*
+			 * If page is NULL or its position is not equal
+			 * to conj->page_first_pos then conj->bitset[b]
+			 * does not have page with the required position and
+			 * all bits in this page are considered to be zeros.
+			 * Since NAND(a, zeros) => a, we can simply skip this
+			 * bitset here.
+			 */
+			if (conj->pages[b] == NULL ||
+			    conj->pages[b]->first_pos != conj->page_first_pos)
+				continue;
+
+			bitset_page_nand(dst, conj->pages[b]);
+		}
+	}
+}
+
+/*
+ * Build it->page for the smallest page position among all conjunctions
+ * by ORing the result pages of the conjunctions located there, then
+ * restart the bit iterator on it. first_pos is SIZE_MAX when exhausted.
+ */
+static void
+bitset_iterator_prepare_page(struct bitset_iterator *it)
+{
+	qsort(it->conjs, it->size, sizeof(*it->conjs),
+	      bitset_iterator_conj_cmp);
+
+	bitset_page_set_zeros(it->page);
+	it->page->first_pos = it->conjs[0].page_first_pos;
+
+	/* There are no more conjunctions that can be ORed */
+	if (it->page->first_pos == SIZE_MAX)
+		return;
+
+	/* For each conj where conj->page_first_pos == pos */
+	for (size_t c = 0; c < it->size; c++) {
+		if (it->conjs[c].page_first_pos > it->page->first_pos)
+			break;
+
+		/* Get result from conj */
+		bitset_iterator_conj_prepare_page(&it->conjs[c], it->page_tmp);
+		/* OR page from conjunction with it->page */
+		bitset_page_or(it->page, it->page_tmp);
+	}
+
+	/* Init the bit iterator on it->page */
+	bit_iterator_init(&it->page_it, bitset_page_data(it->page),
+			  BITSET_PAGE_DATA_SIZE, true);
+}
+
+/* Rewind every conjunction to position 0 and build the first page. */
+static void
+bitset_iterator_first_page(struct bitset_iterator *it)
+{
+	assert(it != NULL);
+
+	for (size_t c = 0; c < it->size; c++) {
+		/*
+		 * Start over from 0: after some iteration the conjunction
+		 * sits at a later page (or at SIZE_MAX), while
+		 * bitset_iterator_conj_rewind() only moves forward.
+		 */
+		it->conjs[c].page_first_pos = 0;
+		bitset_iterator_conj_rewind(&it->conjs[c], 0);
+	}
+
+	/* Prepare the result page */
+	bitset_iterator_prepare_page(it);
+}
+
+static void
+bitset_iterator_next_page(struct bitset_iterator *it)
+{
+	assert(it != NULL);
+
+	const size_t PAGE_BIT = BITSET_PAGE_DATA_SIZE * CHAR_BIT;
+	size_t pos = it->page->first_pos;
+
+	/* Rewind all conjunctions that are at the current position to the
+	 * next position; conjs is sorted, so stop at the first later one */
+	for (size_t c = 0; c < it->size; c++) {
+		if (it->conjs[c].page_first_pos > pos)
+			break;
+
+		bitset_iterator_conj_rewind(&it->conjs[c], pos + PAGE_BIT);
+		assert(pos + PAGE_BIT <= it->conjs[c].page_first_pos);
+	}
+
+	/* Prepare the result page */
+	bitset_iterator_prepare_page(it);
+}
+
+/* Rewind to the start position, see bitset_iterator_first_page(). */
+void
+bitset_iterator_rewind(struct bitset_iterator *it)
+{
+	assert(it != NULL);
+
+	/* Prepare first page */
+	bitset_iterator_first_page(it);
+}
+
+size_t
+bitset_iterator_next(struct bitset_iterator *it)
+{
+	assert(it != NULL && it->page != NULL);
+
+	while (true) {
+		if (it->page->first_pos == SIZE_MAX)
+			return SIZE_MAX;
+
+		size_t pos = bit_iterator_next(&it->page_it);
+		if (pos != SIZE_MAX) {
+			return it->page->first_pos + pos;
+		}
+		/* the current page has no more bits, build the next one */
+		bitset_iterator_next_page(it);
+	}
+}
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 4ec3cfdc96f1b11ab7988318e00a56a10a8d6b3f..1fa92bbdb088b6317d4ab67fffe434b1afd1c6b5 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -12,6 +12,8 @@ add_executable(bit_test bit.c bit.c) target_link_libraries(bit_test bit) add_executable(bitset_basic_test bitset_basic.c) target_link_libraries(bitset_basic_test bitset) +add_executable(bitset_iterator_test bitset_iterator.c) +target_link_libraries(bitset_iterator_test bitset) add_executable(objc_finally objc_finally.m) add_executable(objc_catchcxx objc_catchcxx.m)
diff --git a/test/unit/bitset_iterator.c b/test/unit/bitset_iterator.c
new file mode 100644
index 0000000000000000000000000000000000000000..bebf1bf31dd6b867e7111fa25f5cf91a1b3b8cc5
--- /dev/null
+++ b/test/unit/bitset_iterator.c
@@ -0,0 +1,422 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <lib/bitset/iterator.h>
+#include "unit.h"
+
+/* Strictly increasing positions shared by the tests, see nums_fill(). */
+enum { NUMS_SIZE = 1 << 16 };
+static size_t NUMS[NUMS_SIZE];
+
+/* Allocate and construct count empty bitsets backed by libc realloc. */
+static struct bitset **
+bitsets_create(size_t count)
+{
+	struct bitset **bitsets = malloc(count * sizeof(*bitsets));
+	fail_if(bitsets == NULL);
+	for (size_t i = 0; i < count; i++) {
+		bitsets[i] = malloc(sizeof(struct bitset));
+		fail_if(bitsets[i] == NULL);
+		bitset_create(bitsets[i], realloc);
+	}
+
+	return bitsets;
+}
+
+/* Destroy and free the bitsets made by bitsets_create(). */
+static void
+bitsets_destroy(struct bitset **bitsets, size_t count)
+{
+	for (size_t i = 0; i < count; i++) {
+		bitset_destroy(bitsets[i]);
+		free(bitsets[i]);
+	}
+
+	free(bitsets);
+}
+
+/* Fill nums with strictly increasing values using random steps of 1..7. */
+static void
+nums_fill(size_t *nums, size_t size)
+{
+	const size_t STEP_MAX = 7;
+	nums[0] = rand() % STEP_MAX;
+	for (size_t i = 1; i < size; i++) {
+		nums[i] = nums[i - 1] + 1 + rand() % STEP_MAX;
+	}
+}
+
+/* qsort() comparator for size_t values, ascending. */
+static int
+nums_comparator(const void *a, const void *b)
+{
+	size_t *aa = (size_t *) a;
+	size_t *bb = (size_t *) b;
+
+	if (*aa < *bb) {
+		return -1;
+	} else if (*aa > *bb) {
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+/* Sort nums in ascending order. */
+static void
+nums_sort(size_t *nums, size_t size)
+{
+	qsort(nums, size, sizeof(*nums), nums_comparator);
+}
+
+/* Randomly permute nums in place. */
+static void
+nums_shuffle(size_t *nums, size_t size)
+{
+	for (size_t i = 0; i < size - 1; i++) {
+		size_t j = i + rand() / (RAND_MAX / (size- i) + 1);
+		size_t tmp = nums[j];
+		nums[j] = nums[i];
+		nums[i] = tmp;
+	}
+}
+
+/* AND of two bitsets that share no position yields an empty result. */
+static
+void test_empty()
+{
+	header();
+
+	struct bitset **bitsets = bitsets_create(2);
+
+	bitset_set(bitsets[0], 1);
+	bitset_set(bitsets[0], 2);
+	bitset_set(bitsets[0], 3);
+	bitset_set(bitsets[0], 193);
+	bitset_set(bitsets[0], 1024);
+
+	bitset_set(bitsets[0], 1025);
+	bitset_set(bitsets[0], 16384);
+	bitset_set(bitsets[0], 16385);
+
+	bitset_set(bitsets[1], 17);
+	bitset_set(bitsets[1], 194);
+	bitset_set(bitsets[1], 1023);
+
+	struct bitset_expr expr;
+	bitset_expr_create(&expr, realloc);
+
+	fail_unless(bitset_expr_add_conj(&expr) == 0);
+
+	fail_unless(bitset_expr_add_param(&expr, 0, false) == 0);
+	fail_unless(bitset_expr_add_param(&expr, 1, false) == 0);
+
+	struct bitset_iterator it;
+	bitset_iterator_create(&it, realloc);
+
+	fail_unless(bitset_iterator_init(&it, &expr, bitsets, 2) == 0);
+	bitset_expr_destroy(&expr);
+
+	size_t pos = bitset_iterator_next(&it);
+	fail_unless(pos == SIZE_MAX);
+
+	bitset_iterator_destroy(&it);
+
+	bitsets_destroy(bitsets, 2);
+
+	footer();
+}
+
+/* AND of two bitsets whose only common position is 0 yields just 0. */
+static
+void test_first()
+{
+	header();
+
+	struct bitset **bitsets = bitsets_create(2);
+
+	bitset_set(bitsets[0], 0);
+	bitset_set(bitsets[0], 1023);
+
+	bitset_set(bitsets[1], 0);
+	bitset_set(bitsets[1], 1025);
+
+	struct bitset_expr expr;
+	bitset_expr_create(&expr, realloc);
+
+	fail_unless(bitset_expr_add_conj(&expr) == 0);
+
+	fail_unless(bitset_expr_add_param(&expr, 0, false) == 0);
+	fail_unless(bitset_expr_add_param(&expr, 1, false) == 0);
+
+	struct bitset_iterator it;
+	bitset_iterator_create(&it, realloc);
+	fail_unless(bitset_iterator_init(&it, &expr, bitsets, 2) == 0);
+	bitset_expr_destroy(&expr);
+
+	size_t pos = bitset_iterator_next(&it);
+
+	fail_unless(pos == 0);
+	fail_unless(bitset_iterator_next(&it) == SIZE_MAX);
+
+	bitset_iterator_destroy(&it);
+
+	bitsets_destroy(bitsets, 2);
+
+	footer();
+}
+
+/* AND of 32 bitsets: noise bits live in one bitset only and must vanish. */
+static
+void test_simple()
+{
+	header();
+
+	enum { BITSETS_SIZE = 32 };
+
+	struct bitset **bitsets = bitsets_create(BITSETS_SIZE);
+
+	nums_shuffle(NUMS, NUMS_SIZE);
+
+	size_t NOISE_SIZE = NUMS_SIZE / 3;
+	for (size_t i = 0; i < NOISE_SIZE; i++) {
+		bitset_set(bitsets[i % BITSETS_SIZE], NUMS[i]);
+	}
+
+	for (size_t i = NOISE_SIZE; i < NUMS_SIZE; i++) {
+		for (size_t b = 0; b < BITSETS_SIZE; b++) {
+			bitset_set(bitsets[b], NUMS[i]);
+		}
+	}
+
+	struct bitset_expr expr;
+	bitset_expr_create(&expr, realloc);
+	fail_unless(bitset_expr_add_conj(&expr) == 0);
+
+	for (size_t b = 0; b < BITSETS_SIZE; b++) {
+		fail_unless(bitset_expr_add_param(&expr, b, false) == 0);
+	}
+
+	nums_sort(NUMS + NOISE_SIZE, NUMS_SIZE - NOISE_SIZE);
+
+	struct bitset_iterator it;
+	bitset_iterator_create(&it, realloc);
+	fail_unless(bitset_iterator_init(&it, &expr, bitsets, BITSETS_SIZE) == 0);
+	bitset_expr_destroy(&expr);
+
+	for (size_t i = NOISE_SIZE; i < NUMS_SIZE; i++) {
+		fail_unless(bitset_iterator_next(&it) == NUMS[i]);
+	}
+	fail_unless(bitset_iterator_next(&it) == SIZE_MAX);
+
+	bitset_iterator_destroy(&it);
+	bitsets_destroy(bitsets, BITSETS_SIZE);
+
+	footer();
+}
+
+/* AND of 32 identical bitsets: each result must be set in every bitset. */
+static
+void test_big() {
+	header();
+
+	const size_t BITSETS_SIZE = 32;
+	struct bitset **bitsets = bitsets_create(BITSETS_SIZE);
+
+	nums_shuffle(NUMS, NUMS_SIZE);
+
+	printf("Setting bits... ");
+	for (size_t i = 0; i < NUMS_SIZE; i++) {
+		for (size_t b = 0; b < BITSETS_SIZE; b++) {
+			bitset_set(bitsets[b], NUMS[i]);
+		}
+	}
+	printf("ok\n");
+
+	struct bitset_expr expr;
+	bitset_expr_create(&expr, realloc);
+	fail_unless(bitset_expr_add_conj(&expr) == 0);
+	for(size_t b = 0; b < BITSETS_SIZE; b++) {
+		fail_unless(bitset_expr_add_param(&expr, b, false) == 0);
+	}
+
+	struct bitset_iterator it;
+	bitset_iterator_create(&it, realloc);
+	fail_unless(bitset_iterator_init(&it, &expr, bitsets, BITSETS_SIZE) == 0);
+	bitset_expr_destroy(&expr);
+
+	printf("Iterating... ");
+	size_t pos;
+	while ((pos = bitset_iterator_next(&it)) != SIZE_MAX) {
+		size_t b;
+		for(b = 0; b < BITSETS_SIZE; b++) {
+			if(!bitset_test(bitsets[b], pos))
+				break;
+		}
+
+		fail_if(b < BITSETS_SIZE);
+	}
+	printf("ok\n");
+
+	bitset_iterator_destroy(&it);
+
+	bitsets_destroy(bitsets, BITSETS_SIZE);
+
+	footer();
+}
+
+/* (~b0 & b1), including a page of b1 lying past the last page of b0. */
+static
+void test_not_last() {
+	header();
+
+	struct bitset **bitsets = bitsets_create(2);
+
+	size_t big_i = (size_t) 1 << 15;
+
+	bitset_set(bitsets[0], 0);
+	bitset_set(bitsets[0], 11);
+	bitset_set(bitsets[0], 1024);
+
+	bitset_set(bitsets[1], 0);
+	bitset_set(bitsets[1], 10);
+	bitset_set(bitsets[1], 11);
+	bitset_set(bitsets[1], 14);
+	bitset_set(bitsets[1], big_i);
+
+	struct bitset_expr expr;
+	bitset_expr_create(&expr, realloc);
+
+	fail_unless(bitset_expr_add_conj(&expr) == 0);
+	fail_unless(bitset_expr_add_param(&expr, 0, true) == 0);
+	fail_unless(bitset_expr_add_param(&expr, 1, false) == 0);
+
+	struct bitset_iterator it;
+	bitset_iterator_create(&it, realloc);
+	fail_unless(bitset_iterator_init(&it, &expr, bitsets, 2) == 0);
+	bitset_expr_destroy(&expr);
+
+	size_t result[] = {10, 14, big_i};
+	size_t result_size = 3;
+
+	size_t pos;
+	for (size_t i = 0; i < result_size; i++) {
+		pos = bitset_iterator_next(&it);
+		fail_unless (result[i] == pos);
+	}
+	fail_unless (bitset_iterator_next(&it) == SIZE_MAX);
+
+	bitset_iterator_destroy(&it);
+
+	bitsets_destroy(bitsets, 2);
+
+	footer();
+}
+
+/* OR of negated disjoint bitsets: every position 0, 1, 2, ... matches. */
+static
+void test_not_empty() {
+	header();
+
+	enum {
+		BITSETS_SIZE = 4,
+		CHECK_COUNT = (size_t) 1 << 14
+	};
+
+	struct bitset **bitsets = bitsets_create(BITSETS_SIZE);
+
+	nums_shuffle(NUMS, NUMS_SIZE);
+	for (size_t i = 0; i < NUMS_SIZE; i++) {
+		bitset_set(bitsets[i % BITSETS_SIZE], NUMS[i]);
+	}
+
+	struct bitset_expr expr;
+	bitset_expr_create(&expr, realloc);
+
+	for(size_t b = 0; b < BITSETS_SIZE; b++) {
+		fail_unless(bitset_expr_add_conj(&expr) == 0);
+		fail_unless(bitset_expr_add_param(&expr, b, true) == 0);
+	}
+
+	struct bitset_iterator it;
+	bitset_iterator_create(&it, realloc);
+	fail_unless(bitset_iterator_init(&it, &expr, bitsets, BITSETS_SIZE) == 0);
+	bitset_expr_destroy(&expr);
+
+
+	for (size_t i = 0; i < CHECK_COUNT; i++) {
+		size_t pos = bitset_iterator_next(&it);
+		fail_unless (i == pos);
+	}
+
+	bitset_iterator_destroy(&it);
+
+	bitsets_destroy(bitsets, BITSETS_SIZE);
+
+	footer();
+}
+
+/* OR of 32 disjoint bitsets returns every number in ascending order. */
+static
+void test_disjunction()
+{
+	header();
+
+	enum { BITSETS_SIZE = 32 };
+
+	struct bitset **bitsets = bitsets_create(BITSETS_SIZE);
+
+	nums_shuffle(NUMS, NUMS_SIZE);
+
+	for (size_t i = 0; i < NUMS_SIZE; i++) {
+		bitset_set(bitsets[i % BITSETS_SIZE], NUMS[i]);
+	}
+
+	struct bitset_expr expr;
+	bitset_expr_create(&expr, realloc);
+
+	for (size_t b = 0; b < BITSETS_SIZE; b++) {
+		fail_unless(bitset_expr_add_conj(&expr) == 0);
+		fail_unless(bitset_expr_add_param(&expr, b, false) == 0);
+	}
+
+	nums_sort(NUMS, NUMS_SIZE);
+
+	struct bitset_iterator it;
+	bitset_iterator_create(&it, realloc);
+	fail_unless(bitset_iterator_init(&it, &expr, bitsets, BITSETS_SIZE) == 0);
+	bitset_expr_destroy(&expr);
+
+	for (size_t i = 0; i < NUMS_SIZE; i++) {
+		size_t pos = bitset_iterator_next(&it);
+		fail_unless(pos == NUMS[i]);
+	}
+
+	size_t pos = bitset_iterator_next(&it);
+	fail_unless(pos == SIZE_MAX);
+
+	bitset_iterator_destroy(&it);
+
+	bitsets_destroy(bitsets, BITSETS_SIZE);
+
+	footer();
+}
+
+int main(void)
+{
+	setbuf(stdout, NULL);
+	nums_fill(NUMS, NUMS_SIZE);
+
+	test_empty();
+	test_first();
+	test_simple();
+	test_big();
+	test_not_empty();
+	test_not_last();
+	test_disjunction();
+
+	return 0;
+}
diff --git a/test/unit/bitset_iterator.result b/test/unit/bitset_iterator.result
new file mode 100644
index 0000000000000000000000000000000000000000..9d6a699aad43228f187a82d52c4427b4907dfe70
--- /dev/null
+++ b/test/unit/bitset_iterator.result
@@ -0,0 +1,17 @@ + *** test_empty *** + *** test_empty: done *** + *** test_first *** + *** test_first: done *** + *** test_simple *** + *** 
test_simple: done *** + *** test_big *** +Setting bits... ok +Iterating... ok + *** test_big: done *** + *** test_not_empty *** + *** test_not_empty: done *** + *** test_not_last *** + *** test_not_last: done *** + *** test_disjunction *** + *** test_disjunction: done *** + \ No newline at end of file diff --git a/test/unit/bitset_iterator.test b/test/unit/bitset_iterator.test new file mode 100644 index 0000000000000000000000000000000000000000..eb272521cb9be9cba2cf0389725ecafe2fc7e4b2 --- /dev/null +++ b/test/unit/bitset_iterator.test @@ -0,0 +1 @@ +run_test("bitset_iterator_test")