diff --git a/cmake/BuildMisc.cmake b/cmake/BuildMisc.cmake
index 1b12412d631221575879dd73ec83459fb610787c..881b92644b48ce109d0cf9fc72507641c92ac19f 100644
--- a/cmake/BuildMisc.cmake
+++ b/cmake/BuildMisc.cmake
@@ -7,6 +7,7 @@ macro(libmisc_build)
         ${PROJECT_SOURCE_DIR}/third_party/proctitle.c
         ${PROJECT_SOURCE_DIR}/third_party/PMurHash.c
         ${PROJECT_SOURCE_DIR}/third_party/base64.c
+        ${PROJECT_SOURCE_DIR}/third_party/rtree.cc
     )
 
     if (NOT HAVE_MEMMEM)
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index bef3d928599d4b02f47441cda1c6e40f66c78c02..bb64945be0b18fac1e3b1e4238ff2a5b1079d77f 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -28,6 +28,7 @@ add_library(box
     index.cc
     hash_index.cc
     tree_index.cc
+    rtree_index.cc
     bitset_index.cc
     engine.cc
     engine_memtx.cc
diff --git a/src/box/engine_memtx.cc b/src/box/engine_memtx.cc
index 24f30946565638c2b25bda4935a5ef1a06314337..1ede5cbf3247ab367ae79f0763a5f9f0d24bbdae 100644
--- a/src/box/engine_memtx.cc
+++ b/src/box/engine_memtx.cc
@@ -32,6 +32,7 @@
 #include "index.h"
 #include "hash_index.h"
 #include "tree_index.h"
+#include "rtree_index.h"
 #include "bitset_index.h"
 #include "space.h"
 #include "exception.h"
@@ -105,6 +106,8 @@ MemtxFactory::createIndex(struct key_def *key_def)
 		return new HashIndex(key_def);
 	case TREE:
 		return new TreeIndex(key_def);
+	case RTREE:
+		return new RTreeIndex(key_def);
 	case BITSET:
 		return new BitsetIndex(key_def);
 	default:
@@ -138,6 +141,29 @@ MemtxFactory::keydefCheck(struct key_def *key_def)
 	case TREE:
 		/* TREE index has no limitations. */
 		break;
+	case RTREE:
+		/*
+		 * NOTE(review): confirm ER_MODIFY_INDEX expects only a
+		 * message string - the arguments must match its format.
+		 */
+		if (key_def->part_count != 1 && key_def->part_count != 2 && key_def->part_count != 4) {
+			tnt_raise(ClientError, ER_MODIFY_INDEX,
+				  "R-Tree index can be defined only for arrays (one part), points (two parts) or rectangles (four parts)");
+		}
+		if (key_def->part_count == 1) {
+			if (key_def->parts[0].type != ARR) {
+				tnt_raise(ClientError, ER_MODIFY_INDEX,
+					  "R-Tree index can be defined only for points, rectangles and arrays");
+			}
+		} else {
+			for (uint32_t i = 0; i < key_def->part_count; i++) {
+				if (key_def->parts[i].type != NUM) {
+					tnt_raise(ClientError, ER_MODIFY_INDEX,
+						  "R-Tree index can be defined only for numeric fields");
+				}
+			}
+		}
+		break;
 	case BITSET:
 		if (key_def->part_count != 1) {
 			tnt_raise(ClientError, ER_MODIFY_INDEX,
diff --git a/src/box/index.cc b/src/box/index.cc
index d03db083b9be1555b495ed3ffa1e706cc886d751..54880ff19ea5a4859b14eebbe35c30c75e2d18fa 100644
--- a/src/box/index.cc
+++ b/src/box/index.cc
@@ -73,16 +73,35 @@ key_validate(struct key_def *key_def, enum iterator_type type, const char *key,
 		/* Fall through. */
 	}
 
-	if (part_count > key_def->part_count)
-		tnt_raise(ClientError, ER_KEY_PART_COUNT,
-			  key_def->part_count, part_count);
-
-	/* Partial keys are allowed only for TREE index type. */
-	if (key_def->type != TREE && part_count < key_def->part_count) {
-		tnt_raise(ClientError, ER_EXACT_MATCH,
-			  key_def->part_count, part_count);
-	}
-	key_validate_parts(key_def, key, part_count);
+	if (key_def->type == RTREE) {
+		if (part_count != 1 && part_count != 2 && part_count != 4) {
+			/* Pass (limit, given) like the generic branch below, not a message. */
+			tnt_raise(ClientError, ER_KEY_PART_COUNT,
+				  4U, part_count);
+		}
+		if (part_count == 1) {
+			enum mp_type mp_type = mp_typeof(*key);
+			mp_next(&key);
+			key_mp_type_validate(ARR, mp_type, ER_KEY_PART_TYPE, 0);
+		} else {
+			for (uint32_t part = 0; part < part_count; part++) {
+				enum mp_type mp_type = mp_typeof(*key);
+				mp_next(&key);
+				key_mp_type_validate(NUM, mp_type, ER_KEY_PART_TYPE, part);
+			}
+		}
+	} else {
+		if (part_count > key_def->part_count)
+			tnt_raise(ClientError, ER_KEY_PART_COUNT,
+				  key_def->part_count, part_count);
+
+		/* Partial keys are allowed only for TREE index type. */
+		if (key_def->type != TREE && part_count < key_def->part_count) {
+			tnt_raise(ClientError, ER_EXACT_MATCH,
+				  key_def->part_count, part_count);
+		}
+		key_validate_parts(key_def, key, part_count);
+	}
 }
 
 void
diff --git a/src/box/index.h b/src/box/index.h
index 5ba9fd7ad8a92e33b908f33cddbae0c490504990..0b22c3fb9e4cdbb3511c806c1edf8b1a875c527b 100644
--- a/src/box/index.h
+++ b/src/box/index.h
@@ -70,6 +70,8 @@ struct tuple;
 	_(ITER_BITS_ALL_SET, 7) /* all bits from x are set in key */ \
 	_(ITER_BITS_ANY_SET, 8) /* at least one x's bit is set */ \
 	_(ITER_BITS_ALL_NOT_SET, 9) /* all bits are not set */ \
+	_(ITER_OVERLAPS, 10) /* key overlaps x */ \
+	_(ITER_NEIGHBOR, 11) /* tuples in distance ascending order from specified point */ \
 
 ENUM(iterator_type, ITERATOR_TYPE);
 extern const char *iterator_type_strs[];
diff --git a/src/box/key_def.cc b/src/box/key_def.cc
index 42283d80aeb7268d526eb517a7e283ea9d881780..3f70f12d2a06c012f3ad5230e90ff2762c19edf7 100644
--- a/src/box/key_def.cc
+++ b/src/box/key_def.cc
@@ -33,13 +33,18 @@
 #include <stdio.h>
 #include "exception.h"
 
-const char *field_type_strs[] = {"UNKNOWN", "NUM", "STR", "\0"};
+const char *field_type_strs[] = {"UNKNOWN", "NUM", "STR", "ARR", "\0"};
 STRS(index_type, ENUM_INDEX_TYPE);
 
 const uint32_t key_mp_type[] = {
 	/* [UNKNOWN] = */ UINT32_MAX,
-	/* [NUM] = */ 1U << MP_UINT,
-	/* [_STR] = */ 1U << MP_STR
+	/*
+	 * NOTE(review): NUM now admits int/float/double for every index
+	 * type, not just RTREE - confirm the other indexes can decode them.
+	 */
+	/* [NUM] = */ (1U << MP_UINT)|(1U << MP_INT)|(1U << MP_FLOAT)|(1U << MP_DOUBLE),
+	/* [STR] = */ 1U << MP_STR,
+	/* [ARR] = */ 1U << MP_ARRAY
 };
 
 enum schema_object_type
diff --git a/src/box/key_def.h b/src/box/key_def.h
index 63e7251357d554a31174d1ddac1f06980d7d7769..b5df2bc55300a529f96252d879a09df1cdf4ec3f 100644
--- a/src/box/key_def.h
+++ b/src/box/key_def.h
@@ -75,21 +75,22 @@ schema_object_type(const char *name);
  * since there is a mismatch between enum name (STRING) and type
  * name literal ("STR"). STR is already used as Objective C type.
  */
-enum field_type { UNKNOWN = 0, NUM, STRING, field_type_MAX };
+enum field_type { UNKNOWN = 0, NUM, STRING, ARR, field_type_MAX };
 extern const char *field_type_strs[];
 
 static inline uint32_t
 field_type_maxlen(enum field_type type)
 {
 	static const uint32_t maxlen[] =
-		{ UINT32_MAX, 4, 8, UINT32_MAX, UINT32_MAX };
+		{ UINT32_MAX, 8, UINT32_MAX, UINT32_MAX, UINT32_MAX };
 	return maxlen[type];
 }
 
 #define ENUM_INDEX_TYPE(_) \
 	_(HASH, 0) /* HASH Index */ \
 	_(TREE, 1) /* TREE Index */ \
-	_(BITSET, 2) /* BITSET Index */
+	_(BITSET, 2) /* BITSET Index */ \
+	_(RTREE, 3) /* R-Tree Index */ \
 
 ENUM(index_type, ENUM_INDEX_TYPE);
 extern const char *index_type_strs[];
@@ -265,7 +266,6 @@ key_mp_type_validate(enum field_type key_type, enum mp_type mp_type,
 {
 	assert(key_type < field_type_MAX);
 	assert((int) mp_type < (int) CHAR_BIT * sizeof(*key_mp_type));
-
 	if (unlikely((key_mp_type[key_type] & (1U << mp_type)) == 0))
 		tnt_raise(ClientError, err, field_no,
 			  field_type_strs[key_type]);
diff --git a/src/box/rtree_index.cc b/src/box/rtree_index.cc
new file mode 100644
index
0000000000000000000000000000000000000000..bc3e74cbf66be8607e87e3cdc0eae57537b01b67
--- /dev/null
+++ b/src/box/rtree_index.cc
@@ -0,0 +1,371 @@
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "rtree_index.h"
+#include "tuple.h"
+#include "space.h"
+#include "exception.h"
+#include "errinj.h"
+#include "fiber.h"
+#include "small/small.h"
+
+/* {{{ Utilities. *************************************************/
+
+/**
+ * Build the bounding rectangle of a tuple from the fields named in
+ * the key definition: one array field holding 2 or 4 numbers, two
+ * numeric fields (a point), or four numeric fields (a rectangle).
+ * A point is stored as a rectangle whose two corners coincide.
+ */
+inline void extract_rectangle(rectangle_t& r, struct tuple const* tuple, struct key_def* kd)
+{
+	switch (kd->part_count) {
+	case 1: // vector
+	{
+		uint32_t fieldno = kd->parts[0].fieldno;
+		const char* elems = tuple_field(tuple, fieldno);
+		if (elems == NULL) {
+			tnt_raise(ClientError, ER_NO_SUCH_FIELD, fieldno);
+		}
+		/* Check the type before handing the field to mp_decode_array(). */
+		if (mp_typeof(*elems) != MP_ARRAY) {
+			tnt_raise(ClientError, ER_FIELD_TYPE, fieldno,
+				  field_type_strs[ARR]);
+		}
+		uint32_t size = mp_decode_array(&elems);
+		switch (size) {
+		case 2: // point
+			r.boundary[0] = r.boundary[2] = mp_decode_num(&elems, 0);
+			r.boundary[1] = r.boundary[3] = mp_decode_num(&elems, 1);
+			break;
+		case 4:
+			for (int i = 0; i < 4; i++) {
+				r.boundary[i] = mp_decode_num(&elems, i);
+			}
+			break;
+		default:
+			tnt_raise(ClientError, ER_UNSUPPORTED,
+				  "R-Tree field should be array with size 2 (point) or 4 (rectangle)");
+		}
+		break;
+	}
+	case 2: // point
+		r.boundary[0] = r.boundary[2] = tuple_field_num(tuple, kd->parts[0].fieldno);
+		r.boundary[1] = r.boundary[3] = tuple_field_num(tuple, kd->parts[1].fieldno);
+		break;
+	case 4: // rectangle
+		for (int i = 0; i < 4; i++) {
+			r.boundary[i] = tuple_field_num(tuple, kd->parts[i].fieldno);
+		}
+		break;
+	default:
+		assert(false);
+	}
+}
+
+/**
+ * Decode a search key into a rectangle. The key is either a single
+ * array of 2 or 4 numbers, two numbers (a point) or four numbers
+ * (a rectangle). Raises ClientError for any other shape.
+ *
+ * NOTE(review): every ER_UNSUPPORTED raise in this file passes one
+ * message string; confirm that matches the error's format arguments.
+ */
+static void
+key_to_rectangle(rectangle_t& r, const char *key, uint32_t part_count)
+{
+	switch (part_count) {
+	case 1:
+	{
+		uint32_t size = mp_decode_array(&key);
+		switch (size) {
+		case 2:
+			r.boundary[0] = r.boundary[2] = mp_decode_num(&key, 0);
+			r.boundary[1] = r.boundary[3] = mp_decode_num(&key, 1);
+			break;
+		case 4:
+			for (int i = 0; i < 4; i++) {
+				r.boundary[i] = mp_decode_num(&key, i);
+			}
+			break;
+		default:
+			tnt_raise(ClientError, ER_UNSUPPORTED,
+				  "R-Tree key should be array of 2 (point ) or 4 (rectangle) numeric coordinates");
+		}
+		break;
+	}
+	case 2:
+		r.boundary[0] = r.boundary[2] = mp_decode_num(&key, 0);
+		r.boundary[1] = r.boundary[3] = mp_decode_num(&key, 1);
+		break;
+	case 4:
+		for (int i = 0; i < 4; i++) {
+			r.boundary[i] = mp_decode_num(&key, i);
+		}
+		break;
+	default:
+		tnt_raise(ClientError, ER_UNSUPPORTED,
+			  "R-Tree key should be point (two numeric coordinates) or rectangle (four numeric coordinates)");
+	}
+}
+
+/* }}} */
+
+/* {{{ RTreeIndex Iterators ***************************************/
+
+/** Index iterator wrapping the R-tree library's own iterator. */
+struct rtree_iterator {
+	struct iterator base;
+	R_tree_iterator impl;
+};
+
+static void
+rtree_iterator_free(struct iterator *i)
+{
+	delete (rtree_iterator*)i;
+}
+
+static struct tuple *
+rtree_iterator_next(struct iterator *i)
+{
+	return (tuple*)((rtree_iterator*)i)->impl.next();
+}
+
+/* }}} */
+
+/* {{{ RTreeIndex *********************************************************/
+
+/**
+ * Factory handed to R_tree: creates fixed-size allocators backed
+ * by a mempool taken from the current cord's slab cache.
+ */
+class MemPoolAllocatorFactory : public FixedSizeAllocator::Factory
+{
+	class Allocator : public FixedSizeAllocator
+	{
+	private:
+		struct mempool pool;
+		size_t size;
+	public:
+		Allocator(size_t obj_size)
+		{
+			size = obj_size;
+			mempool_create(&pool, &cord()->slabc, obj_size);
+		}
+
+		virtual void* alloc()
+		{
+			return mempool_alloc(&pool);
+		}
+
+		virtual void free(void* ptr)
+		{
+			mempool_free(&pool, ptr);
+		}
+
+		virtual size_t used_size()
+		{
+			return mempool_used(&pool);
+		}
+
+		virtual ~Allocator()
+		{
+			mempool_destroy(&pool);
+		}
+	};
+public:
+	virtual FixedSizeAllocator* create(size_t obj_size)
+	{
+		return new Allocator(obj_size);
+	}
+
+	virtual void destroy(FixedSizeAllocator* allocator)
+	{
+		delete allocator;
+	}
+};
+
+static MemPoolAllocatorFactory rtree_allocator_factory;
+
+RTreeIndex::~RTreeIndex()
+{
+	// Iterator has to be destroyed prior to tree
+	if (m_position != NULL) {
+		m_position->free(m_position);
+		m_position = NULL;
+	}
+}
+
+RTreeIndex::RTreeIndex(struct key_def *key_def)
+: Index(key_def), tree(&rtree_allocator_factory)
+{
+	if (key_def->part_count != 1 && key_def->part_count != 2 && key_def->part_count != 4) {
+		tnt_raise(ClientError, ER_UNSUPPORTED,
+			  "R-Tree index can be defined only for arrays (one part), points (two parts) or rectangles (four parts)");
+	}
+	if (key_def->part_count == 1) {
+		if (key_def->parts[0].type != ARR) {
+			tnt_raise(ClientError, ER_UNSUPPORTED,
+				  "R-Tree index can be defined only for arrays, points and rectangles");
+		}
+	} else {
+		for (uint32_t i = 0; i < key_def->part_count; i++) {
+			if (key_def->parts[i].type != NUM) {
+				tnt_raise(ClientError, ER_UNSUPPORTED,
+					  "R-Tree index can be defined only for numeric fields");
+			}
+		}
+	}
+}
+
+size_t
+RTreeIndex::size() const
+{
+	return tree.number_of_records();
+}
+
+size_t
+RTreeIndex::memsize() const
+{
+	return tree.used_size();
+}
+
+/**
+ * Return the first result of an SOP_OVERLAPS search for the key
+ * rectangle, or NULL when the search reports no match.
+ */
+struct tuple *
+RTreeIndex::findByKey(const char *key, uint32_t part_count) const
+{
+	rectangle_t r;
+	R_tree_iterator iterator;
+	key_to_rectangle(r, key, part_count);
+	if (tree.search(r, SOP_OVERLAPS, iterator)) {
+		return (struct tuple*)iterator.next();
+	}
+	return NULL;
+}
+
+/**
+ * Insert new_tuple (if any) and then remove old_tuple (if any).
+ * Returns old_tuple, or NULL when it was absent or not found in the
+ * tree. The dup_replace_mode argument is not consulted.
+ */
+struct tuple *
+RTreeIndex::replace(struct tuple *old_tuple, struct tuple *new_tuple,
+		    enum dup_replace_mode)
+{
+	rectangle_t r;
+	if (new_tuple) {
+		extract_rectangle(r, new_tuple, key_def);
+		tree.insert(r, new_tuple);
+	}
+	if (old_tuple) {
+		extract_rectangle(r, old_tuple, key_def);
+		if (!tree.remove(r, old_tuple)) {
+			old_tuple = NULL;
+		}
+	}
+	return old_tuple;
+}
+
+struct iterator *
+RTreeIndex::allocIterator() const
+{
+	/*
+	 * NOTE(review): a throwing operator new never returns NULL, so
+	 * the check below looks unreachable unless the build replaces
+	 * operator new - confirm.
+	 */
+	rtree_iterator *it = new rtree_iterator();
+	if (it == NULL) {
+		tnt_raise(ClientError, ER_MEMORY_ISSUE,
+			  sizeof(struct rtree_iterator),
+			  "RTreeIndex", "iterator");
+	}
+	it->base.next = rtree_iterator_next;
+	it->base.free = rtree_iterator_free;
+	return &it->base;
+}
+
+void
+RTreeIndex::initIterator(struct iterator *iterator, enum iterator_type type,
+			 const char *key, uint32_t part_count) const
+{
+	rectangle_t r;
+	rtree_iterator *it = (rtree_iterator *)iterator;
+	if (part_count == 0) {
+		if (type != ITER_ALL) {
+			tnt_raise(ClientError, ER_UNSUPPORTED,
+				  "It is possible to omit key only for ITER_ALL");
+		}
+		/* No key was given: do not hand search() indeterminate bounds. */
+		for (int i = 0; i < 4; i++) {
+			r.boundary[i] = 0;
+		}
+	} else {
+		key_to_rectangle(r, key, part_count);
+	}
+	Spatial_search_op op;
+	switch (type) {
+	case ITER_ALL:
+		op = SOP_ALL;
+		break;
+	case ITER_EQ:
+		op = SOP_EQUALS;
+		break;
+	case ITER_GT:
+		op = SOP_STRICT_CONTAINS;
+		break;
+	case ITER_GE:
+		op = SOP_CONTAINS;
+		break;
+	case ITER_LT:
+		op = SOP_STRICT_BELONGS;
+		break;
+	case ITER_LE:
+		op = SOP_BELONGS;
+		break;
+	case ITER_OVERLAPS:
+		op = SOP_OVERLAPS;
+		break;
+	case ITER_NEIGHBOR:
+		op = SOP_NEIGHBOR;
+		break;
+	default:
+		tnt_raise(ClientError, ER_UNSUPPORTED,
+			  "Unsupported search operation %d for R-Tree", type);
+	}
+	tree.search(r, op, it->impl);
+}
+
+void
+RTreeIndex::beginBuild()
+{
+	tree.purge();
+}
diff --git a/src/box/rtree_index.h b/src/box/rtree_index.h
new file mode 100644
index 0000000000000000000000000000000000000000..a40976450af5b371e04eb57cfdc080077abe8df1
--- /dev/null
+++ b/src/box/rtree_index.h
@@ -0,0 +1,59 @@
+#ifndef TARANTOOL_BOX_RTREE_INDEX_H_INCLUDED
+#define TARANTOOL_BOX_RTREE_INDEX_H_INCLUDED
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "index.h"
+
+#include <third_party/rtree.h>
+
+class RTreeIndex: public Index
+{
+public:
+	RTreeIndex(struct key_def *key_def);
+	~RTreeIndex();
+
+	virtual void beginBuild();
+	virtual size_t size() const;
+	virtual struct tuple *findByKey(const char *key, uint32_t part_count) const;
+	virtual struct tuple *replace(struct tuple *old_tuple,
+				      struct tuple *new_tuple,
+				      enum dup_replace_mode mode);
+
+	virtual size_t memsize() const;
+	virtual struct iterator *allocIterator() const;
+	virtual void initIterator(struct iterator *iterator,
+				  enum iterator_type type,
+				  const char *key, uint32_t part_count) const;
+
+protected:
+	R_tree tree;
+};
+
+#endif /* TARANTOOL_BOX_RTREE_INDEX_H_INCLUDED */
diff --git a/src/box/tuple.cc b/src/box/tuple.cc
index 4c656f84c8b1a98910f805380f6c73949eb54a63..e83654bdb1cf6fc8b6cc93cdc324d1ec6258f1d2 100644
--- a/src/box/tuple.cc
+++ b/src/box/tuple.cc
@@ -36,6 +36,15 @@
 #include <exception.h>
 #include <stdio.h>
 
+#ifndef DBL_MIN
+#define DBL_MIN 2.22507385850720138e-308 /* smallest normalized double, as in <float.h> */
+#define FLT_MIN ((float)1.17549435082228751e-38) /* smallest normalized float */
+#endif
+#ifndef DBL_MAX
+#define DBL_MAX 1.79769313486231470e+308
+#define FLT_MAX ((float)3.40282346638528860e+38)
+#endif
+
/** Global table of tuple formats */ struct tuple_format **tuple_formats; struct tuple_format *tuple_format_ber; @@ -578,3 +587,26 @@ tuple_end_snapshot() { small_alloc_setopt(&memtx_alloc, SMALL_DELAYED_FREE_MODE, false); } + +double mp_decode_num(const char **data, uint32_t i) +{ + double val; + switch (mp_typeof(**data)) { + case MP_UINT: + val = mp_decode_uint(data); + break; + case MP_INT: + val = mp_decode_int(data); + break; + case MP_FLOAT: + val = mp_decode_float(data); + break; + case MP_DOUBLE: + val = mp_decode_double(data); + break; + default: + tnt_raise(ClientError, ER_FIELD_TYPE, i, field_type_strs[NUM]); + } + return val; +} + diff --git a/src/box/tuple.h b/src/box/tuple.h index 959a75da265aa6dca1af5e4e3d2d34673e74b8a7..fbea77c06db9111534f837558c315cca1b3b5cd3 100644 --- a/src/box/tuple.h +++ b/src/box/tuple.h @@ -341,6 +341,25 @@ tuple_field_u32(struct tuple *tuple, uint32_t i) return (uint32_t) val; } +/** + * Decode numeric field and return its value as double + */ +double +mp_decode_num(const char **data, uint32_t i); + +/** + * A convenience shortcut for data dictionary - get a numeric tuple field as double + */ +inline double +tuple_field_num(const struct tuple* tuple, uint32_t field_no) +{ + const char* field = tuple_field(tuple, field_no); + if (field == NULL) { + tnt_raise(ClientError, ER_NO_SUCH_FIELD, field_no); + } + return mp_decode_num(&field, field_no); +} + /** * A convenience shortcut for data dictionary - get a tuple field * as a NUL-terminated string - returns a string of up to 256 bytes. diff --git a/test/box/rtree_array.result b/test/box/rtree_array.result new file mode 100644 index 0000000000000000000000000000000000000000..d983ed9a779411dbd662024807898b64aa3dbb8f --- /dev/null +++ b/test/box/rtree_array.result @@ -0,0 +1,103 @@ +s = box.schema.create_space('spatial') +--- +... +s:create_index('primary') +--- +- unique: true + parts: + - type: NUM + fieldno: 1 + id: 0 + space_id: 512 + name: primary + type: TREE +... 
+s:create_index('spatial', { type = 'rtree', parts = {2, 'arr'}}) +--- +- unique: true + parts: + - type: ARR + fieldno: 2 + id: 1 + space_id: 512 + name: spatial + type: RTREE +... +s:insert{1,{0.0,0.0}} +--- +- [1, [0, 0]] +... +s:insert{2,{0.0,10.0}} +--- +- [2, [0, 10]] +... +s:insert{3,{0.0,50.0}} +--- +- [3, [0, 50]] +... +s:insert{4,{10.0,0.0}} +--- +- [4, [10, 0]] +... +s:insert{5,{50.0,0.0}} +--- +- [5, [50, 0]] +... +s:insert{6,{10.0,10.0}} +--- +- [6, [10, 10]] +... +s:insert{7,{10.0,50.0}} +--- +- [7, [10, 50]] +... +s:insert{8,{50.0,10.0}} +--- +- [8, [50, 10]] +... +s:insert{9,{50.0,50.0}} +--- +- [9, [50, 50]] +... +-- select all records +s.index.spatial:select({iterator = 'ALL'}) +--- +- - [1, [0, 0]] + - [2, [0, 10]] + - [3, [0, 50]] + - [4, [10, 0]] + - [5, [50, 0]] + - [6, [10, 10]] + - [7, [10, 50]] + - [8, [50, 10]] + - [9, [50, 50]] +... +-- select records belonging to rectangle (0,0,10,10) +s.index.spatial:select({0.0,0.0,10.0,10.0}, {iterator = 'LE'}) +--- +- - [1, [0, 0]] + - [2, [0, 10]] + - [4, [10, 0]] + - [6, [10, 10]] +... +-- select records with coodinates (10,10) +s.index.spatial:select({10.0,10.0}, {iterator = 'EQ'}) +--- +- - [6, [10, 10]] +... +-- select neigbors of point (5,5) +s.index.spatial:select({5.0,5.0}, {iterator = 'NEIGHBOR'}) +--- +- - [6, [10, 10]] + - [4, [10, 0]] + - [2, [0, 10]] + - [1, [0, 0]] + - [8, [50, 10]] + - [7, [10, 50]] + - [5, [50, 0]] + - [3, [0, 50]] + - [9, [50, 50]] +... +s:drop() +--- +... 
diff --git a/test/box/rtree_array.test.lua b/test/box/rtree_array.test.lua new file mode 100644 index 0000000000000000000000000000000000000000..2221cb88cae8dc1e5b00d8739133347cdcf97f07 --- /dev/null +++ b/test/box/rtree_array.test.lua @@ -0,0 +1,24 @@ +s = box.schema.create_space('spatial') +s:create_index('primary') +s:create_index('spatial', { type = 'rtree', parts = {2, 'arr'}}) + +s:insert{1,{0.0,0.0}} +s:insert{2,{0.0,10.0}} +s:insert{3,{0.0,50.0}} +s:insert{4,{10.0,0.0}} +s:insert{5,{50.0,0.0}} +s:insert{6,{10.0,10.0}} +s:insert{7,{10.0,50.0}} +s:insert{8,{50.0,10.0}} +s:insert{9,{50.0,50.0}} + +-- select all records +s.index.spatial:select({iterator = 'ALL'}) +-- select records belonging to rectangle (0,0,10,10) +s.index.spatial:select({0.0,0.0,10.0,10.0}, {iterator = 'LE'}) +-- select records with coodinates (10,10) +s.index.spatial:select({10.0,10.0}, {iterator = 'EQ'}) +-- select neigbors of point (5,5) +s.index.spatial:select({5.0,5.0}, {iterator = 'NEIGHBOR'}) + +s:drop() diff --git a/test/box/rtree_benchmark.result b/test/box/rtree_benchmark.result new file mode 100644 index 0000000000000000000000000000000000000000..02bfad52f4f31356a7baacb1350174ea26b56f6b --- /dev/null +++ b/test/box/rtree_benchmark.result @@ -0,0 +1,107 @@ +s = box.schema.create_space('rtreebench') +--- +... +s:create_index('primary') +--- +- unique: true + parts: + - type: NUM + fieldno: 1 + id: 0 + space_id: 512 + name: primary + type: TREE +... +s:create_index('spatial', { type = 'rtree', parts = {2, 'num', 3, 'num'}}) +--- +- unique: true + parts: + - type: NUM + fieldno: 2 + - type: NUM + fieldno: 3 + id: 1 + space_id: 512 + name: spatial + type: RTREE +... +n_records = 1000000 +--- +... +n_iterations = 100000 +--- +... +n_neighbors = 10 +--- +... +file = io.open("rtree_benchmark.res", "w") +--- +... +start = os.clock() +--- +... +--# setopt delimiter ';' +for i = 1, n_records do + s:insert{i,180*math.random(),180*math.random()} +end; +--- +... 
+file:write(string.format("Elapsed time for inserting %d records: %d\n", n_records, os.clock() - start)); +--- +- true +... +start = os.clock(); +--- +... +n = 0; +--- +... +for i = 1, n_iterations do + x = 180*math.random() + y = 180*math.random() + for k,v in s.index.spatial:pairs({x,y,x+1,y+1}, {iterator = 'LE'}) do + n = n + 1 + end +end; +--- +... +file:write(string.format("Elapsed time for %d belongs searches selecting %d records: %d\n", n_iterations, n, os.clock() - start)); +--- +- true +... +start = os.clock(); +--- +... +n = 0 +for i = 1, n_iterations do + x = 180*math.random() + y = 180*math.random() + for k,v in pairs(s.index.spatial:select({x,y }, {limit = n_neighbors, iterator = 'NEIGHBOR'})) do + n = n + 1 + end +end; +--- +... +file:write(string.format("Elapsed time for %d nearest %d neighbors searches selecting %d records: %d\n", n_iterations, n_neighbors, n, os.clock() - start)); +--- +- true +... +start = os.clock(); +--- +... +for i = 1, n_records do + s:delete{i} +end; +--- +... +file:write(string.format("Elapsed time for deleting %d records: %d\n", n_records, os.clock() - start)); +--- +- true +... +file:close(); +--- +- true +... +s:drop(); +--- +... 
diff --git a/test/box/rtree_benchmark.test.lua b/test/box/rtree_benchmark.test.lua new file mode 100644 index 0000000000000000000000000000000000000000..103cc4f2e367018d17369cebef1e36aced883bab --- /dev/null +++ b/test/box/rtree_benchmark.test.lua @@ -0,0 +1,49 @@ +s = box.schema.create_space('rtreebench') +s:create_index('primary') +s:create_index('spatial', { type = 'rtree', parts = {2, 'num', 3, 'num'}}) + +n_records = 1000000 +n_iterations = 100000 +n_neighbors = 10 + +file = io.open("rtree_benchmark.res", "w") +start = os.clock() + +--# setopt delimiter ';' +for i = 1, n_records do + s:insert{i,180*math.random(),180*math.random()} +end; + +file:write(string.format("Elapsed time for inserting %d records: %d\n", n_records, os.clock() - start)); + +start = os.clock(); +n = 0; +for i = 1, n_iterations do + x = 180*math.random() + y = 180*math.random() + for k,v in s.index.spatial:pairs({x,y,x+1,y+1}, {iterator = 'LE'}) do + n = n + 1 + end +end; +file:write(string.format("Elapsed time for %d belongs searches selecting %d records: %d\n", n_iterations, n, os.clock() - start)); + +start = os.clock(); +n = 0 +for i = 1, n_iterations do + x = 180*math.random() + y = 180*math.random() + for k,v in pairs(s.index.spatial:select({x,y }, {limit = n_neighbors, iterator = 'NEIGHBOR'})) do + n = n + 1 + end +end; +file:write(string.format("Elapsed time for %d nearest %d neighbors searches selecting %d records: %d\n", n_iterations, n_neighbors, n, os.clock() - start)); + +start = os.clock(); +for i = 1, n_records do + s:delete{i} +end; +file:write(string.format("Elapsed time for deleting %d records: %d\n", n_records, os.clock() - start)); + +file:close(); +s:drop(); + diff --git a/test/box/rtree_point.result b/test/box/rtree_point.result new file mode 100644 index 0000000000000000000000000000000000000000..aa86973a8f831cd32065bbc035ecaa5a909537ec --- /dev/null +++ b/test/box/rtree_point.result @@ -0,0 +1,105 @@ +s = box.schema.create_space('spatial') +--- +... 
+s:create_index('primary') +--- +- unique: true + parts: + - type: NUM + fieldno: 1 + id: 0 + space_id: 512 + name: primary + type: TREE +... +s:create_index('spatial', { type = 'rtree', parts = {2, 'num', 3, 'num'}}) +--- +- unique: true + parts: + - type: NUM + fieldno: 2 + - type: NUM + fieldno: 3 + id: 1 + space_id: 512 + name: spatial + type: RTREE +... +s:insert{1,0,0} +--- +- [1, 0, 0] +... +s:insert{2,0,10} +--- +- [2, 0, 10] +... +s:insert{3,0,50} +--- +- [3, 0, 50] +... +s:insert{4,10,0} +--- +- [4, 10, 0] +... +s:insert{5,50,0} +--- +- [5, 50, 0] +... +s:insert{6,10,10} +--- +- [6, 10, 10] +... +s:insert{7,10,50} +--- +- [7, 10, 50] +... +s:insert{8,50,10} +--- +- [8, 50, 10] +... +s:insert{9,50,50} +--- +- [9, 50, 50] +... +-- select all records +s.index.spatial:select({iterator = 'ALL'}) +--- +- - [1, 0, 0] + - [2, 0, 10] + - [3, 0, 50] + - [4, 10, 0] + - [5, 50, 0] + - [6, 10, 10] + - [7, 10, 50] + - [8, 50, 10] + - [9, 50, 50] +... +-- select records belonging to rectangle (0,0,10,10) +s.index.spatial:select({0,0,10,10}, {iterator = 'LE'}) +--- +- - [1, 0, 0] + - [2, 0, 10] + - [4, 10, 0] + - [6, 10, 10] +... +-- select records with coodinates (10,10) +s.index.spatial:select({10,10}, {iterator = 'EQ'}) +--- +- - [6, 10, 10] +... +-- select neigbors of point (5,5) +s.index.spatial:select({5,5}, {iterator = 'NEIGHBOR'}) +--- +- - [6, 10, 10] + - [4, 10, 0] + - [2, 0, 10] + - [1, 0, 0] + - [8, 50, 10] + - [7, 10, 50] + - [5, 50, 0] + - [3, 0, 50] + - [9, 50, 50] +... +s:drop() +--- +... 
diff --git a/test/box/rtree_point.test.lua b/test/box/rtree_point.test.lua new file mode 100644 index 0000000000000000000000000000000000000000..cc36e1e40020b854cf05b9250fa94650342e3cdf --- /dev/null +++ b/test/box/rtree_point.test.lua @@ -0,0 +1,24 @@ +s = box.schema.create_space('spatial') +s:create_index('primary') +s:create_index('spatial', { type = 'rtree', parts = {2, 'num', 3, 'num'}}) + +s:insert{1,0,0} +s:insert{2,0,10} +s:insert{3,0,50} +s:insert{4,10,0} +s:insert{5,50,0} +s:insert{6,10,10} +s:insert{7,10,50} +s:insert{8,50,10} +s:insert{9,50,50} + +-- select all records +s.index.spatial:select({iterator = 'ALL'}) +-- select records belonging to rectangle (0,0,10,10) +s.index.spatial:select({0,0,10,10}, {iterator = 'LE'}) +-- select records with coodinates (10,10) +s.index.spatial:select({10,10}, {iterator = 'EQ'}) +-- select neigbors of point (5,5) +s.index.spatial:select({5,5}, {iterator = 'NEIGHBOR'}) + +s:drop() diff --git a/test/box/rtree_point_r2.result b/test/box/rtree_point_r2.result new file mode 100644 index 0000000000000000000000000000000000000000..89ac9cdb19b69d199bc48c7f0f5d459089a79980 --- /dev/null +++ b/test/box/rtree_point_r2.result @@ -0,0 +1,105 @@ +s = box.schema.create_space('spatial') +--- +... +s:create_index('primary') +--- +- unique: true + parts: + - type: NUM + fieldno: 1 + id: 0 + space_id: 512 + name: primary + type: TREE +... +s:create_index('spatial', { type = 'rtree', parts = {2, 'num', 3, 'num'}}) +--- +- unique: true + parts: + - type: NUM + fieldno: 2 + - type: NUM + fieldno: 3 + id: 1 + space_id: 512 + name: spatial + type: RTREE +... +s:insert{1,0.0,0.0} +--- +- [1, 0, 0] +... +s:insert{2,0.0,10.0} +--- +- [2, 0, 10] +... +s:insert{3,0.0,50.0} +--- +- [3, 0, 50] +... +s:insert{4,10.0,0.0} +--- +- [4, 10, 0] +... +s:insert{5,50.0,0.0} +--- +- [5, 50, 0] +... +s:insert{6,10.0,10.0} +--- +- [6, 10, 10] +... +s:insert{7,10.0,50.0} +--- +- [7, 10, 50] +... +s:insert{8,50.0,10.0} +--- +- [8, 50, 10] +... 
+s:insert{9,50.0,50.0} +--- +- [9, 50, 50] +... +-- select all records +s.index.spatial:select({iterator = 'ALL'}) +--- +- - [1, 0, 0] + - [2, 0, 10] + - [3, 0, 50] + - [4, 10, 0] + - [5, 50, 0] + - [6, 10, 10] + - [7, 10, 50] + - [8, 50, 10] + - [9, 50, 50] +... +-- select records belonging to rectangle (0,0,10,10) +s.index.spatial:select({0.0,0.0,10.0,10.0}, {iterator = 'LE'}) +--- +- - [1, 0, 0] + - [2, 0, 10] + - [4, 10, 0] + - [6, 10, 10] +... +-- select records with coodinates (10,10) +s.index.spatial:select({10.0,10.0}, {iterator = 'EQ'}) +--- +- - [6, 10, 10] +... +-- select neigbors of point (5,5) +s.index.spatial:select({5.0,5.0}, {iterator = 'NEIGHBOR'}) +--- +- - [6, 10, 10] + - [4, 10, 0] + - [2, 0, 10] + - [1, 0, 0] + - [8, 50, 10] + - [7, 10, 50] + - [5, 50, 0] + - [3, 0, 50] + - [9, 50, 50] +... +s:drop() +--- +... diff --git a/test/box/rtree_point_r2.test.lua b/test/box/rtree_point_r2.test.lua new file mode 100644 index 0000000000000000000000000000000000000000..9fb63dcda3b0ada230633f646001f1faa11d8a09 --- /dev/null +++ b/test/box/rtree_point_r2.test.lua @@ -0,0 +1,24 @@ +s = box.schema.create_space('spatial') +s:create_index('primary') +s:create_index('spatial', { type = 'rtree', parts = {2, 'num', 3, 'num'}}) + +s:insert{1,0.0,0.0} +s:insert{2,0.0,10.0} +s:insert{3,0.0,50.0} +s:insert{4,10.0,0.0} +s:insert{5,50.0,0.0} +s:insert{6,10.0,10.0} +s:insert{7,10.0,50.0} +s:insert{8,50.0,10.0} +s:insert{9,50.0,50.0} + +-- select all records +s.index.spatial:select({iterator = 'ALL'}) +-- select records belonging to rectangle (0,0,10,10) +s.index.spatial:select({0.0,0.0,10.0,10.0}, {iterator = 'LE'}) +-- select records with coodinates (10,10) +s.index.spatial:select({10.0,10.0}, {iterator = 'EQ'}) +-- select neigbors of point (5,5) +s.index.spatial:select({5.0,5.0}, {iterator = 'NEIGHBOR'}) + +s:drop() diff --git a/test/box/rtree_rect.result b/test/box/rtree_rect.result new file mode 100644 index 
0000000000000000000000000000000000000000..f0feac3aa2fb1167eb2ae106a0ad35ad909e018d --- /dev/null +++ b/test/box/rtree_rect.result @@ -0,0 +1,104 @@ +s = box.schema.create_space('spatial') +--- +... +s:create_index('primary') +--- +- unique: true + parts: + - type: NUM + fieldno: 1 + id: 0 + space_id: 512 + name: primary + type: TREE +... +s:create_index('spatial', { type = 'rtree', parts = {2, 'num', 3, 'num', 4, 'num', 5, 'num'}}) +--- +- unique: true + parts: + - type: NUM + fieldno: 2 + - type: NUM + fieldno: 3 + - type: NUM + fieldno: 4 + - type: NUM + fieldno: 5 + id: 1 + space_id: 512 + name: spatial + type: RTREE +... +s:insert{1,0,0,10,10} +--- +- [1, 0, 0, 10, 10] +... +s:insert{2,5,5,10,10} +--- +- [2, 5, 5, 10, 10] +... +s:insert{3,0,0,5,5} +--- +- [3, 0, 0, 5, 5] +... +-- select all records +s.index.spatial:select({}, {iterator = 'ALL'}) +--- +- - [1, 0, 0, 10, 10] + - [2, 5, 5, 10, 10] + - [3, 0, 0, 5, 5] +... +-- select records belonging to rectangle (0,0,5,5) +s.index.spatial:select({0,0,5,5}, {iterator = 'LE'}) +--- +- - [3, 0, 0, 5, 5] +... +-- select records strict belonging to rectangle (0,0,5,5) +s.index.spatial:select({0,0,5,5}, {iterator = 'LT'}) +--- +- [] +... +-- select records strict belonging to rectangle (4,4,10,10) +s.index.spatial:select({4,4,10,10}, {iterator = 'LT'}) +--- +- - [2, 5, 5, 10, 10] +... +-- select records containing point (5,5) +s.index.spatial:select({5,5}, {iterator = 'GE'}) +--- +- - [1, 0, 0, 10, 10] + - [2, 5, 5, 10, 10] + - [3, 0, 0, 5, 5] +... +-- select records containing rectangle (1,1,2,2) +s.index.spatial:select({1,1,2,2}, {iterator = 'GE'}) +--- +- - [1, 0, 0, 10, 10] + - [3, 0, 0, 5, 5] +... +-- select records strict containing rectangle (0,0,5,5) +s.index.spatial:select({0,0,5,5}, {iterator = 'GT'}) +--- +- - [1, 0, 0, 10, 10] +... +-- select records overlapping rectangle (9,4,11,6) +s.index.spatial:select({9,4,11,6}, {iterator = 'OVERLAPS'}) +--- +- - [1, 0, 0, 10, 10] + - [2, 5, 5, 10, 10] +... 
+-- select records with coordinates (0,0,5,5) +s.index.spatial:select({0,0,5,5}, {iterator = 'EQ'}) +--- +- - [3, 0, 0, 5, 5] +... +-- select neigbors of point (1,1) +s.index.spatial:select({1,1}, {iterator = 'NEIGHBOR'}) +--- +- - [3, 0, 0, 5, 5] + - [1, 0, 0, 10, 10] + - [2, 5, 5, 10, 10] +... +s:drop() +--- +... diff --git a/test/box/rtree_rect.test.lua b/test/box/rtree_rect.test.lua new file mode 100644 index 0000000000000000000000000000000000000000..9ffdaa7ae80d318c313623b315601bef5c452160 --- /dev/null +++ b/test/box/rtree_rect.test.lua @@ -0,0 +1,30 @@ +s = box.schema.create_space('spatial') +s:create_index('primary') +s:create_index('spatial', { type = 'rtree', parts = {2, 'num', 3, 'num', 4, 'num', 5, 'num'}}) + +s:insert{1,0,0,10,10} +s:insert{2,5,5,10,10} +s:insert{3,0,0,5,5} + +-- select all records +s.index.spatial:select({}, {iterator = 'ALL'}) +-- select records belonging to rectangle (0,0,5,5) +s.index.spatial:select({0,0,5,5}, {iterator = 'LE'}) +-- select records strict belonging to rectangle (0,0,5,5) +s.index.spatial:select({0,0,5,5}, {iterator = 'LT'}) +-- select records strict belonging to rectangle (4,4,10,10) +s.index.spatial:select({4,4,10,10}, {iterator = 'LT'}) +-- select records containing point (5,5) +s.index.spatial:select({5,5}, {iterator = 'GE'}) +-- select records containing rectangle (1,1,2,2) +s.index.spatial:select({1,1,2,2}, {iterator = 'GE'}) +-- select records strict containing rectangle (0,0,5,5) +s.index.spatial:select({0,0,5,5}, {iterator = 'GT'}) +-- select records overlapping rectangle (9,4,11,6) +s.index.spatial:select({9,4,11,6}, {iterator = 'OVERLAPS'}) +-- select records with coordinates (0,0,5,5) +s.index.spatial:select({0,0,5,5}, {iterator = 'EQ'}) +-- select neigbors of point (1,1) +s.index.spatial:select({1,1}, {iterator = 'NEIGHBOR'}) + +s:drop() diff --git a/test/box/suite.ini b/test/box/suite.ini index adc25b973ed65ff82f0e9d8a061d64bc6197240f..6f9a00fec05deb820394d6f41fe181b0d7470979 100644 --- 
a/test/box/suite.ini +++ b/test/box/suite.ini @@ -2,8 +2,8 @@ core = tarantool description = tarantool/box, minimal configuration script = box.lua -disabled = +disabled = rtree_benchmark.test.lua valgrind_disabled = admin_coredump.test.lua -release_disabled = errinj.test.lua errinj_index.test.lua +release_disabled = errinj.test.lua errinj_index.test.lua cmdline.test.lua lua_libs = lua/fiber.lua lua/fifo.lua use_unix_sockets = True diff --git a/third_party/rtree.cc b/third_party/rtree.cc new file mode 100644 index 0000000000000000000000000000000000000000..016f0842d12eb7f4e5f43180b25ac5b0b3fd01a7 --- /dev/null +++ b/third_party/rtree.cc @@ -0,0 +1,585 @@ +#include <string.h> +#include <assert.h> +#include "rtree.h" + +inline void* operator new(size_t, void* at) +{ + return at; +} + +class R_page { +public: + struct branch { + rectangle_t r; + R_page* p; + }; + + enum { + card = (RTREE_PAGE_SIZE-4)/sizeof(branch), // maximal number of branches at page + min_fill = card/2 // minimal number of branches at non-root page + }; + + struct reinsert_list { + R_page* chain; + int level; + reinsert_list() { chain = NULL; } + }; + + R_page* insert(R_tree* tree, rectangle_t const& r, record_t obj, int level); + + bool remove(R_tree* tree, rectangle_t const& r, record_t obj, int level, reinsert_list& rlist); + + rectangle_t cover() const; + + R_page* split_page(R_tree* tree, branch const& br); + + R_page* add_branch(R_tree* tree, branch const& br) { + if (n < card) { + b[n++] = br; + return NULL; + } else { + return split_page(tree, br); + } + } + void remove_branch(int i); + + void purge(R_tree* tree, int level); + + R_page* next_reinsert_page() const { return (R_page*)b[card-1].p; } + + R_page(rectangle_t const& rect, record_t obj); + R_page(R_page* old_root, R_page* new_page); + + int n; // number of branches at page + branch b[card]; +}; + +R_tree::R_tree(FixedSizeAllocator::Factory* factory) +{ + n_records = 0; + height = 0; + root = NULL; + update_count = 0; + page_allocator 
= factory->create(sizeof(R_page)); + neighbor_allocator = factory->create(sizeof(R_tree_iterator::Neighbor)); + allocator_factory = factory; +} + +R_tree::~R_tree() +{ + purge(); + allocator_factory->destroy(page_allocator); + allocator_factory->destroy(neighbor_allocator); +} + +void R_tree::insert(rectangle_t const& r, record_t obj) +{ + if (root == NULL) { + root = new (page_allocator->alloc()) R_page(r, obj); + height = 1; + } else { + R_page* p = root->insert(this, r, obj, height); + if (p != NULL) { + // root was split: put a new root above both halves, tree grows by one level + root = new (page_allocator->alloc()) R_page(root, p); + height += 1; + } + } + update_count += 1; + n_records += 1; +} + + +bool R_tree::remove(rectangle_t const& r, record_t obj) +{ + if (height != 0) { + R_page::reinsert_list rlist; + if (root->remove(this, r, obj, height, rlist)) { + R_page* pg = rlist.chain; + int level = rlist.level; + while (pg != NULL) { + for (int i = 0, n = pg->n; i < n; i++) { + R_page* p = root->insert(this, pg->b[i].r, + pg->b[i].p, height-level); + if (p != NULL) { + // root was split: put a new root above both halves, tree grows by one level + root = new (page_allocator->alloc()) R_page(root, p); + height += 1; + } + } + level -= 1; + R_page* next = pg->next_reinsert_page(); + page_allocator->free(pg); + pg = next; + } + if (root->n == 1 && height > 1) { + R_page* new_root = root->b[0].p; + page_allocator->free(root); + root = new_root; + height -= 1; + } + n_records -= 1; + update_count += 1; + return true; + } + } + return false; +} + +bool R_tree_iterator::goto_first(int sp, R_page* pg) +{ + if (sp+1 == tree->height) { + for (int i = 0, n = pg->n; i < n; i++) { + if ((r.*leaf_cmp)(pg->b[i].r)) { + stack[sp].page = pg; + stack[sp].pos = i; + return true; + } + } + } else { + for (int i = 0, n = pg->n; i < n; i++) { + if ((r.*intr_cmp)(pg->b[i].r) && goto_first(sp+1, pg->b[i].p)) { + stack[sp].page = pg; + stack[sp].pos = i; + return true; + } + } + } + return false; +} + + +bool R_tree_iterator::goto_next(int sp) +{ + R_page* pg = stack[sp].page; + if (sp+1 == 
tree->height) { + for (int i = stack[sp].pos, n = pg->n; ++i < n;) { + if ((r.*leaf_cmp)(pg->b[i].r)) { + stack[sp].pos = i; + return true; + } + } + } else { + for (int i = stack[sp].pos, n = pg->n; ++i < n;) { + if ((r.*intr_cmp)(pg->b[i].r) && goto_first(sp+1, pg->b[i].p)) { + stack[sp].page = pg; + stack[sp].pos = i; + return true; + } + } + } + return sp > 0 ? goto_next(sp-1) : false; +} + +R_tree_iterator::R_tree_iterator() +{ + list = NULL; + free = NULL; + tree = NULL; +} + +R_tree_iterator::~R_tree_iterator() +{ + Neighbor *curr, *next; + reset(); + for (curr = free; curr != NULL; curr = next) { + next = curr->next; + tree->neighbor_allocator->free(curr); + } +} + +void R_tree_iterator::reset() +{ + if (list != NULL) { + Neighbor** npp = &free; + while (*npp != NULL) { + npp = &(*npp)->next; + } + *npp = list; + list = NULL; + } +} + + +bool R_tree_iterator::init(R_tree const* tree, rectangle_t const& r, Spatial_search_op op) +{ + reset(); + this->tree = tree; + this->update_count = tree->update_count; + this->r = r; + this->op = op; + assert(tree->height <= MAX_HEIGHT); + switch (op) { + case SOP_ALL: + intr_cmp = leaf_cmp = &rectangle_t::operator_true; + break; + case SOP_EQUALS: + intr_cmp = &rectangle_t::operator <=; + leaf_cmp = &rectangle_t::operator ==; + break; + case SOP_CONTAINS: + intr_cmp = leaf_cmp = &rectangle_t::operator <=; + break; + case SOP_STRICT_CONTAINS: + intr_cmp = leaf_cmp = &rectangle_t::operator <; + break; + case SOP_OVERLAPS: + intr_cmp = leaf_cmp = &rectangle_t::operator &; + break; + case SOP_BELONGS: + intr_cmp = &rectangle_t::operator &; + leaf_cmp = &rectangle_t::operator >=; + break; + case SOP_STRICT_BELONGS: + intr_cmp = &rectangle_t::operator &; + leaf_cmp = &rectangle_t::operator >; + break; + case SOP_NEIGHBOR: + if (tree->root) { + list = new_neighbor(tree->root, tree->root->cover().distance2(r.boundary), tree->height); + return true; + } else { + list = NULL; + return false; + } + } + if (tree->root && 
goto_first(0, tree->root)) { + stack[tree->height-1].pos -= 1; // will be incremented by goto_next + eof = false; + return true; + } else { + eof = true; + return false; + } +} + +void R_tree_iterator::insert(Neighbor* node) +{ + Neighbor *prev = NULL, *next = list; + area_t distance = node->distance; + while (next != NULL && next->distance < distance) { + prev = next; + next = prev->next; + } + node->next = next; + if (prev == NULL) { + list = node; + } else { + prev->next = node; + } +} + +R_tree_iterator::Neighbor* R_tree_iterator::new_neighbor(void* child, area_t distance, int level) +{ + Neighbor* n = free; + if (n == NULL) { + n = new (tree->neighbor_allocator->alloc()) Neighbor(); + } else { + free = n->next; + } + n->child = child; + n->distance = distance; + n->level = level; + n->next = NULL; + return n; +} + +void R_tree_iterator::free_neighbor(Neighbor* n) +{ + n->next = free; + free = n; +} + +record_t R_tree_iterator::next() +{ + if (update_count != tree->update_count) { + // Index was updated since cursor initialization + return NULL; + } + if (op == SOP_NEIGHBOR) { + // To return element in order of increasing distance from specified point, + // we build sorted list of R-Tree items + // (ordered by distance from specified point) starting from root page. 
+ // Algorithm is the following: + // + // insert root R-Tree page in the sorted list + // while sorted list is not empty: + // get top element from the sorted list + // if it is tree leaf (record) then return it as current element + // otherwise (R-Tree page) get children of this R-Tree page and insert them in sorted list + while (true) { + Neighbor* neighbor = list; + if (neighbor == NULL) { + return NULL; + } + R_page* pg = (R_page*)neighbor->child; + int level = neighbor->level; + list = neighbor->next; + free_neighbor(neighbor); + if (level == 0) { + return (record_t)pg; + } + for (int i = 0, n = pg->n; i < n; i++) { + insert(new_neighbor(pg->b[i].p, pg->b[i].r.distance2(r.boundary), level-1)); + } + } + } + int sp = tree->height-1; + if (!eof && goto_next(sp)) { + return stack[sp].page->b[stack[sp].pos].p; + } + eof = true; + return NULL; +} + +bool R_tree::search(rectangle_t const& r, Spatial_search_op op, R_tree_iterator& iterator) const +{ + return iterator.init(this, r, op); +} + +void R_tree::purge() +{ + if (root != NULL) { + root->purge(this, height); + root = NULL; + n_records = 0; + height = 0; + } +} + +//------------------------------------------------------------------------- +// R-tree page methods +//------------------------------------------------------------------------- + +// +// Create root page +// +R_page::R_page(rectangle_t const& r, record_t obj) +{ + n = 1; + b[0].r = r; + b[0].p = (R_page*)obj; +} + +// +// Create new root page (root splitting) +// +R_page::R_page(R_page* old_root, R_page* new_page) +{ + n = 2; + b[0].r = old_root->cover(); + b[0].p = old_root; + b[1].r = new_page->cover(); + b[1].p = new_page; +} + +// +// Calculate cover of all rectangles at page +// +rectangle_t R_page::cover() const +{ + rectangle_t r = b[0].r; + for (int i = 1; i < n; i++) { + r += b[i].r; + } + return r; +} + +R_page* R_page::split_page(R_tree* tree, branch const& br) +{ + int i, j, seed[2] = {0,0}; + area_t rect_area[card+1], waste, worst_waste 
= AREA_MIN; + // + // As the seeds for the two groups, find two rectangles which waste + // the most area if covered by a single rectangle. + // + rect_area[0] = area(br.r); + for (i = 0; i < card; i++) { + rect_area[i+1] = area(b[i].r); + } + branch const* bp = &br; + for (i = 0; i < card; i++) { + for (j = i+1; j <= card; j++) { + waste = area(bp->r + b[j-1].r) - rect_area[i] - rect_area[j]; + if (waste > worst_waste) { + worst_waste = waste; + seed[0] = i; + seed[1] = j; + } + } + bp = &b[i]; + } + char taken[card]; + rectangle_t group[2]; + area_t group_area[2]; + int group_card[2]; + R_page* p; + + memset(taken, 0, sizeof taken); + taken[seed[1]-1] = 2; + group[1] = b[seed[1]-1].r; + + if (seed[0] == 0) { + group[0] = br.r; + p = new (tree->page_allocator->alloc()) R_page(br.r, br.p); + } else { + group[0] = b[seed[0]-1].r; + p = new (tree->page_allocator->alloc()) R_page(group[0], b[seed[0]-1].p); + b[seed[0]-1] = br; + } + group_card[0] = group_card[1] = 1; + group_area[0] = rect_area[seed[0]]; + group_area[1] = rect_area[seed[1]]; + // + // Split remaining rectangles between two groups. + // The one chosen is the one with the greatest difference in area + // expansion depending on which group - the rect most strongly + // attracted to one group and repelled from the other. 
+ // + while (group_card[0] + group_card[1] < card + 1 + && group_card[0] < card + 1 - min_fill + && group_card[1] < card + 1 - min_fill) + { + int better_group = -1, chosen = -1; + area_t biggest_diff = -1; + for (i = 0; i < card; i++) { + if (!taken[i]) { + area_t diff = (area(group[0] + b[i].r) - group_area[0]) + - (area(group[1] + b[i].r) - group_area[1]); + if (diff > biggest_diff || -diff > biggest_diff) { + chosen = i; + if (diff < 0) { + better_group = 0; + biggest_diff = -diff; + } else { + better_group = 1; + biggest_diff = diff; + } + } + } + } + assert(chosen >= 0); + group_card[better_group] += 1; + group[better_group] += b[chosen].r; + group_area[better_group] = area(group[better_group]); + taken[chosen] = better_group+1; + if (better_group == 0) { + p->b[group_card[0]-1] = b[chosen]; + } + } + // + // If one group gets too full, then remaining rectangle are + // split between two groups in such way to balance cards of two groups. + // + if (group_card[0] + group_card[1] < card + 1) { + for (i = 0; i < card; i++) { + if (!taken[i]) { + if (group_card[0] >= group_card[1]) { + taken[i] = 2; + group_card[1] += 1; + } else { + taken[i] = 1; + p->b[group_card[0]++] = b[i]; + } + } + } + } + p->n = group_card[0]; + n = group_card[1]; + for (i = 0, j = 0; i < n; j++) { + if (taken[j] == 2) { + b[i++] = b[j]; + } + } + return p; +} + +void R_page::remove_branch(int i) +{ + n -= 1; + memmove(&b[i], &b[i+1], (n-i)*sizeof(branch)); +} + +R_page* R_page::insert(R_tree* tree, rectangle_t const& r, record_t obj, int level) +{ + branch br; + if (--level != 0) { + // not leaf page + int i, mini = 0; + area_t min_incr = AREA_MAX; + area_t best_area = AREA_MAX; + for (i = 0; i < n; i++) { + area_t r_area = area(b[i].r); + area_t incr = area(b[i].r + r) - r_area; + if (incr < min_incr) { + best_area = r_area; + min_incr = incr; + mini = i; + } else if (incr == min_incr && r_area < best_area) { + best_area = r_area; + mini = i; + } + } + R_page* p = b[mini].p; + R_page* 
q = p->insert(tree, r, obj, level); + if (q == NULL) { + // child was not split + b[mini].r += r; + return NULL; + } else { + // child was split + b[mini].r = p->cover(); + br.p = q; + br.r = q->cover(); + return add_branch(tree, br); + } + } else { + br.p = (R_page*)obj; + br.r = r; + return add_branch(tree, br); + } +} + +bool R_page::remove(R_tree* tree, rectangle_t const& r, record_t rec, + int level, reinsert_list& rlist) +{ + if (--level != 0) { + for (int i = 0; i < n; i++) { + if (b[i].r & r) { + R_page* p = b[i].p; + if (p->remove(tree, r, rec, level, rlist)) { + if (p->n >= min_fill) { + b[i].r = p->cover(); + } else { + // not enough entries in child + p->b[card-1].p = rlist.chain; + rlist.chain = p; + rlist.level = level - 1; + remove_branch(i); + } + return true; + } + } + } + } else { + for (int i = 0; i < n; i++) { + if (b[i].p == rec) { + remove_branch(i); + return true; + } + } + } + return false; +} + +void R_page::purge(R_tree* tree, int level) +{ + if (--level != 0) { /* this is an internal node in the tree */ + for (int i = 0; i < n; i++) { + b[i].p->purge(tree, level); + } + } + tree->page_allocator->free(this); +} + + diff --git a/third_party/rtree.h b/third_party/rtree.h new file mode 100644 index 0000000000000000000000000000000000000000..48406c98234ad96c023b33429f489e9c7b37e08f --- /dev/null +++ b/third_party/rtree.h @@ -0,0 +1,229 @@ +/* + * Guttman's R-Tree + * Copyright (C) 2014 Mail.RU + */ + +#ifndef __RTREE_H__ +#define __RTREE_H__ + +#include <stdlib.h> +#include <limits.h> +#include <math.h> +#include <float.h> + +#define MAX_HEIGHT 16 +#define DIMENSIONS 2 + +typedef double coord_t; +typedef double area_t; +typedef void* record_t; + +#define AREA_MAX DBL_MAX +#define AREA_MIN (-DBL_MAX) /* lowest finite area_t value; DBL_MIN is the smallest POSITIVE double and must not be used as a lower bound */ + +#define RTREE_PAGE_SIZE 1024 /* R-Tree uses linear search among the elements of a page, so a larger page causes worse performance */ + +class R_tree; +class R_page; +class R_tree_iterator; + +class rectangle_t +{ +public: + enum { dim = 2 }; + coord_t 
boundary[dim*2]; + + // Squared distance from the point to this rectangle (0 if the point lies inside it) + area_t distance2(coord_t const* point) const + { + area_t d = 0; + for (int i = 0; i < dim; i++) { + if (point[i] < boundary[i]) { + d += (boundary[i] - point[i]) * (boundary[i] - point[i]); + } else if (point[i] > boundary[dim + i]) { + d += (boundary[dim + i] - point[i]) * (boundary[dim + i] - point[i]); + } + } + return d; + } + + + friend area_t area(rectangle_t const& r) { + area_t area = 1; + for (int i = dim; --i >= 0; area *= r.boundary[i+dim] - r.boundary[i]); + return area; + } + + void operator +=(rectangle_t const& r) { + int i = dim; + while (--i >= 0) { + boundary[i] = (boundary[i] <= r.boundary[i]) + ? boundary[i] : r.boundary[i]; + boundary[i+dim] = (boundary[i+dim] >= r.boundary[i+dim]) + ? boundary[i+dim] : r.boundary[i+dim]; + } + } + rectangle_t operator + (rectangle_t const& r) const { + rectangle_t res; + int i = dim; + while (--i >= 0) { + res.boundary[i] = (boundary[i] <= r.boundary[i]) + ? boundary[i] : r.boundary[i]; + res.boundary[i+dim] = (boundary[i+dim] >= r.boundary[i+dim]) + ? 
boundary[i+dim] : r.boundary[i+dim]; + } + return res; + } + bool operator& (rectangle_t const& r) const { + int i = dim; + while (--i >= 0) { + if (boundary[i] > r.boundary[i+dim] || + r.boundary[i] > boundary[i+dim]) + { + return false; + } + } + return true; + } + bool operator <= (rectangle_t const& r) const { + int i = dim; + while (--i >= 0) { + if (boundary[i] < r.boundary[i] || + boundary[i+dim] > r.boundary[i+dim]) + { + return false; + } + } + return true; + } + bool operator < (rectangle_t const& r) const { + return *this <= r && *this != r; + } + + bool operator >= (rectangle_t const& r) const { + return r <= *this; + } + bool operator > (rectangle_t const& r) const { + return r <= *this && *this != r; + } + + bool operator == (rectangle_t const& r) const { + int i = dim*2; + while (--i >= 0) { + if (boundary[i] != r.boundary[i]) { + return false; + } + } + return true; + } + bool operator != (rectangle_t const& r) const { + return !(*this == r); + } + bool operator_true(rectangle_t const&) const { + return true; + } +}; + +enum Spatial_search_op +{ + SOP_ALL, + SOP_EQUALS, + SOP_CONTAINS, + SOP_STRICT_CONTAINS, + SOP_OVERLAPS, + SOP_BELONGS, + SOP_STRICT_BELONGS, + SOP_NEIGHBOR +}; + +class R_tree_iterator +{ + friend class R_tree; + struct { + R_page* page; + int pos; + } stack[MAX_HEIGHT]; + + struct Neighbor { + void* child; + Neighbor* next; + int level; + area_t distance; + }; + + typedef bool (rectangle_t::*comparator_t)(rectangle_t const& r) const; + + rectangle_t r; + Spatial_search_op op; + R_tree const* tree; + Neighbor* list; + Neighbor* free; + bool eof; + int update_count; + + comparator_t intr_cmp; + comparator_t leaf_cmp; + + bool goto_first(int sp, R_page* pg); + bool goto_next(int sp); + bool init(R_tree const* tree, rectangle_t const& r, Spatial_search_op op); + void insert(Neighbor* node); + + Neighbor* new_neighbor(void* child, area_t distance, int level); + void free_neighbor(Neighbor* n); +public: + void reset(); + record_t 
next(); + + R_tree_iterator(); + ~R_tree_iterator(); +}; + +class FixedSizeAllocator { +public: + class Factory { + public: + virtual FixedSizeAllocator* create(size_t obj_size) = 0; + virtual void destroy(FixedSizeAllocator* allocator) = 0; + virtual ~Factory() {} + }; + + virtual void* alloc() = 0; + virtual void free(void* ptr) = 0; + virtual size_t used_size() = 0; + virtual ~FixedSizeAllocator() {} +}; + +class R_tree +{ + friend class R_tree_iterator; + friend class R_page; +public: + size_t used_size() const { + return page_allocator->used_size(); + } + + unsigned number_of_records() const { + return n_records; + } + bool search(rectangle_t const& r, Spatial_search_op op, R_tree_iterator& iterator) const; + void insert(rectangle_t const& r, record_t obj); + bool remove(rectangle_t const& r, record_t obj); + void purge(); + R_tree(FixedSizeAllocator::Factory* allocator_factory); + ~R_tree(); + +protected: + unsigned n_records; + unsigned height; + R_page* root; + int update_count; + FixedSizeAllocator* page_allocator; + FixedSizeAllocator* neighbor_allocator; + FixedSizeAllocator::Factory* allocator_factory; +}; + +#endif + + +