diff --git a/.gitignore b/.gitignore index a3dc76d92f0f68e83f9e9d07f1fad4ad12c078a7..d9f849d028cbc601ab1bf41cb0d982065e0b1f52 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,11 @@ .gdb_history TAGS _* -*.cfg config.mk lcov *.o *.d -tarantool* *.snap *.xlog +tarantool_version.h +test/var diff --git a/Makefile b/Makefile index 25e9fa8de8b0f2f2c0036edd440cf0b358e27dc6..b0bdb07f6bce010d2017450af1643315e57d07f4 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,11 @@ else include $(SRCDIR)/scripts/rules.mk endif +.PHONY: test +test: + cd ./test && ./test-run.py + + ifeq ("$(origin module)", "command line") .PHONY: clean clean: diff --git a/doc/silverbox-protocol.txt b/doc/silverbox-protocol.txt index bc1a5ca23550e32604509e47692abf2a656b1550..8be69f51f8078016e0ef41b4e733f9c65b7489dd 100644 --- a/doc/silverbox-protocol.txt +++ b/doc/silverbox-protocol.txt @@ -1,81 +1,328 @@ +; Mail.RU IPROTO protocol, Tarantool/Silverbox subset. +; +; The latest version of this document can be found in +; tarantool source tree, doc/silverbox-protocol.txt +; +; IPROTO is a binary request/response protocol that features a +; complete access to Tarantool functionality, including: +; - request multiplexing, e.g. ability to asynchronously issue +; multiple requests via the same connection +; - response format that supports zero-copy writes +; +; The atoms of representation in the protocol include: +; +; int8 - a single 8-bit byte (i.e. an octet) +; +; int32 - a 32-bit integer in big-endian format (Intel x86) +; +; int32_ber - a 1 to 5 byte BER encoding of a 32 bit integer +; +; BER stands for Basic Encoding Rules, and allows to unequivocally +; compact a 32-bit integer into 1 to 5 bytes depending on its value. +; For more information, see +; http://en.wikipedia.org/wiki/Basic_Encoding_Rules and +; http://www.itu.int/ITU-T/studygroups/com17/languages/X.690-0207.pdf, +; chapter 8.3 Encoding of an Integer Value -varint32 - int32 BER encoding (http://perldoc.perl.org/perlpacktut.html#Another-Portable-Binary-Encoding) -field_t = <size, varint32><data, char *> -tuple_t = <field[1], field_t>...<field[n], field_t> -key_t = <key_cardinality, int32_t><key_fields, tuple_t> - -ret_code: - * 0x******00 - ok - * 0x******01 - retry later - * 0x******02 - permanent error -flags: - * BOX_RETURN_TUPLE = 0x01 - -* Insert (msg = 13) - * Query: - * <n, uint32_t> - namespace number - * <flags, uint32_t> - * <cardinality, uint32_t> - tuple cardinality - * <field[0], field_t> - * ... - * <field[cardinality - 1], field_t> - * Answer: - * <ret_code, uint32_t> - * <tuples_affected, uint32_t> - if (flags & BOX_RETURN_TUPLE) - * <tuple_data_size, uint32_t> - * <cardinality, uint32_t> - * <tuple_data, tuple_t> -* Select (msg = 17) - * Query: - * <n, uint32_t> - namespace number - * <index_n, uint32_t> - index to use - * <offset, uint32_t> - offset (applied to the whole resultset) - * <limit, uint32_t> - limit (the same) - * <count, uint32_t> - number of keys to select by - * <key[0], key_t> - * ... - * <key[count - 1], key_t> - * Answer: - * <ret_code, uint32_t> - * <count, uint32_t> - tuples in answer - * tuple[0]: - * <tuple_data_size, uint32_t> - * <cardinality, uint32_t> - * <tuple_data, tuple_t> - * ... - * tuple[count - 1]: - * ... -* Update fields (msg = 19) - * Query: - * <n, uint32_t> - namespace number - * <flags, uint32_t> - * <key, key_t> // for now key cardinality of 1 is only allowed - * <op_cnt, uint32_t> - number of operations to do - * op[0]: - * <fieldno, uint32_t> - number of field to update - * <op, uint8_t> - operation: - * 0 - set - * 1 - add - * 2 - and - * 3 - xor - * 4 - or - * <argument, field_t> - argument for operation, limitations: - * for add - int32_t - * for and, or, xor - uint32_t - * ... - * op[op_cnt - 1]: - * ... - * Answer: - * <ret_code, uint32_t> - * <tuples_affected, uint32_t> - if (flags & BOX_RETURN_TUPLE) - * <tuple_data_size, uint32_t> - * <cardinality, uint32_t> - * <tuple_data, tuple_t> -* Delete (msg = 20) - * Query: - * <n, uint32_t> - namespace number - * <key, key_t> // for now key cardinality of 1 is only allowed - * Answer: - * <ret_code, uint32_t> +; All requests and responses utilize the same basic structure: + +<packet> ::= <request> | <response> + +<request> ::= <header><body> + +<response> ::= <header><return_code><body> + +; +; <header> has a fixed structure of three 4-byte integers (12 bytes): + +<header> ::= <type><body_length><request_id> + +; <type> represents a request type, a single server command, +; such as PING, SELECT, UPDATE, DELETE, INSERT, etc. +; <type> is replicated intact in the response header. +; The currently supported types are: +; - 13 -- <insert> +; - 17 -- <select> +; - 19 -- <update> +; - 20 -- <delete> +; - 65280 -- <ping> +; This list is scarce since a number of old commands +; were deprecated and removed. + +<type> ::= <int32> + +; +; <body_length> tells the sender or receiver the length of data +; that follows the header. If there is no data, <body_length> is 0. +; +; Note, that <ping> request body is empty; <ping>; request packet +; consists solely of a 12-byte header (65280, 0, 0) +; and gets the same 12-byte header in response. + +<body_length> ::= <int32> + +; +; <request_id> is a unique request identifier set by the client, +; The identifier is necessary to allow request multiplexing -- +; i.e. sending multiple requests through the same connection +; before fetching a response to any of them. +; The value of the identifier currently bears no meaning to the +; server. Similarly to request <type>, it's simply copied to the +; response header as-is. +; Consequently, <request_id> can be 0 or two requests +; can have an identical id. + +<request_id> ::= <int32> + +; <body> holds actual command data. +; Its format and interpretation are defined by the value of +; request <type>. + +<body> ::= <select_request_body> | <select_response_body> | + <insert_request_body> | <insert_response_body> | + <update_request_body> | <update_response_body> | + <delete_request_body> | <delete_response_body> + +; <select_request_body> (required <header> <type> is 17): +; +; Specify which namespace to query, which index in the namespace +; to use, offset in the resulting tuple set (set to 0 for no offset), +; a limit (set to 4294967295 for no limit), and one or several +; keys to use in lookup. When more than one key is given, they +; specify a disjunctive search condition (key1 or key2 or ...). +; + +<select_request_body> ::= <namespace_no><index_no> + <offset><limit><count><tuple>+ + +; Namespace number is a non-negative integer, starting from 0. +; All namespaces are defined in the server configuration file, +; and then referred to by numeric id. + +<namespace_no> ::= <int32> + +; Tarantool supports HASH and TREE indexes. Indexes are +; enumerated similarly to namespaces, starting from 0. +; Each namespace has at least index #0, which defines +; the primary key. + +<index_no> ::= <int32> + +; offset in the result set + +<offset> ::= <int32> + +; limit for the result set + +<limit> ::= <int32> + +; key count in the disjunctive set + +<count> ::= <int32> +; +; A tuple that represents a search key simply lists all key +; fields, preceded with key cardinality (number of list +; elements). Each key in <select_request_body> can have a +; different cardinality. + +<tuple> ::= <cardinality><field>+ + +; +; If a key is not fully specified, i.e. has smaller cardinality +; than the corresponding index, each unspecified field is treated +; as a wildcard. +; + +<cardinality> ::= <int32> + +; +; A field represents a single atom of storage. In key/value +; paradigm, Tarantool's "value" is a list of fields. +; A single unit of storage, therefore, must contain all fields +; of all (possibly multipart) keys and zero or more fields +; treated as "data". To do a <select> the user only needs to +; define fields of the key that is used for search. +; + +<field> ::= <data_length_ber><data> + +; +; SELECT may return zero, one or several tuples. +; <select_response_body> starts with the number of found +; tuples: +; + +<select_response_body> ::= <count><fq_tuple>* + +; +; Tuples returned by the server (we call them "fully qualified") +; are always preceded with calculated information: +; total size of the tuple and number of fields in it. +; This is how the tuple is stored on server side. +; While this information can be often derived from body length, +; it allows the recipient to simplify memory allocation and tuple +; decoding. Certain requests, such as +; <select>, can return more than one tuple. In that case +; fully qualified tuples are also used to identify tuple +; boundaries: in Tarantool, tuples have variable cardinality. +; + +<fq_tuple> ::= <size><cardinality><field>+ + +<size> ::= <int32> + +; +; It is not possible to insert more than one tuple at a time. +; Thus <insert_request_body> (<header> <type> = 13) simply +; holds one tuple, and which namespace to put it into. +; + +<insert_request_body> ::= <namespace_no><flags><tuple> + +; The only defined flag BOX_RETURN_TUPLE (0x01) indicates +; that it is required to return the inserted tuple back: + +<flags> ::= 0 | 1 + +; +; A tuple may already exist. In that case INSERT +; returns 0 for tuple count in response. If BOX_RETURN_TUPLE +; is set, the inserted tuple will be sent back: + +<insert_response_body> ::= <count> | <count><fq_tuple> + +; <update> request, <type> = 19 is similar to <insert>: +; - <namespace_no>: same as in <select> or <insert> +; - <flags>, <tuple>: same as in <insert> +; Index number for tuple lookup does not need to be provided, +; since only primary key updates are allowed. +; Moreover, <tuple> cardinality is always 1, since currently +; primary keys are always single-dimensioned. +; - <count> specifies possibly zero operation count +; + +<update_request_body> ::= <namespace_no><flags><tuple><count><operation>+ + +; +; Operations are optional and exist primarily to allow +; updates of individual fields. +; + +<operation> ::= <field_no><op_code><op_arg> + +; +; Field index, specifies argument(s) of the operation +; + +<field_no> ::= <int32> + +; +; 0 - assign operation argument to field <field_no> +; The rest of operations are only defined for 32-bit integer +; types: +; 1 - add argument to field <field_no>, both arguments +; are treated as signed 32-bit ints +; 2 - bitwise AND of argument and field <field_no> +; 3 - bitwise XOR of argument and field <field_no> +; 4 - bitwise OR of argument and field <field_no> + +<op_code> ::= 0 | 1 | 2 | 3 + +; +; It's an error to specify an argument of a type that +; differs from expected type. +; + +<op_arg> ::= <field> + +<update_response_body> ::= <insert_response_body> + +; +; <delete>, request <type> = 20 +; Similarly to updates, <delete> always uses the +; primary key. +; + +<delete_request_body> ::= <namespace_no><tuple> + +; +; Return the number of deleted tuples. +; Currently it's always 1 +; + +<delete_response_body> ::= <count> + +; +; The server response, in addition to response header and body, +; contains a return code. It's a 4-byte integer, that has +; a higher 1-byte completion status part, and a lower 3-byte +; error code part. +; + +<return_code> ::= <int32> + +; Currently, the completion status is complementary: +; it can be deduced from the error code. +; +; Currently there are only 3 completion status codes +; in use: +; 0 - success; The only possible error code with this status is + 0, ERR_CODE_OK +; 1 - try again; An indicator of an intermittent error. +; Usually is returned when two clients attempt to change +; the same tuple simultaneously. +; XXX: how can this happen in a single-threaded application? +; 2 - error +; +; The error code holds the actual error. Existing error codes include: +; +; Completion status 0 (success) +; ----------------------------- +; 0 -- ERR_CODE_OK +; +; 36 -- ERR_CODE_NOTHING +; The query does not support data modification or return +; +; Completion status 1 (try again) +; ------------------------------- +; 4 -- ERR_CODE_NODE_IS_RO +; The requested data is blocked from modification +; +; 6 -- ERR_CODE_NODE_IS_LOCKED +; The requested data is not available +; +; 7 -- ERR_CODE_MEMORY_ISSUE +; An error occurred when allocating memory +; +; Completion status 2 (error) +; --------------------------- +; +; 1 -- ERR_CODE_NONMASTER +; An attempt was made to change data on a read-only port +; +; 2 -- ERR_CODE_ILLEGAL_PARAMS +; Malformed query +; +; 10 -- ERR_CODE_UNSUPPORTED_COMMAND +; The query is not recognized +; +; 30 -- ERR_CODE_WRONG_FIELD, 0x00001e02: +; An unknown field was requested +; +; 31 -- ERR_CODE_WRONG_NUMBER +; An out-of-range numeric value was included in the query +; +; 32 -- ERR_CODE_DUPLICATE +; An attempt was made to create an object with an existing key. +; +; 38 -- ERR_CODE_WRONG_VERSION +; The protocol version is not supported +; +; 39 -- ERR_CODE_UNKNOWN_ERROR +; Unknown error +; +; Convenience macros that define hexadecimal constants for <int32> +; return codes (completion status + code) can be found in +; include/iproto.h. +; +; vim: syntax=bnf diff --git a/mod/silverbox/Makefile b/mod/silverbox/Makefile index 1a16feb4a728bf62e9de88785c2207bb8a0937e6..b11c787c5ec40156a482752ed7db7f9978bda75d 100644 --- a/mod/silverbox/Makefile +++ b/mod/silverbox/Makefile @@ -2,6 +2,7 @@ core/tarantool.o: CFLAGS += -DSTORAGE obj += core/admin.o obj += mod/silverbox/box.o +obj += mod/silverbox/index.o obj += mod/silverbox/memcached.o obj += third_party/qsort_arg.o diff --git a/mod/silverbox/assoc.h b/mod/silverbox/assoc.h index 69e0656a64373033db3be2d2526f3ff838ba8ac1..c8b1e3835d1722e142ba9bf3a8d43f1760ed6a66 100644 --- a/mod/silverbox/assoc.h +++ b/mod/silverbox/assoc.h @@ -12,9 +12,10 @@ typedef void *ptr_t; -KHASH_MAP_INIT_INT(int2ptr_map, ptr_t, realloc); -KHASH_MAP_INIT_STR(str2ptr_map, ptr_t, realloc); -KHASH_MAP_INIT_INT(int2int_map, uint32_t, realloc); +KHASH_MAP_INIT_INT(int_ptr_map, ptr_t, realloc); +KHASH_MAP_INIT_INT(int64_ptr_map, ptr_t, realloc); +KHASH_MAP_INIT_STR(str_ptr_map, ptr_t, realloc); +KHASH_MAP_INIT_INT(int_int_map, uint32_t, realloc); KHASH_MAP_INIT_INT(seen, int32_t, realloc); KHASH_SET_INIT_INT(seen_set, realloc); KHASH_SET_INIT_INT(int_set, realloc); @@ -46,8 +47,8 @@ static inline int lstrcmp(void *a, void *b) #define kh_lstr_hash_func(key) ({ void *_k = key; unsigned int l = load_varint32(&_k); MurmurHash2(_k, l, 13); }) #define kh_lstr_hash_equal(a, b) (lstrcmp(a, b) == 0) -KHASH_INIT(lstr2ptr_map, void *, ptr_t, 1, kh_lstr_hash_func, kh_lstr_hash_equal, xrealloc) - KHASH_INIT(ptr_set, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal, xrealloc); +KHASH_INIT(lstr_ptr_map, void *, ptr_t, 1, kh_lstr_hash_func, kh_lstr_hash_equal, xrealloc); +KHASH_INIT(ptr_set, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal, xrealloc); void assoc_init(void); diff --git a/mod/silverbox/box.c b/mod/silverbox/box.c index 458ec4ff648feb16c0c5a31c2c02ab31124724f6..bb3357c61056b7d2a8d08cd3d8a77198150cfd65 100644 --- a/mod/silverbox/box.c +++ b/mod/silverbox/box.c @@ -39,9 +39,9 @@ #include <tarantool.h> #include <tbuf.h> #include <util.h> -// #include <third_party/sptree.h> #include <mod/silverbox/box.h> +#include <mod/silverbox/index.h> bool box_updates_allowed = false; static char *status = "unknown"; @@ -52,13 +52,6 @@ STRS(messages, MESSAGES); const int MEMCACHED_NAMESPACE = 23; static char *custom_proc_title; -const struct field ASTERISK = { - .len = UINT32_MAX, - { - .data_ptr = NULL, - } -}; - /* hooks */ typedef int (*box_hook_t) (struct box_txn * txn); @@ -77,7 +70,8 @@ typedef int (*box_hook_t) (struct box_txn * txn); const int BOX_REF_THRESHOLD = 8196; -struct namespace namespace[256]; +struct namespace *namespace; +const int namespace_count = 256; struct box_snap_row { u32 namespace; @@ -96,13 +90,6 @@ static void tuple_add_iov(struct box_txn *txn, struct box_tuple *tuple); box_hook_t *before_commit_update_hook; -#define box_raise(n, err) \ - ({ \ - if (n != ERR_CODE_NODE_IS_RO) \ - say_warn("box.c:%i %s/%s", __LINE__, error_codes_strs[(n)], err); \ - raise(n, err); \ - }) - static void run_hooks(struct box_txn *txn, box_hook_t * hook) { @@ -136,106 +123,6 @@ tuple_field(struct box_tuple *tuple, size_t i) return field; } -bool -field_is_num(void *field) -{ - u32 len = load_varint32(&field); - - if (len == sizeof(u32)) - return true; - - return false; -} - -#define IS_ASTERISK(f) ((f)->len == ASTERISK.len && (f)->data_ptr == ASTERISK.data_ptr) -static i8 -field_compare(struct field *f1, struct field *f2, enum field_data_type type) -{ - i8 r; - - if (IS_ASTERISK(f1) || IS_ASTERISK(f2)) - r = 0; - else { - if (type == NUM) { - assert(f1->len == f2->len); - assert(f1->len == sizeof(f1->u32)); - - r = f1->u32 >f2->u32 ? 1 : f1->u32 == f2->u32 ? 0 : -1; - } else { - i32 cmp; - void *f1_data, *f2_data; - - f1_data = f1->len <= sizeof(f1->data) ? f1->data : f1->data_ptr; - f2_data = f2->len <= sizeof(f2->data) ? f2->data : f2->data_ptr; - - cmp = memcmp(f1_data, f2_data, MIN(f1->len, f2->len)); - - if (cmp > 0) - r = 1; - else if (cmp < 0) - r = -1; - else if (f1->len == f2->len) - r = 0; - else if (f1->len > f2->len) - r = 1; - else - r = -1; - } - } - - return r; -} - -/* - * Compare index_tree_members only by fields defined in index->field_cmp_order. - * Return: - * Common meaning: - * < 0 - a is smaler than b - * == 0 - a is equal to b - * > 0 - a is greater than b - * Custom treatment (by absolute value): - * 1 - differ in some key field - * 2 - one tuple is a search pattern - * 3 - differ in pointers - */ -static int -tree_index_member_compare(struct tree_index_member *member_a, struct tree_index_member *member_b, - struct index *index) -{ - i8 r = 0; - - for (i32 i = 0, end = index->key_cardinality; i < end; ++i) { - r = field_compare(&member_a->key[i], &member_b->key[i], index->key_field[i].type); - - if (r != 0) - break; - } - - if (r != 0) - return r; - - if (member_a->tuple == NULL) - return -2; - - if (member_b->tuple == NULL) - return 2; - - if (index->unique == false) { - if (member_a->tuple > member_b->tuple) - return 3; - else if (member_a->tuple < member_b->tuple) - return -3; - } - - return 0; -} - -#define foreach_index(n, index_var) \ - for (struct index *index_var = namespace[(n)].index; \ - index_var->key_cardinality != 0; \ - index_var++) \ - if (index_var->enabled) - static void lock_tuple(struct box_txn *txn, struct box_tuple *tuple) { @@ -341,263 +228,6 @@ tuple_txn_ref(struct box_txn *txn, struct box_tuple *tuple) tuple_ref(tuple, +1); } -static struct box_tuple * -index_find_hash_by_tuple(struct index *self, struct box_tuple *tuple) -{ - void *key = tuple_field(tuple, self->key_field->fieldno); - if (key == NULL) - box_raise(ERR_CODE_ILLEGAL_PARAMS, "invalid tuple, can't find key"); - return self->find(self, key); -} - -static struct box_tuple * -index_find_hash_num(struct index *self, void *key) -{ - struct box_tuple *ret = NULL; - u32 key_size = load_varint32(&key); - u32 num = *(u32 *)key; - - if (key_size != 4) - box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u32"); - - assoc_find(int2ptr_map, self->idx.int_hash, num, ret); -#ifdef DEBUG - say_debug("index_find_hash_num(self:%p, key:%i) = %p", self, num, ret); -#endif - return ret; -} - -static struct box_tuple * -index_find_hash_str(struct index *self, void *key) -{ - struct box_tuple *ret = NULL; - - assoc_find(lstr2ptr_map, self->idx.str_hash, key, ret); -#ifdef DEBUG - u32 size = load_varint32(&key); - say_debug("index_find_hash_str(self:%p, key:(%i)'%.*s') = %p", self, size, size, (u8 *)key, - ret); -#endif - return ret; -} - -static struct tree_index_member * -tuple2tree_index_member(struct index *index, - struct box_tuple *tuple, struct tree_index_member **member_p) -{ - struct tree_index_member *member; - void *tuple_data = tuple->data; - - if (member_p == NULL || *member_p == NULL) - member = palloc(fiber->pool, SIZEOF_TREE_INDEX_MEMBER(index)); - else - member = *member_p; - - for (i32 i = 0; i < index->field_cmp_order_cnt; ++i) { - struct field f; - - if (i < tuple->cardinality) { - f.len = load_varint32(&tuple_data); - if (f.len <= sizeof(f.data)) { - memset(f.data, 0, sizeof(f.data)); - memcpy(f.data, tuple_data, f.len); - } else - f.data_ptr = tuple_data; - tuple_data += f.len; - } else - f = ASTERISK; - - if (index->field_cmp_order[i] == -1) - continue; - - member->key[index->field_cmp_order[i]] = f; - } - - member->tuple = tuple; - - if (member_p) - *member_p = member; - - return member; -} - -static struct tree_index_member * -alloc_search_pattern(struct index *index, int key_cardinality, void *key) -{ - struct tree_index_member *pattern = index->search_pattern; - void *key_field = key; - - assert(key_cardinality <= index->key_cardinality); - - for (i32 i = 0; i < index->key_cardinality; ++i) - pattern->key[i] = ASTERISK; - for (int i = 0; i < key_cardinality; i++) { - u32 len; - - len = pattern->key[i].len = load_varint32(&key_field); - if (len <= sizeof(pattern->key[i].data)) { - memset(pattern->key[i].data, 0, sizeof(pattern->key[i].data)); - memcpy(pattern->key[i].data, key_field, len); - } else - pattern->key[i].data_ptr = key_field; - - key_field += len; - } - - pattern->tuple = NULL; - - return pattern; -} - -static struct box_tuple * -index_find_tree(struct index *self, void *key) -{ - struct tree_index_member *member = (struct tree_index_member *)key; - - return sptree_str_t_find(self->idx.tree, member); -} - -static struct box_tuple * -index_find_tree_by_tuple(struct index *self, struct box_tuple *tuple) -{ - struct tree_index_member *member = tuple2tree_index_member(self, tuple, NULL); - - return self->find(self, member); -} - -static void -index_remove_hash_num(struct index *self, struct box_tuple *tuple) -{ - void *key = tuple_field(tuple, self->key_field->fieldno); - unsigned int key_size = load_varint32(&key); - u32 num = *(u32 *)key; - - if (key_size != 4) - box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u32"); - assoc_delete(int2ptr_map, self->idx.int_hash, num); -#ifdef DEBUG - say_debug("index_remove_hash_num(self:%p, key:%i)", self, num); -#endif -} - -static void -index_remove_hash_str(struct index *self, struct box_tuple *tuple) -{ - void *key = tuple_field(tuple, self->key_field->fieldno); - assoc_delete(lstr2ptr_map, self->idx.str_hash, key); -#ifdef DEBUG - u32 size = load_varint32(&key); - say_debug("index_remove_hash_str(self:%p, key:'%.*s')", self, size, (u8 *)key); -#endif -} - -static void -index_remove_tree_str(struct index *self, struct box_tuple *tuple) -{ - struct tree_index_member *member = tuple2tree_index_member(self, tuple, NULL); - sptree_str_t_delete(self->idx.tree, member); -} - -static void -index_replace_hash_num(struct index *self, struct box_tuple *old_tuple, struct box_tuple *tuple) -{ - void *key = tuple_field(tuple, self->key_field->fieldno); - u32 key_size = load_varint32(&key); - u32 num = *(u32 *)key; - - if (old_tuple != NULL) { - void *old_key = tuple_field(old_tuple, self->key_field->fieldno); - load_varint32(&old_key); - u32 old_num = *(u32 *)old_key; - assoc_delete(int2ptr_map, self->idx.int_hash, old_num); - } - - if (key_size != 4) - box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u32"); - assoc_replace(int2ptr_map, self->idx.int_hash, num, tuple); -#ifdef DEBUG - say_debug("index_replace_hash_num(self:%p, old_tuple:%p, tuple:%p) key:%i", self, old_tuple, - tuple, num); -#endif -} - -static void -index_replace_hash_str(struct index *self, struct box_tuple *old_tuple, struct box_tuple *tuple) -{ - void *key = tuple_field(tuple, self->key_field->fieldno); - - if (old_tuple != NULL) { - void *old_key = tuple_field(old_tuple, self->key_field->fieldno); - assoc_delete(lstr2ptr_map, self->idx.str_hash, old_key); - } - - assoc_replace(lstr2ptr_map, self->idx.str_hash, key, tuple); -#ifdef DEBUG - u32 size = load_varint32(&key); - say_debug("index_replace_hash_str(self:%p, old_tuple:%p, tuple:%p) key:'%.*s'", self, - old_tuple, tuple, size, (u8 *)key); -#endif -} - -static void -index_replace_tree_str(struct index *self, struct box_tuple *old_tuple, struct box_tuple *tuple) -{ - struct tree_index_member *member = tuple2tree_index_member(self, tuple, NULL); - - if (old_tuple) - index_remove_tree_str(self, old_tuple); - sptree_str_t_insert(self->idx.tree, member); -} - -static void -index_iterator_init_tree_str(struct index *self, struct tree_index_member *pattern) -{ - sptree_str_t_iterator_init_set(self->idx.tree, - (struct sptree_str_t_iterator **)&self->iterator, pattern); -} - -static struct box_tuple * -index_iterator_next_tree_str(struct index *self, struct tree_index_member *pattern) -{ - struct tree_index_member *member = - sptree_str_t_iterator_next((struct sptree_str_t_iterator *)self->iterator); - - if (member == NULL) - return NULL; - - i32 r = tree_index_member_compare(pattern, member, self); - if (r == -2) - return member->tuple; - - return NULL; -} - -static void -validate_indeces(struct box_txn *txn) -{ - if (namespace[txn->n].index[1].key_cardinality != 0) { /* there is more then one index */ - foreach_index(txn->n, index) { - for (u32 f = 0; f < index->key_cardinality; ++f) { - void *field; - - if (index->key_field[f].type == STR) - continue; - - field = tuple_field(txn->tuple, index->key_field[f].fieldno); - if (!field_is_num(field)) - box_raise(ERR_CODE_ILLEGAL_PARAMS, "field must be NUM"); - } - if (index->type == TREE && index->unique == false) - /* Don't check non unique indexes */ - continue; - - struct box_tuple *tuple = index->find_by_tuple(index, txn->tuple); - - if (tuple != NULL && tuple != txn->old_tuple) - box_raise(ERR_CODE_INDEX_VIOLATION, "unique index violation"); - } - } -} static int __noinline__ prepare_replace(struct box_txn *txn, size_t cardinality, struct tbuf *data) @@ -1430,7 +1060,7 @@ custom_init(void) if (cfg.namespace == NULL) panic("at least one namespace should be configured"); - for (int i = 0; i < nelem(namespace); i++) { + for (int i = 0; i < namespace_count; i++) { if (cfg.namespace[i] == NULL) break; @@ -1485,21 +1115,23 @@ custom_init(void) sizeof(index->field_cmp_order[0]) * index->field_cmp_order_cnt); for (int k = 0; cfg.namespace[i]->index[j]->key_field[k] != NULL; k++) { - if (cfg.namespace[i]->index[j]->key_field[k]->fieldno == -1) + typeof(cfg.namespace[i]->index[j]->key_field[k]) cfg_key_field = + cfg.namespace[i]->index[j]->key_field[k]; + + if (cfg_key_field->fieldno == -1) break; - index->key_field[k].fieldno = - cfg.namespace[i]->index[j]->key_field[k]->fieldno; - if (strcmp(cfg.namespace[i]->index[j]->key_field[k]->type, "NUM") == - 0) + index->key_field[k].fieldno = cfg_key_field->fieldno; + if (strcmp(cfg_key_field->type, "NUM") == 0) index->key_field[k].type = NUM; - else if (strcmp(cfg.namespace[i]->index[j]->key_field[k]->type, - "STR") == 0) + else if (strcmp(cfg_key_field->type, "NUM64") == 0) + index->key_field[k].type = NUM64; + else if (strcmp(cfg_key_field->type, "STR") == 0) index->key_field[k].type = STR; else panic("(namespace = %" PRIu32 " index = %" PRIu32 ") " "unknown field data type: `%s'", - i, j, cfg.namespace[i]->index[j]->key_field[k]->type); + i, j, cfg_key_field->type); index->field_cmp_order[index->key_field[k].fieldno] = k; } @@ -1519,49 +1151,21 @@ custom_init(void) panic("(namespace = %" PRIu32 " index = %" PRIu32 ") " "hash index must have single-filed key", i, j); - index->enabled = true; - index->type = HASH; - if (index->unique == false) panic("(namespace = %" PRIu32 " index = %" PRIu32 ") " "hash index must be unique", i, j); + index->enabled = true; if (index->key_field->type == NUM) { - index->find = index_find_hash_num; - index->find_by_tuple = index_find_hash_by_tuple; - index->remove = index_remove_hash_num; - index->replace = index_replace_hash_num; - index->namespace = &namespace[i]; - index->idx.int_hash = kh_init(int2ptr_map, NULL); - - if (estimated_rows > 0) - kh_resize(int2ptr_map, index->idx.int_hash, - estimated_rows); + index_hash_num(index, &namespace[i], estimated_rows); + } else if (index->key_field->type == NUM64) { + index_hash_num64(index, &namespace[i], estimated_rows); } else { - index->find = index_find_hash_str; - index->find_by_tuple = index_find_hash_by_tuple; - index->remove = index_remove_hash_str; - index->replace = index_replace_hash_str; - index->namespace = &namespace[i]; - index->idx.str_hash = kh_init(lstr2ptr_map, NULL); - - if (estimated_rows > 0) - kh_resize(lstr2ptr_map, index->idx.str_hash, - estimated_rows); + index_hash_str(index, &namespace[i], estimated_rows); } } else if (strcmp(cfg.namespace[i]->index[j]->type, "TREE") == 0) { index->enabled = false; - index->type = TREE; - - index->find = index_find_tree; - index->find_by_tuple = index_find_tree_by_tuple; - index->remove = index_remove_tree_str; - index->replace = index_replace_tree_str; - index->iterator_init = index_iterator_init_tree_str; - index->iterator_next = index_iterator_next_tree_str; - index->namespace = &namespace[i]; - - index->idx.tree = palloc(eter_pool, sizeof(*index->idx.tree)); + index_tree(index, &namespace[0], 0); } else panic("namespace = %" PRIu32 " index = %" PRIu32 ") " "unknown index type `%s'", @@ -1646,91 +1250,13 @@ memcached_bound_to_primary(void *data __unused__) fiber_call(expire); } -static void -build_indexes(void) -{ - for (u32 n = 0; n < nelem(namespace); ++n) { - u32 n_tuples, estimated_tuples; - struct tree_index_member *members[nelem(namespace[n].index)] = { NULL }; - - if (namespace[n].enabled == false) - continue; - - n_tuples = kh_size(namespace[n].index[0].idx.hash); - estimated_tuples = n_tuples * 1.2; - - say_info("build_indexes: n = %" PRIu32 ": build arrays", n); - - khiter_t k; - u32 i = 0; - assoc_foreach(namespace[n].index[0].idx.hash, k) { - for (u32 idx = 0;; idx++) { - struct index *index = &namespace[n].index[idx]; - struct tree_index_member *member; - struct tree_index_member *m; - - if (index->key_cardinality == 0) - break; - - if (index->type != TREE) - continue; - - member = members[idx]; - if (member == NULL) { - member = malloc(estimated_tuples * - SIZEOF_TREE_INDEX_MEMBER(index)); - if (member == NULL) - panic("build_indexes: malloc failed: %m"); - - members[idx] = member; - } - - m = (struct tree_index_member *) - ((char *)member + i * SIZEOF_TREE_INDEX_MEMBER(index)); - - tuple2tree_index_member(index, - kh_value(namespace[n].index[0].idx.hash, - k), - &m); - } - - ++i; - } - - say_info("build_indexes: n = %" PRIu32 ": build trees", n); - - for (u32 idx = 0;; idx++) { - struct index *index = &namespace[n].index[idx]; - struct tree_index_member *member = members[idx]; - - if (index->key_cardinality == 0) - break; - - if (index->type != TREE) - continue; - - assert(index->enabled == false); - - say_info("build_indexes: n = %" PRIu32 " idx = %" PRIu32 ": build tree", n, - idx); - - /* if n_tuples == 0 then estimated_tuples = 0, member == NULL, tree is empty */ - sptree_str_t_init(index->idx.tree, - SIZEOF_TREE_INDEX_MEMBER(index), - member, n_tuples, estimated_tuples, - (void *)tree_index_member_compare, index); - index->enabled = true; - - say_info("build_indexes: n = %" PRIu32 " idx = %" PRIu32 ": end", n, idx); - } - } -} - void mod_init(void) { stat_base = stat_register(messages_strs, messages_MAX); - for (int i = 0; i < nelem(namespace); i++) { + + namespace = palloc(eter_pool, sizeof(struct namespace) * namespace_count); + for (int i = 0; i < namespace_count; i++) { namespace[i].enabled = false; for (int j = 0; j < MAX_IDX; j++) namespace[i].index[j].key_cardinality = 0; @@ -1849,7 +1375,7 @@ mod_snapshot(struct log_io_iter *i) struct box_tuple *tuple; khiter_t k; - for (uint32_t n = 0; n < nelem(namespace); ++n) { + for (uint32_t n = 0; n < namespace_count; ++n) { if (!namespace[n].enabled) continue; diff --git a/mod/silverbox/box.h b/mod/silverbox/box.h index 888af4cfcb9e2d9652d4c941734b97eab5a71190..d9ece509dff2b871111affdf0fe21829d518d48b 100644 --- a/mod/silverbox/box.h +++ b/mod/silverbox/box.h @@ -27,77 +27,14 @@ #ifndef TARANTOOL_SILVERBOX_H #define TARANTOOL_SILVERBOX_H -#include <mod/silverbox/assoc.h> +#include <mod/silverbox/index.h> extern bool box_updates_allowed; void memcached_handler(void *_data __unused__); struct namespace; struct box_tuple; - -struct field { - u32 len; - union { - u32 u32; - - u8 data[sizeof(void *)]; - - void *data_ptr; - }; -}; - -enum field_data_type { NUM, STR }; - -struct tree_index_member { - struct box_tuple *tuple; - struct field key[]; -}; - -#define SIZEOF_TREE_INDEX_MEMBER(index) \ - (sizeof(struct tree_index_member) + sizeof(struct field) * (index)->key_cardinality) - -#include <third_party/sptree.h> -SPTREE_DEF(str_t, realloc); - -// #include <mod/silverbox/tree.h> - -struct index { - bool enabled; - - bool unique; - - struct box_tuple *(*find) (struct index * index, void *key); /* only for unique lookups */ - struct box_tuple *(*find_by_tuple) (struct index * index, struct box_tuple * pattern); - void (*remove) (struct index * index, struct box_tuple *); - void (*replace) (struct index * index, struct box_tuple *, struct box_tuple *); - void (*iterator_init) (struct index *, struct tree_index_member * pattern); - struct box_tuple *(*iterator_next) (struct index *, struct tree_index_member * pattern); - union { - khash_t(lstr2ptr_map) * str_hash; - khash_t(int2ptr_map) * int_hash; - khash_t(int2ptr_map) * hash; - sptree_str_t *tree; - } idx; - void *iterator; - bool iterator_empty; - - struct namespace *namespace; - - struct { - struct { - u32 fieldno; - enum field_data_type type; - } *key_field; - u32 key_cardinality; - - u32 *field_cmp_order; - u32 field_cmp_order_cnt; - }; - - struct tree_index_member *search_pattern; - - enum { HASH, TREE } type; -}; +struct index; extern struct index *memcached_index; @@ -109,6 +46,9 @@ struct namespace { struct index index[MAX_IDX]; }; +extern struct namespace *namespace; +extern const int namespace_count; + struct box_tuple { u16 refs; u16 flags; @@ -178,7 +118,12 @@ enum box_mode { ENUM(messages, MESSAGES); -struct box_tuple *index_find(struct index *index, void *key); +#define box_raise(n, err) \ + ({ \ + if (n != ERR_CODE_NODE_IS_RO) \ + say_warn("box.c:%i %s/%s", __LINE__, error_codes_strs[(n)], err); \ + raise(n, err); \ + }) struct box_txn *txn_alloc(u32 flags); u32 box_dispach(struct box_txn *txn, enum box_mode mode, u16 op, struct tbuf *data); diff --git a/mod/silverbox/client/perl/lib/MR/SilverBox.pm b/mod/silverbox/client/perl/lib/MR/SilverBox.pm index d8dd642cc43b2601805bd3510df59dbb41f8d30c..cc8befc2d339714b612cdad081cb81e91551e107 100644 --- a/mod/silverbox/client/perl/lib/MR/SilverBox.pm +++ b/mod/silverbox/client/perl/lib/MR/SilverBox.pm @@ -54,7 +54,7 @@ sub new { confess "ns[$namespace] no indexes defined" unless $ns->{indexes} && @{$ns->{indexes}}; $namespaces{$namespace} = $ns; $namespaces{$ns->{name}} = $ns if $ns->{name}; - confess "ns[$namespace] bad format `$ns->{format}'" if $ns->{format} =~ m/[^&lLsScC ]/; + confess "ns[$namespace] bad format `$ns->{format}'" if $ns->{format} =~ m/[^&lLsScCqQ ]/; $ns->{format} =~ s/\s+//g; my @f = split //, $ns->{format}; $ns->{byfield_unpack_format} = [ map { /&/ ? 'w/a*' : "x$_" } @f ]; diff --git a/mod/silverbox/index.c b/mod/silverbox/index.c new file mode 100644 index 0000000000000000000000000000000000000000..268abfd32c328664bcbb7eff61c9a070e910c22a --- /dev/null +++ b/mod/silverbox/index.c @@ -0,0 +1,607 @@ +/* + * Copyright (C) 2010 Mail.RU + * Copyright (C) 2010 Yuriy Vostrikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdarg.h> +#include <stdint.h> +#include <stdbool.h> +#include <errno.h> + +#include <fiber.h> +#include <iproto.h> +#include <log_io.h> +#include <pickle.h> +#include <salloc.h> +#include <say.h> +#include <stat.h> +#include <tarantool.h> +#include <tbuf.h> +#include <util.h> + +#include <mod/silverbox/box.h> +#include <mod/silverbox/index.h> + +const struct field ASTERISK = { + .len = UINT32_MAX, + { + .data_ptr = NULL, + } +}; + +/* hooks */ +typedef int (*box_hook_t) (struct box_txn * txn); + + + +#define IS_ASTERISK(f) ((f)->len == ASTERISK.len && (f)->data_ptr == ASTERISK.data_ptr) + +/** Compare two fields of an index key. + * + * @retval 0 two fields are equal + * @retval -1 f2 is less than f1 + * @retval 1 f2 is greater than f1 + */ + +static i8 +field_compare(struct field *f1, struct field *f2, enum field_data_type type) +{ + if (IS_ASTERISK(f1) || IS_ASTERISK(f2)) + return 0; + + if (type == NUM) { + assert(f1->len == f2->len); + assert(f1->len == sizeof(f1->u32)); + + return f1->u32 >f2->u32 ? 1 : f1->u32 == f2->u32 ? 0 : -1; + } else if (type == NUM64) { + assert(f1->len == f2->len); + assert(f1->len == sizeof(f1->u64)); + + return f1->u64 >f2->u64 ? 1 : f1->u64 == f2->u64 ? 0 : -1; + } else if (type == STR) { + i32 cmp; + void *f1_data, *f2_data; + + f1_data = f1->len <= sizeof(f1->data) ? f1->data : f1->data_ptr; + f2_data = f2->len <= sizeof(f2->data) ? f2->data : f2->data_ptr; + + cmp = memcmp(f1_data, f2_data, MIN(f1->len, f2->len)); + + if (cmp > 0) + return 1; + else if (cmp < 0) + return -1; + else if (f1->len == f2->len) + return 0; + else if (f1->len > f2->len) + return 1; + else + return -1; + } + + panic("imposible happend"); +} + + +/* + * Compare index_tree_members only by fields defined in index->field_cmp_order. + * Return: + * Common meaning: + * < 0 - a is smaler than b + * == 0 - a is equal to b + * > 0 - a is greater than b + * Custom treatment (by absolute value): + * 1 - differ in some key field + * 2 - one tuple is a search pattern + * 3 - differ in pointers + */ +static int +tree_index_member_compare(struct tree_index_member *member_a, struct tree_index_member *member_b, + struct index *index) +{ + i8 r = 0; + + for (i32 i = 0, end = index->key_cardinality; i < end; ++i) { + r = field_compare(&member_a->key[i], &member_b->key[i], index->key_field[i].type); + + if (r != 0) + break; + } + + if (r != 0) + return r; + + if (member_a->tuple == NULL) + return -2; + + if (member_b->tuple == NULL) + return 2; + + if (index->unique == false) { + if (member_a->tuple > member_b->tuple) + return 3; + else if (member_a->tuple < member_b->tuple) + return -3; + } + + return 0; +} + + + +static struct box_tuple * +index_find_hash_by_tuple(struct index *self, struct box_tuple *tuple) +{ + void *key = tuple_field(tuple, self->key_field->fieldno); + if (key == NULL) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "invalid tuple, can't find key"); + return self->find(self, key); +} + +static struct box_tuple * +index_find_hash_num(struct index *self, void *key) +{ + struct box_tuple *ret = NULL; + u32 key_size = load_varint32(&key); + u32 num = *(u32 *)key; + + if (key_size != 4) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u32"); + + assoc_find(int_ptr_map, self->idx.int_hash, num, ret); +#ifdef DEBUG + say_debug("index_find_hash_num(self:%p, key:%i) = %p", self, num, ret); +#endif + return ret; +} + +static struct box_tuple * +index_find_hash_num64(struct index *self, void *key) +{ + struct box_tuple *ret = NULL; + u32 key_size = load_varint32(&key); + u64 num = *(u64 *)key; + + if (key_size != 8) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u64"); + + assoc_find(int64_ptr_map, self->idx.int64_hash, num, ret); +#ifdef DEBUG + say_debug("index_find_hash_num(self:%p, key:%"PRIu64") = %p", self, num, ret); +#endif + return ret; +} + +static struct box_tuple * +index_find_hash_str(struct index *self, void *key) +{ + struct box_tuple *ret = NULL; + + assoc_find(lstr_ptr_map, self->idx.str_hash, key, ret); +#ifdef DEBUG + u32 size = load_varint32(&key); + say_debug("index_find_hash_str(self:%p, key:(%i)'%.*s') = %p", self, size, size, (u8 *)key, + ret); +#endif + return ret; +} + +static struct tree_index_member * +tuple2tree_index_member(struct index *index, + struct box_tuple *tuple, struct tree_index_member **member_p) +{ + struct tree_index_member *member; + void *tuple_data = tuple->data; + + if (member_p == NULL || *member_p == NULL) + member = palloc(fiber->pool, SIZEOF_TREE_INDEX_MEMBER(index)); + else + member = *member_p; + + for (i32 i = 0; i < index->field_cmp_order_cnt; ++i) { + struct field f; + + if (i < tuple->cardinality) { + f.len = load_varint32(&tuple_data); + if (f.len <= sizeof(f.data)) { + memset(f.data, 0, sizeof(f.data)); + memcpy(f.data, tuple_data, f.len); + } else + f.data_ptr = tuple_data; + tuple_data += f.len; + } else + f = ASTERISK; + + if (index->field_cmp_order[i] == -1) + continue; + + member->key[index->field_cmp_order[i]] = f; + } + + member->tuple = tuple; + + if (member_p) + *member_p = member; + + return member; +} + +struct tree_index_member * +alloc_search_pattern(struct index *index, int key_cardinality, void *key) +{ + struct tree_index_member *pattern = index->search_pattern; + void *key_field = key; + + assert(key_cardinality <= index->key_cardinality); + + for (i32 i = 0; i < index->key_cardinality; ++i) + pattern->key[i] = ASTERISK; + for (int i = 0; i < key_cardinality; i++) { + u32 len; + + len = pattern->key[i].len = load_varint32(&key_field); + if (len <= sizeof(pattern->key[i].data)) { + memset(pattern->key[i].data, 0, sizeof(pattern->key[i].data)); + memcpy(pattern->key[i].data, key_field, len); + } else + pattern->key[i].data_ptr = key_field; + + key_field += len; + } + + pattern->tuple = NULL; + + return pattern; +} + +static struct box_tuple * +index_find_tree(struct index *self, void *key) +{ + struct tree_index_member *member = (struct tree_index_member *)key; + + return sptree_str_t_find(self->idx.tree, member); +} + +static struct box_tuple * +index_find_tree_by_tuple(struct index *self, struct box_tuple *tuple) +{ + struct tree_index_member *member = tuple2tree_index_member(self, tuple, NULL); + + return self->find(self, member); +} + +static void +index_remove_hash_num(struct index *self, struct box_tuple *tuple) +{ + void *key = tuple_field(tuple, self->key_field->fieldno); + unsigned int key_size = load_varint32(&key); + u32 num = *(u32 *)key; + + if (key_size != 4) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u32"); + assoc_delete(int_ptr_map, self->idx.int_hash, num); +#ifdef DEBUG + say_debug("index_remove_hash_num(self:%p, key:%i)", self, num); +#endif +} + +static void +index_remove_hash_num64(struct index *self, struct box_tuple *tuple) +{ + void *key = tuple_field(tuple, self->key_field->fieldno); + unsigned int key_size = load_varint32(&key); + u64 num = *(u64 *)key; + + if (key_size != 8) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u64"); + assoc_delete(int64_ptr_map, self->idx.int64_hash, num); +#ifdef DEBUG + say_debug("index_remove_hash_num(self:%p, key:%"PRIu64")", self, num); +#endif +} + +static void +index_remove_hash_str(struct index *self, struct box_tuple *tuple) +{ + void *key = tuple_field(tuple, self->key_field->fieldno); + assoc_delete(lstr_ptr_map, self->idx.str_hash, key); +#ifdef DEBUG + u32 size = load_varint32(&key); + say_debug("index_remove_hash_str(self:%p, key:'%.*s')", self, size, (u8 *)key); +#endif +} + +static void +index_remove_tree_str(struct index *self, struct box_tuple *tuple) +{ + struct tree_index_member *member = tuple2tree_index_member(self, tuple, NULL); + sptree_str_t_delete(self->idx.tree, member); +} + +static void +index_replace_hash_num(struct index *self, struct box_tuple *old_tuple, struct box_tuple *tuple) +{ + void *key = tuple_field(tuple, self->key_field->fieldno); + u32 key_size = load_varint32(&key); + u32 num = *(u32 *)key; + + if (old_tuple != NULL) { + void *old_key = tuple_field(old_tuple, self->key_field->fieldno); + load_varint32(&old_key); + u32 old_num = *(u32 *)old_key; + assoc_delete(int_ptr_map, self->idx.int_hash, old_num); + } + + if (key_size != 4) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u32"); + assoc_replace(int_ptr_map, self->idx.int_hash, num, tuple); +#ifdef DEBUG + say_debug("index_replace_hash_num(self:%p, old_tuple:%p, tuple:%p) key:%i", self, old_tuple, + tuple, num); +#endif +} + +static void +index_replace_hash_num64(struct index *self, struct box_tuple *old_tuple, struct box_tuple *tuple) +{ + void *key = tuple_field(tuple, self->key_field->fieldno); + u32 key_size = load_varint32(&key); + u64 num = *(u64 *)key; + + if (old_tuple != NULL) { + void *old_key = tuple_field(old_tuple, self->key_field->fieldno); + load_varint32(&old_key); + u64 old_num = *(u64 *)old_key; + assoc_delete(int64_ptr_map, self->idx.int64_hash, old_num); + } + + if (key_size != 8) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "key is not u64"); + assoc_replace(int64_ptr_map, self->idx.int64_hash, num, tuple); +#ifdef DEBUG + say_debug("index_replace_hash_num(self:%p, old_tuple:%p, tuple:%p) key:%"PRIu64, self, old_tuple, + tuple, num); +#endif +} + +static void +index_replace_hash_str(struct index *self, struct box_tuple *old_tuple, struct box_tuple *tuple) +{ + void *key = tuple_field(tuple, self->key_field->fieldno); + + if (old_tuple != NULL) { + void *old_key = tuple_field(old_tuple, self->key_field->fieldno); + assoc_delete(lstr_ptr_map, self->idx.str_hash, old_key); + } + + assoc_replace(lstr_ptr_map, self->idx.str_hash, key, tuple); +#ifdef DEBUG + u32 size = load_varint32(&key); + say_debug("index_replace_hash_str(self:%p, old_tuple:%p, tuple:%p) key:'%.*s'", self, + old_tuple, tuple, size, (u8 *)key); +#endif +} + +static void +index_replace_tree_str(struct index *self, struct box_tuple *old_tuple, struct box_tuple *tuple) +{ + struct tree_index_member *member = tuple2tree_index_member(self, tuple, NULL); + + if (old_tuple) + index_remove_tree_str(self, old_tuple); + sptree_str_t_insert(self->idx.tree, member); +} + +void +index_iterator_init_tree_str(struct index *self, struct tree_index_member *pattern) +{ + sptree_str_t_iterator_init_set(self->idx.tree, + (struct sptree_str_t_iterator **)&self->iterator, pattern); +} + +struct box_tuple * +index_iterator_next_tree_str(struct index *self, struct tree_index_member *pattern) +{ + struct tree_index_member *member = + sptree_str_t_iterator_next((struct sptree_str_t_iterator *)self->iterator); + + if (member == NULL) + return NULL; + + i32 r = tree_index_member_compare(pattern, member, self); + if (r == -2) + return member->tuple; + + return NULL; +} + +void +validate_indeces(struct box_txn *txn) +{ + if (namespace[txn->n].index[1].key_cardinality != 0) { /* there is more then one index */ + foreach_index(txn->n, index) { + for (u32 f = 0; f < index->key_cardinality; ++f) { + if (index->key_field[f].type == STR) + continue; + + void *field = tuple_field(txn->tuple, index->key_field[f].fieldno); + u32 len = load_varint32(&field); + + if (index->key_field[f].type == NUM && len != sizeof(u32)) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "field must be NUM"); + + if (index->key_field[f].type == NUM64 && len != sizeof(u64)) + box_raise(ERR_CODE_ILLEGAL_PARAMS, "field must be NUM64"); + } + if (index->type == TREE && index->unique == false) + /* Don't check non unique indexes */ + continue; + + struct box_tuple *tuple = index->find_by_tuple(index, txn->tuple); + + if (tuple != NULL && tuple != txn->old_tuple) + box_raise(ERR_CODE_INDEX_VIOLATION, "unique index violation"); + } + } +} + + +void +build_indexes(void) +{ + for (u32 n = 0; n < namespace_count; ++n) { + u32 n_tuples, estimated_tuples; + struct tree_index_member *members[nelem(namespace[n].index)] = { NULL }; + + if (namespace[n].enabled == false) + continue; + + n_tuples = kh_size(namespace[n].index[0].idx.hash); + estimated_tuples = n_tuples * 1.2; + + say_info("build_indexes: n = %" PRIu32 ": build arrays", n); + + khiter_t k; + u32 i = 0; + assoc_foreach(namespace[n].index[0].idx.hash, k) { + for (u32 idx = 0;; idx++) { + struct index *index = &namespace[n].index[idx]; + struct tree_index_member *member; + struct tree_index_member *m; + + if (index->key_cardinality == 0) + break; + + if (index->type != TREE) + continue; + + member = members[idx]; + if (member == NULL) { + member = malloc(estimated_tuples * + SIZEOF_TREE_INDEX_MEMBER(index)); + if (member == NULL) + panic("build_indexes: malloc failed: %m"); + + members[idx] = member; + } + + m = (struct tree_index_member *) + ((char *)member + i * SIZEOF_TREE_INDEX_MEMBER(index)); + + tuple2tree_index_member(index, + kh_value(namespace[n].index[0].idx.hash, + k), + &m); + } + + ++i; + } + + say_info("build_indexes: n = %" PRIu32 ": build trees", n); + + for (u32 idx = 0;; idx++) { + struct index *index = &namespace[n].index[idx]; + struct tree_index_member *member = members[idx]; + + if (index->key_cardinality == 0) + break; + + if (index->type != TREE) + continue; + + assert(index->enabled == false); + + say_info("build_indexes: n = %" PRIu32 " idx = %" PRIu32 ": build tree", n, + idx); + + /* if n_tuples == 0 then estimated_tuples = 0, member == NULL, tree is empty */ + sptree_str_t_init(index->idx.tree, + SIZEOF_TREE_INDEX_MEMBER(index), + member, n_tuples, estimated_tuples, + (void *)tree_index_member_compare, index); + index->enabled = true; + + say_info("build_indexes: n = %" PRIu32 " idx = %" PRIu32 ": end", n, idx); + } + } +} + + +void +index_hash_num(struct index *index, struct namespace *namespace, size_t estimated_rows) +{ + index->type = HASH; + index->namespace = namespace; + index->find = index_find_hash_num; + index->find_by_tuple = index_find_hash_by_tuple; + index->remove = index_remove_hash_num; + index->replace = index_replace_hash_num; + index->idx.int_hash = kh_init(int_ptr_map, NULL); + if (estimated_rows > 0) + kh_resize(int_ptr_map, index->idx.int_hash, estimated_rows); +} + + +void +index_hash_num64(struct index *index, struct namespace *namespace, size_t estimated_rows) +{ + index->type = HASH; + index->namespace = namespace; + index->find = index_find_hash_num64; + index->find_by_tuple = index_find_hash_by_tuple; + index->remove = index_remove_hash_num64; + index->replace = index_replace_hash_num64; + index->idx.int64_hash = kh_init(int64_ptr_map, NULL); + if (estimated_rows > 0) + kh_resize(int64_ptr_map, index->idx.int64_hash, estimated_rows); +} + +void +index_hash_str(struct index *index, struct namespace *namespace, size_t estimated_rows) +{ + index->type = HASH; + index->namespace = namespace; + index->find = index_find_hash_str; + index->find_by_tuple = index_find_hash_by_tuple; + index->remove = index_remove_hash_str; + index->replace = index_replace_hash_str; + index->idx.str_hash = kh_init(lstr_ptr_map, NULL); + if (estimated_rows > 0) + kh_resize(lstr_ptr_map, index->idx.str_hash, estimated_rows); +} + +void +index_tree(struct index *index, struct namespace *namespace, size_t estimated_rows __unused__) +{ + index->type = TREE; + index->namespace = namespace; + index->find = index_find_tree; + index->find_by_tuple = index_find_tree_by_tuple; + index->remove = index_remove_tree_str; + index->replace = index_replace_tree_str; + index->iterator_init = index_iterator_init_tree_str; + index->iterator_next = index_iterator_next_tree_str; + index->idx.tree = palloc(eter_pool, sizeof(*index->idx.tree)); +} diff --git a/mod/silverbox/index.h b/mod/silverbox/index.h new file mode 100644 index 0000000000000000000000000000000000000000..a7eb36aa2731e8a9dbe1f5f93eeaac42f0ae2663 --- /dev/null +++ b/mod/silverbox/index.h @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2010 Mail.RU + * Copyright (C) 2010 Yuriy Vostrikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef TARANTOOL_SILVERBOX_INDEX_H +#define TARANTOOL_SILVERBOX_INDEX_H + +#include <mod/silverbox/assoc.h> + +/** + * A field reference used for TREE indexes. Either stores a copy + * of the corresponding field in the tuple or points to that field + * in the tuple (depending on field length). + */ + +struct field { + /** Field data length. */ + u32 len; + /** Actual field data. For small fields we store the value + * of the field (u32, u64, strings up to 8 bytes), for + * longer fields, we store a pointer to field data in the + * tuple in the primary index. + */ + union { + u32 u32; + u64 u64; + u8 data[sizeof(u64)]; + void *data_ptr; + }; +}; + +enum field_data_type { NUM, NUM64, STR }; + +struct tree_index_member { + struct box_tuple *tuple; + struct field key[]; +}; + +#define SIZEOF_TREE_INDEX_MEMBER(index) \ + (sizeof(struct tree_index_member) + sizeof(struct field) * (index)->key_cardinality) + +#include <third_party/sptree.h> +SPTREE_DEF(str_t, realloc); + +struct index { + bool enabled; + + bool unique; + + struct box_tuple *(*find) (struct index * index, void *key); /* only for unique lookups */ + struct box_tuple *(*find_by_tuple) (struct index * index, struct box_tuple * pattern); + void (*remove) (struct index * index, struct box_tuple *); + void (*replace) (struct index * index, struct box_tuple *, struct box_tuple *); + void (*iterator_init) (struct index *, struct tree_index_member * pattern); + struct box_tuple *(*iterator_next) (struct index *, struct tree_index_member * pattern); + union { + khash_t(lstr_ptr_map) * str_hash; + khash_t(int_ptr_map) * int_hash; + khash_t(int64_ptr_map) * int64_hash; + khash_t(int_ptr_map) * hash; + sptree_str_t *tree; + } idx; + void *iterator; + bool iterator_empty; + + struct namespace *namespace; + + struct { + struct { + u32 fieldno; + enum field_data_type type; + } *key_field; + u32 key_cardinality; + + u32 *field_cmp_order; + u32 field_cmp_order_cnt; + }; + + struct tree_index_member *search_pattern; + + enum { HASH, TREE } type; +}; + +#define foreach_index(n, index_var) \ + for (struct index *index_var = namespace[(n)].index; \ + index_var->key_cardinality != 0; \ + index_var++) \ + if (index_var->enabled) + +void index_hash_num(struct index *index, struct namespace *namespace, size_t estimated_rows); +void index_hash_num64(struct index *index, struct namespace *namespace, size_t estimated_rows); +void index_hash_str(struct index *index, struct namespace *namespace, size_t estimated_rows); +void index_tree(struct index *index, struct namespace *namespace, size_t estimated_rows __unused__); + +struct tree_index_member * alloc_search_pattern(struct index *index, int key_cardinality, void *key); +void index_iterator_init_tree_str(struct index *self, struct tree_index_member *pattern); +struct box_tuple * index_iterator_next_tree_str(struct index *self, struct tree_index_member *pattern); + +struct box_txn; +void validate_indeces(struct box_txn *txn); +void build_indexes(void); + +#endif diff --git a/mod/silverbox/memcached.c b/mod/silverbox/memcached.c index a818f8840071708eb3e3d5f9caa142328165c42c..126e548df578bd3612922ffb01616d898438eece 100644 --- a/mod/silverbox/memcached.c +++ b/mod/silverbox/memcached.c @@ -212,7 +212,7 @@ flush_all(void *data) { uintptr_t delay = (uintptr_t)data; fiber_sleep(delay - ev_now()); - khash_t(lstr2ptr_map) *map = memcached_index->idx.str_hash; + khash_t(lstr_ptr_map) *map = memcached_index->idx.str_hash; for (khiter_t i = kh_begin(map); i != kh_end(map); i++) { if (kh_exist(map, i)) { struct box_tuple *tuple = kh_value(map, i); @@ -3746,7 +3746,7 @@ void memcached_expire(void *data __unused__) { static khiter_t i; - khash_t(lstr2ptr_map) *map = memcached_index->idx.str_hash; + khash_t(lstr_ptr_map) *map = memcached_index->idx.str_hash; say_info("memcached expire fiber started"); for (;;) { diff --git a/mod/silverbox/memcached.rl b/mod/silverbox/memcached.rl index 4df75b1ba53d6b9f2494d101f3c07df244c5d5aa..b07ac1ef561aacc2eba2caf440ac2448899144da 100644 --- a/mod/silverbox/memcached.rl +++ b/mod/silverbox/memcached.rl @@ -203,7 +203,7 @@ flush_all(void *data) { uintptr_t delay = (uintptr_t)data; fiber_sleep(delay - ev_now()); - khash_t(lstr2ptr_map) *map = memcached_index->idx.str_hash; + khash_t(lstr_ptr_map) *map = memcached_index->idx.str_hash; for (khiter_t i = kh_begin(map); i != kh_end(map); i++) { if (kh_exist(map, i)) { struct box_tuple *tuple = kh_value(map, i); @@ -650,7 +650,7 @@ void memcached_expire(void *data __unused__) { static khiter_t i; - khash_t(lstr2ptr_map) *map = memcached_index->idx.str_hash; + khash_t(lstr_ptr_map) *map = memcached_index->idx.str_hash; say_info("memcached expire fiber started"); for (;;) { diff --git a/mod/silverbox/t/box.pl b/mod/silverbox/t/box.pl index 29a16aad64841609eeeed5a7af525498fc481481..810a919f14b0a17cf48e17fdbdc0fa088adfa960 100644 --- a/mod/silverbox/t/box.pl +++ b/mod/silverbox/t/box.pl @@ -10,7 +10,7 @@ use lib "$Bin"; use TBox (); use Carp qw/confess/; -use Test::More tests => 201; +use Test::More tests => 218; use Test::Exception; local $SIG{__DIE__} = \&confess; @@ -513,3 +513,36 @@ foreach my $r (@res) { ok sub { return $r != $tuples->[-1] && $r != $tuples->[-2] }; } + + + +## Check u64 index +# note, that u64 keys are emulated via pack('ll') since default ubuntu perl doesn't support pack('q') +sub def_param_u64 { + my $format = '&&&&'; + return { servers => $server, + namespaces => [ { + indexes => [ { + index_name => 'id', + keys => [0], + } ], + namespace => 20, + format => $format, + default_index => 'id', + } ]} +} + +$box = MR::SilverBox->new(def_param_u64); +ok $box->isa('MR::SilverBox'), 'connect'; + +$_->[0] = pack('ll', $_->[0], 0) foreach @$tuples; + +foreach my $tuple (@$tuples) { + cleanup $tuple->[0]; +} + +foreach my $tuple (@$tuples) { + ok $box->Insert(@$tuple), "unique_tree_index/insert \'$tuple->[0]\'"; +} + +is_deeply($tuples, [$box->Select([map $_->[0], @$tuples])]); diff --git a/scripts/rules.mk b/scripts/rules.mk index 0659e1ca2e1c6e8af829a70bf92c922c6366bec0..fb5a312c6feac4acff18e2924fdf20a068b23eb3 100644 --- a/scripts/rules.mk +++ b/scripts/rules.mk @@ -103,5 +103,6 @@ tarantool_version.h: FORCE @$(GIT) describe HEAD | tr -d \\n >> $@_ @echo '";' >> $@_ @diff -q $@ $@_ 2>/dev/null >/dev/null || ($(ECHO) " GEN " $(notdir $@); cp $@_ $@) + @rm $@_ FORCE: endif diff --git a/scripts/run_test.sh b/scripts/run_test.sh index ce6abef662c744e6f6d43c99f69b8f81009efa96..ac745d895320e57ef9972bf8e2c36ac59c1c08dd 100755 --- a/scripts/run_test.sh +++ b/scripts/run_test.sh @@ -12,7 +12,7 @@ kill_all () { test -f $pid_file && kill $(cat $pid_file) done sleep 1 - ps -eopid,cmd | grep [t]arantul + ps -eopid,cmd | grep [t]arantool } trap 'kill_all' EXIT @@ -121,6 +121,12 @@ write_config() { namespace[19].index[0].unique = 1 namespace[19].index[0].key_field[0].fieldno = 0 namespace[19].index[0].key_field[0].type = "STR" + + namespace[20].enabled = 1 + namespace[20].index[0].type = "HASH" + namespace[20].index[0].unique = 1 + namespace[20].index[0].key_field[0].fieldno = 0 + namespace[20].index[0].key_field[0].type = "NUM64" namespace[22].enabled = 1 namespace[22].index[0].type = "HASH" diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d3c699e728d01be31efa8f68bf87ac0e667159b1 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,3 @@ +test: + ./test-run.py + diff --git a/test/admin.py b/test/admin.py new file mode 100755 index 0000000000000000000000000000000000000000..1f171e03ef9a96658ab476de42213d5d0f009aca --- /dev/null +++ b/test/admin.py @@ -0,0 +1,145 @@ +#! /usr/bin/python +"""A simplistic client for tarantool administrative console. + +On startup, establishes a connection to tarantool server. +Then, reads commands from stdin, and sends them to stdout. +The commands are echoed to stdout. The results are echoed +to stdout as well, prefixed with "r> ". +""" +__author__ = "Konstantin Osipov <kostja.osipov@gmail.com>" + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +import argparse +import socket +import sys +import string + +class Options: + def __init__(self): + """Add all program options, with their defaults.""" + + parser = argparse.ArgumentParser( + description = "Tarantool regression test suite client.") + + parser.add_argument( + "--host", + dest = 'host', + metavar = "host", + default = "localhost", + help = "Host to connect to. Default: localhost") + + parser.add_argument( + "--port", + dest = "port", + default = 33015, + help = "Server port to connect to. Default: 33015") + + parser.add_argument( + "--result-prefix", + metavar = "prefix", + dest = "result_prefix", + help = """Skip input lines that have the given prefix (e.g. "r> ". + Prepend the prefix to all output lines. If not set, nothing is + skipped and output is printed as-is. This option is used + to pipe in .test files, automatically skipping test output. + Without this option the program may be used as an interactive + client. See also --prompt.""") + + parser.add_argument( + "--prompt", + metavar = "prompt", + dest = "prompt", + default = "\033[92mtarantool> \033[0m", + help = """Command prompt. Set to "" for no prompt. Default: + tarantool> """) + + self.args = parser.parse_args() + + +class Connection: + def __init__(self, host, port): + self.host = host + self.port = port + self.is_connected = False + + def connect(self): + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.socket.connect((self.host, self.port)) + self.is_connected = True + + def disconnect(self): + if self.is_connected: + self.socket.close() + self.is_connected = False + + def execute(self, command): + self.socket.sendall(command) + + bufsiz = 4096 + res = "" + + while True: + buf = self.socket.recv(bufsiz) + if not buf: + break + res+= buf; + if res.rfind("---\n"): + break + + return res + + def __enter__(self): + self.connect() + return self + + def __exit__(self, type, value, tb): + self.disconnect() + + +def main(): + options = Options() + try: + with Connection(options.args.host, options.args.port) as con: + result_prefix = options.args.result_prefix + prompt = options.args.prompt + if prompt != "": + sys.stdout.write(prompt) + for line in iter(sys.stdin.readline, ""): + if result_prefix != None and line.find(result_prefix) == 0: + continue + output = con.execute(line) + if result_prefix != None: + print line, result_prefix, string.join(output.split("\n"), + "\n" + result_prefix) + else: + sys.stdout.write(output) + sys.stdout.write(prompt) + + return 0 + except (RuntimeError, socket.error, KeyboardInterrupt) as e: + print "Fatal error: ", repr(e) + return -1 + +if __name__ == "__main__": + exit(main()) + diff --git a/test/box/show.test b/test/box/show.test new file mode 100644 index 0000000000000000000000000000000000000000..c1d11e921d1955d094d31659ae2f3fbce339c288 --- /dev/null +++ b/test/box/show.test @@ -0,0 +1,58 @@ +show stat +r> statistics: +r> INSERT: { rps: 0 , total: 0 } +r> SELECT_LIMIT: { rps: 0 , total: 0 } +r> SELECT: { rps: 0 , total: 0 } +r> UPDATE_FIELDS: { rps: 0 , total: 0 } +r> DELETE: { rps: 0 , total: 0 } +r> --- +r> +show configuration +r> configuration: +r> username: (null) +r> coredump: "0" +r> admin_port: "33015" +r> log_level: "4" +r> slab_alloc_arena: "0.1" +r> slab_alloc_minimal: "64" +r> slab_alloc_factor: "2" +r> work_dir: (null) +r> pid_file: "box.pid" +r> logger: (null) +r> logger_nonblock: "1" +r> io_collect_interval: "0" +r> backlog: "1024" +r> readahead: "16320" +r> snap_dir: "." +r> wal_dir: "." +r> primary_port: "33013" +r> secondary_port: "33014" +r> too_long_threshold: "0.5" +r> custom_proc_title: (null) +r> memcached: "0" +r> memcached_namespace: "23" +r> memcached_expire_per_loop: "1024" +r> memcached_expire_full_sweep: "3600" +r> snap_io_rate_limit: "0" +r> rows_per_wal: "50" +r> wal_fsync_delay: "0" +r> wal_writer_inbox_size: "128" +r> local_hot_standby: "0" +r> wal_dir_rescan_delay: "0.1" +r> panic_on_snap_error: "1" +r> panic_on_wal_error: "0" +r> remote_hot_standby: "0" +r> wal_feeder_ipaddr: (null) +r> wal_feeder_port: "0" +r> namespace[0].enabled: "1" +r> namespace[0].cardinality: "-1" +r> namespace[0].estimated_rows: "0" +r> namespace[0].index[0].type: "HASH" +r> namespace[0].index[0].unique: "1" +r> namespace[0].index[0].key_field[0].fieldno: "0" +r> namespace[0].index[0].key_field[0].type: "NUM" +r> --- +r> +save snapshot +r> ok +r> diff --git a/test/box/suite.ini b/test/box/suite.ini new file mode 100644 index 0000000000000000000000000000000000000000..5d7d6ddca15323208674a211733e22c119ab1632 --- /dev/null +++ b/test/box/suite.ini @@ -0,0 +1,5 @@ +[default] +description = tarantool/silverbox, minimal configuration +client = admin.py --prompt "" --result-prefix "r> " +config = tarantool.cfg +pidfile = box.pid diff --git a/test/box/tarantool.cfg b/test/box/tarantool.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c09c23a187f1636abe74081ba9ac4968be65b27f --- /dev/null +++ b/test/box/tarantool.cfg @@ -0,0 +1,16 @@ +slab_alloc_arena = 0.1 + +pid_file = "box.pid" + +primary_port = 33013 +secondary_port = 33014 +admin_port = 33015 + +rows_per_wal = 50 + +namespace[0].enabled = 1 +namespace[0].index[0].type = "HASH" +namespace[0].index[0].unique = 1 +namespace[0].index[0].key_field[0].fieldno = 0 +namespace[0].index[0].key_field[0].type = "NUM" + diff --git a/test/cmd/args.test b/test/cmd/args.test new file mode 100644 index 0000000000000000000000000000000000000000..58b53b5ea42258804a360cba548f1ce156e1ac0e --- /dev/null +++ b/test/cmd/args.test @@ -0,0 +1,8 @@ +--config +r> +--verbose +r> +--config tarantool.cfg +r> +--foo +r> diff --git a/test/cmd/suite.ini b/test/cmd/suite.ini new file mode 100644 index 0000000000000000000000000000000000000000..25ddabc507553389955bf2db9c48b9c59d2fd44f --- /dev/null +++ b/test/cmd/suite.ini @@ -0,0 +1,5 @@ +[default] +description = tarantool/silverbox, command line options +client = cmdline.py $server --result-prefix "r> " +config = tarantool.cfg +pidfile = box.pid diff --git a/test/cmd/tarantool.cfg b/test/cmd/tarantool.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c09c23a187f1636abe74081ba9ac4968be65b27f --- /dev/null +++ b/test/cmd/tarantool.cfg @@ -0,0 +1,16 @@ +slab_alloc_arena = 0.1 + +pid_file = "box.pid" + +primary_port = 33013 +secondary_port = 33014 +admin_port = 33015 + +rows_per_wal = 50 + +namespace[0].enabled = 1 +namespace[0].index[0].type = "HASH" +namespace[0].index[0].unique = 1 +namespace[0].index[0].key_field[0].fieldno = 0 +namespace[0].index[0].key_field[0].type = "NUM" + diff --git a/test/cmdline.py b/test/cmdline.py new file mode 100755 index 0000000000000000000000000000000000000000..3e8a6a1d3468979a862fe54a7029bd43a319d4be --- /dev/null +++ b/test/cmdline.py @@ -0,0 +1,84 @@ +#! /usr/bin/python +"""Test stdout/stdin interaction of a program". + +Accepts a path to a UNIX command line program to test. +Reads command line options from stdin (one set of options per +one line of input), executes the program, and prints the output +to stdout, prefixed with r>. """ + +__author__ = "Konstantin Osipov <kostja.osipov@gmail.com>" + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +import argparse +import shlex +import subprocess +import sys +import string + +def main(): + parser = argparse.ArgumentParser( + description = "Test command line options of a program.") + + parser.add_argument( + "bin", + help = """Path to the binary to test. Command line options are + read from stdin, one line at a time, the program is run + with the options, the output is sent to stdout.""") + + parser.add_argument( + "--result-prefix", + metavar = "prefix", + dest = "result_prefix", + help = """Skip input lines that have the given prefix (e.g. "r> ". + Prepend the prefix to all output lines. If not set, nothing is + skipped and output is printed as-is. This option is used + to pipe in .test files, automatically skipping test output. + Without this option the program may be used as an interactive + client.""") + + args = parser.parse_args() + + try: + result_prefix = args.result_prefix + + for line in iter(sys.stdin.readline, ""): + if result_prefix != None and line.find(result_prefix) == 0: + continue + path = string.join([args.bin, line]) + output = subprocess.Popen(shlex.split(path), + stdout = subprocess.PIPE, + stderr = subprocess.PIPE).stdout.read() + if result_prefix != None: + print line, result_prefix, string.join(output.split("\n"), + "\n" + result_prefix) + else: + sys.stdout.write(output) + + return 0 + except RuntimeError as e: + print "Fatal error: ", repr(e) + return -1 + +if __name__ == "__main__": + exit(main()) + diff --git a/test/run b/test/run new file mode 120000 index 0000000000000000000000000000000000000000..f2e04c05d84a1ac678d89a9553a246d1d1033aad --- /dev/null +++ b/test/run @@ -0,0 +1 @@ +./test-run.py \ No newline at end of file diff --git a/test/test-run.py b/test/test-run.py new file mode 100755 index 0000000000000000000000000000000000000000..bfa77eea7e588d168c365ebb15bf140ac0fc7b3b --- /dev/null +++ b/test/test-run.py @@ -0,0 +1,435 @@ +#! /usr/bin/python +"""Tarantool regression test suite front-end.""" + +__author__ = "Konstantin Osipov <kostja.osipov@gmail.com>" + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +import argparse +import os +import os.path +import signal +import sys +import stat +import glob +import shutil +import ConfigParser +import subprocess +import pexpect +import time +import collections +import difflib +import filecmp +import shlex + +# +# Run a collection of tests. +# +# @todo +# --gdb +# put class definitions into separate files + +############################################################################ +# Class definition +############################################################################ + +class TestRunException(RuntimeError): + """A common exception to use across the program.""" + def __init__(self, message): + self.message = message + def __str__(self): + return self.message + +class Options: + """Handle options of test-runner""" + def __init__(self): + """Add all program options, with their defaults. We assume + that the program is started from the directory where it is + located""" + + parser = argparse.ArgumentParser( + description = "Tarantool regression test suite front-end. \ + This program must be started from its working directory (" + + os.path.abspath(os.path.dirname(sys.argv[0])) + ").") + + parser.epilog = "For a complete description, use 'pydoc ./" +\ + os.path.basename(sys.argv[0]) + "'" + + parser.add_argument( + "tests", + metavar="list of tests", + nargs="*", + default = [""], + help="""Can be empty. List of test names, to look for in suites. Each + name is used as a substring to look for in the path to test file, + e.g. "show" will run all tests that have "show" in their name in all + suites, "box/show" will only enable tests starting with "show" in + "box" suite. Default: run all tests in all specified suites.""") + + parser.add_argument( + "--suite", + dest = 'suites', + metavar = "suite", + nargs="*", + default = ["box", "cmd"], + help = """List of tests suites to look for tests in. Default: "box".""") + + parser.add_argument( + "--force", + dest = "is_force", + action = "store_true", + default = False, + help = "Go on with other tests in case of an individual test failure." + " Default: false.") + + parser.add_argument( + "--start-and-exit", + dest = "start_and_exit", + action = "store_true", + default = False, + help = "Start the server from the first specified suite and" + "exit without running any tests. Default: false.") + + parser.add_argument( + "--bindir", + dest = "bindir", + default = "../_debug_box", + help = "Path to server binary." + " Default: " + "../_debug_box.") + + parser.add_argument( + "--vardir", + dest = "vardir", + default = "var", + help = "Path to data directory. Default: var.") + + self.check(parser) + self.args = parser.parse_args() + + def check(self, parser): + """Check that the program is started from the directory where + it is located. This is necessary to minimize potential confusion + with absolute paths, since all default paths are relative to the + starting directory.""" + + if not os.path.exists(os.path.basename(sys.argv[0])): +# print first 6 lines of help + short_help = "\n".join(parser.format_help().split("\n")[0:6]) + print short_help + exit(-1) + + +class Server: + """Server represents a single server instance. Normally, the + program operates with only one server, but in future we may add + replication slaves. The server is started once at the beginning + of each suite, and stopped at the end.""" + + def __init__(self, args, config, pidfile): + """Set server options: path to configuration file, pid file, exe, etc.""" + self.args = args + self.path_to_config = config + self.path_to_pidfile = os.path.join(args.vardir, pidfile) + self.path_to_exe = None + self.abspath_to_exe = None + self.is_started = False + + def start(self): + """Start server instance: check if the old one exists, kill it + if necessary, create necessary directories and files, start + the server. The server working directory is taken from 'vardir', + specified in the prgoram options. + Currently this is implemented for tarantool_silverbox only.""" + + if not self.is_started: + print "Starting the server..." + + if self.path_to_exe == None: + self.path_to_exe = self.find_exe() + self.abspath_to_exe = os.path.abspath(self.path_to_exe) + + print " Found executable at " + self.path_to_exe + "." + + print " Creating and populating working directory in " +\ + self.args.vardir + "..." + + if os.access(self.args.vardir, os.F_OK): + print " Found old vardir, deleting..." + self.kill_old_server() + shutil.rmtree(self.args.vardir, ignore_errors = True) + + os.mkdir(self.args.vardir) + shutil.copy(self.path_to_config, self.args.vardir) + + subprocess.check_call([self.abspath_to_exe, "--init_storage"], + cwd = self.args.vardir, +# catch stdout/stderr to not clutter output + stdout = subprocess.PIPE, + stderr = subprocess.PIPE) + + if self.args.start_and_exit: + subprocess.check_call([self.abspath_to_exe, "--daemonize"], + cwd = self.args.vardir, + stdout = subprocess.PIPE, + stderr = subprocess.PIPE) + else: + self.server = pexpect.spawn(self.abspath_to_exe, + cwd = self.args.vardir) + self.logfile_read = sys.stdout + self.server.expect_exact("entering event loop") + + version = subprocess.Popen([self.abspath_to_exe, "--version"], + cwd = self.args.vardir, + stdout = subprocess.PIPE).stdout.read().rstrip() + + print "Started {0} {1}.".format(os.path.basename(self.abspath_to_exe), + version) + +# Set is_started flag, to nicely support cleanup during an exception. + self.is_started = True + else: + print "The server is already started." + + def stop(self): + """Stop server instance. Do nothing if the server is not started, + to properly shut down the server in case of an exception during + start up.""" + if self.is_started: + print "Stopping the server..." + self.server.terminate() + self.server.expect(pexpect.EOF) + self.is_started = False + else: + print "The server is not started." + + def find_exe(self): + """Locate server executable in the bindir. We just take + the first thing looking like an exe in there.""" + + if (os.access(self.args.bindir, os.F_OK) == False or + stat.S_ISDIR(os.stat(self.args.bindir).st_mode) == False): + raise TestRunException("Directory " + self.args.bindir + + " doesn't exist") + + for f in os.listdir(self.args.bindir): + f = os.path.join(self.args.bindir, f) + st_mode = os.stat(f).st_mode + if stat.S_ISREG(st_mode) and st_mode & stat.S_IXUSR: + return f + raise TestRunException("Can't find server executable in " + + self.args.bindir) + + def kill_old_server(self): + """Kill old server instance if it exists.""" + if os.access(self.path_to_pidfile, os.F_OK) == False: + return # Nothing to do + pid = 0 + with open(self.path_to_pidfile) as f: + pid = int(f.read()) + print " Found old server, pid {0}, killing...".format(pid) + try: + os.kill(pid, signal.SIGTERM) + while os.kill(pid, 0) != -1: + time.sleep(0.01) + except OSError: + pass + +class Test: + """An individual test file. A test can run itself, and remembers + its completion state.""" + def __init__(self, name, client): + """Initialize test properties: path to test file, path to + temporary result file, path to the client program, test status.""" + self.name = name + self.client = os.path.join(".", client) + self.result = name.replace(".test", ".result") + self.is_executed = False + self.is_client_ok = None + self.is_equal_result = None + + def passed(self): + """Return true if this test was run successfully.""" + return self.is_executed and self.is_client_ok and self.is_equal_result + + def run(self, test_env): + """Execute the client program, giving it test as stdin, + result as stdout. If the client program aborts, print + its output to stdout, and raise an exception. Else, comprare + result and reject files. If there is a difference, print it to + stdout and raise an exception. The exception is raised only + if is_force flag is not set.""" + + def subst_test_env(arg): + if len(arg) and arg[0] == '$': + return test_env[arg[1:]] + else: + return arg + + args = map(subst_test_env, shlex.split(self.client)) + + sys.stdout.write("{0}".format(self.name)) + + with open(self.name, "r") as test: + with open(self.result, "w+") as result: + self.is_client_ok = \ + subprocess.call(args, + stdin = test, stdout = result) == 0 + + self.is_executed = True + + if self.is_client_ok: + self.is_equal_result = filecmp.cmp(self.name, self.result) + + if self.is_client_ok and self.is_equal_result: + print "\t\t\t[ pass ]" + os.remove(self.result) + else: + print "\t\t\t[ fail ]" + where = "" + if not self.is_client_ok: + self.print_diagnostics() + where = ": client execution aborted" + else: + self.print_unidiff() + where = ": wrong test output" + if not test_env["is_force"]: + raise TestRunException("Failed to run test " + self.name + where) + + + def print_diagnostics(self): + """Print 10 lines of client program output leading to test + failure. Used to diagnose a failure of the client program""" + + print "Test failed! Last 10 lines of the result file:" + with open(self.result, "r+") as result: + tail_10 = collections.deque(result, 10) + for line in tail_10: + sys.stdout.write(line) + + def print_unidiff(self): + """Print a unified diff between .test and .result files. Used + to establish the cause of a failure when .test differs + from .result.""" + + print "Test failed! Result content mismatch:" + with open(self.name, "r") as test: + with open(self.result, "r") as result: + test_time = time.ctime(os.stat(self.name).st_mtime) + result_time = time.ctime(os.stat(self.result).st_mtime) + diff = difflib.unified_diff(test.readlines(), + result.readlines(), + self.name, + self.result, + test_time, + result_time) + for line in diff: + sys.stdout.write(line) + + +class TestSuite: + """Each test suite contains a number of related tests files, + located in the same directory on disk. Each test file has + extention .test and contains a listing of server commands, + followed by their output. The commands are executed, and + obtained results are compared with pre-recorded output. In case + of a comparision difference, an exception is raised. A test suite + must also contain suite.ini, which describes how to start the + server for this suite, the client program to execute individual + tests and other suite properties. The server is started once per + suite.""" + + def __init__(self, suite_path, args): + """Initialize a test suite: check that it exists and contains + a syntactically correct configuration file. Then create + a test instance for each found test.""" + self.path = suite_path + self.args = args + self.tests = [] + + if os.access(self.path, os.F_OK) == False: + raise TestRunException("Suite \"" + self.path + "\" doesn't exist") + + config = ConfigParser.ConfigParser() + config.read(os.path.join(self.path, "suite.ini")) + self.ini = dict(config.items("default")) + print "Collecting tests in \"" + self.path + "\": " +\ + self.ini["description"] + "." + + for test_name in glob.glob(os.path.join(self.path, "*.test")): + for test_pattern in self.args.tests: + if test_name.find(test_pattern) != -1: + self.tests.append(Test(test_name, self.ini["client"])) + print "Found " + str(len(self.tests)) + " tests." + + def run_all(self): + """For each file in the test suite, run client program + assuming each file represents an individual test.""" + server = Server(self.args, os.path.join(self.path, self.ini["config"]), + self.ini["pidfile"]) + server.start() + if self.args.start_and_exit: + print " Start and exit requested, exiting..." + exit(0) + + longsep = "==============================================================================" + shortsep = "------------------------------------------------------------" + print longsep + print "TEST\t\t\t\tRESULT" + print shortsep + failed_tests = [] + test_env = { "is_force" : self.args.is_force, + "server" : server.abspath_to_exe } + + for test in self.tests: + test.run(test_env) + if not test.passed(): + failed_tests.append(test.name) + + print shortsep + if len(failed_tests): + print "Failed {0} tests: {1}.".format(len(failed_tests), + ", ".join(failed_tests)) + server.stop(); + +####################################################################### +# Program body +####################################################################### + +def main(): + options = Options() + + try: + print "Started", " ".join(sys.argv) + suites = [] + for suite_name in options.args.suites: + suites.append(TestSuite(suite_name, options.args)) + + for suite in suites: + suite.run_all() + except RuntimeError as e: + print "\nFatal error: ", e, ". Execution aborted." + return (-1) + + return 0 + +if __name__ == "__main__": + exit(main())