diff --git a/.travis.yml b/.travis.yml index 6819f9220dde1611b3b5fa256e0a8524ea6e30db..40cc8bf626cfa6c74f70159ebe3e8615f1104948 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,8 +18,6 @@ git: depth: 100500 env: - global: - - PACKAGECLOUD_REPO=1_8 matrix: - TARGET=source - TARGET=test @@ -92,21 +90,23 @@ before_deploy: deploy: # Deploy packages to PackageCloud - provider: packagecloud - username: "${PACKAGECLOUD_USER}" - repository: "${PACKAGECLOUD_REPO}" + username: "tarantool" + repository: "1_8" token: "${PACKAGECLOUD_TOKEN}" dist: "${OS}/${DIST}" package_glob: build/*.{rpm,deb,dsc} skip_cleanup: true on: - branch: "1.8" + repo: tarantool/tarantool + branch: "1.8" # releases condition: -n "${OS}" && -n "${DIST}" && -n "${PACKAGECLOUD_TOKEN}" - # Deploy sources to S3 + # Deploy source tarballs to S3 - provider: script script: make -f .travis.mk source_deploy skip_cleanup: true on: - branch: "1.8" + repo: tarantool/tarantool + branch: "1.8" # releases condition: "x${TARGET} = xsource" notifications: diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index ec6408040694122a211c9dedc43af22940fdc241..33bcdbab9ba120195ccb03a35bb0c7ac44397435 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -68,9 +68,6 @@ add_library(box STATIC memtx_tuple.cc sysview_engine.c sysview_index.c - vinyl_engine.c - vinyl_space.c - vinyl_index.c vinyl.c vy_stmt.c vy_mem.c diff --git a/src/box/alter.cc b/src/box/alter.cc index e9e866e962bb929a23b738a85b9ebe63fc2ccc45..5b2807e1d5d0911f5c09fdfb5ecb34caa215d90f 100644 --- a/src/box/alter.cc +++ b/src/box/alter.cc @@ -2088,7 +2088,7 @@ coll_cache_rollback(struct trigger *trigger, void *event) if (new_tuple != NULL) { uint32_t new_id = tuple_field_u32_xc(new_tuple, BOX_COLLATION_FIELD_ID); - struct coll *new_coll = coll_cache_find(new_id); + struct coll *new_coll = coll_by_id(new_id); coll_cache_delete(new_coll); coll_delete(new_coll); } @@ -2126,7 +2126,7 @@ on_replace_dd_collation(struct trigger * /* trigger */, void 
*event) /* TODO: Check that no index uses the collation */ uint32_t old_id = tuple_field_u32_xc(old_tuple, BOX_COLLATION_FIELD_ID); - old_coll = coll_cache_find(old_id); + old_coll = coll_by_id(old_id); assert(old_coll != NULL); access_check_ddl(old_coll->owner_id, SC_COLLATION); diff --git a/src/box/applier.cc b/src/box/applier.cc index 4980432c6455f7fe04265eb9963d5ceddd547aae..e54bea2de6f40dcfa9fa69dbe14aadd96b44f44e 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -421,8 +421,6 @@ applier_subscribe(struct applier *applier) } coio_read_xrow(coio, &iobuf->in, &row); - applier->lag = ev_now(loop()) - row.tm; - applier->last_row_time = ev_monotonic_now(loop()); if (iproto_type_is_error(row.type)) xrow_decode_error_xc(&row); /* error */ @@ -437,6 +435,10 @@ applier_subscribe(struct applier *applier) int2str(row.replica_id), tt_uuid_str(&REPLICASET_UUID)); } + + applier->lag = ev_now(loop()) - row.tm; + applier->last_row_time = ev_monotonic_now(loop()); + if (vclock_get(&replicaset_vclock, row.replica_id) < row.lsn) { /** * Promote the replica set vclock before @@ -467,6 +469,8 @@ applier_disconnect(struct applier *applier, enum applier_state state) } coio_close(loop(), &applier->io); + /* Clear all unparsed input. */ + ibuf_reset(&applier->iobuf->in); iobuf_reset(applier->iobuf); fiber_gc(); } diff --git a/src/box/box.cc b/src/box/box.cc index 1f7107ff611a4d7da749dc4d414a70863b5a2a48..542bb97331827797729a1774bcc8244e1e35d6b5 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -50,7 +50,7 @@ #include "engine.h" #include "memtx_engine.h" #include "sysview_engine.h" -#include "vinyl_engine.h" +#include "vinyl.h" #include "space.h" #include "index.h" #include "port.h" @@ -260,30 +260,6 @@ recovery_journal_create(struct recovery_journal *journal, struct vclock *v) journal->vclock = v; } -/** - * Dummy journal used to generate unique LSNs for rows received - * during initial join. 
- */ -struct join_journal { - struct journal base; - int64_t lsn; -}; - -static int64_t -join_journal_write(struct journal *base, - struct journal_entry * /* entry */) -{ - struct join_journal *journal = (struct join_journal *) base; - return ++journal->lsn; -} - -static inline void -join_journal_create(struct join_journal *journal) -{ - journal_create(&journal->base, join_journal_write, NULL); - journal->lsn = 0; -} - static inline void apply_row(struct xstream *stream, struct xrow_header *row) { @@ -615,6 +591,11 @@ void box_set_too_long_threshold(void) { too_long_threshold = cfg_getd("too_long_threshold"); + + struct vinyl_engine *vinyl; + vinyl = (struct vinyl_engine *)engine_by_name("vinyl"); + assert(vinyl != NULL); + vinyl_engine_set_too_long_threshold(vinyl, too_long_threshold); } void @@ -1130,7 +1111,7 @@ box_on_join(const tt_uuid *instance_uuid) } void -box_process_auth(struct auth_request *request, struct obuf *out) +box_process_auth(struct auth_request *request) { rmean_collect(rmean_box, IPROTO_AUTH, 1); @@ -1141,7 +1122,6 @@ box_process_auth(struct auth_request *request, struct obuf *out) const char *user = request->user_name; uint32_t len = mp_decode_strl(&user); authenticate(user, len, request->scramble); - iproto_reply_ok_xc(out, request->header->sync, ::schema_version); } void @@ -1435,9 +1415,10 @@ engine_init() cfg_geti64("vinyl_cache"), cfg_geti("vinyl_read_threads"), cfg_geti("vinyl_write_threads"), - cfg_getd("vinyl_timeout")); + cfg_geti("force_recovery")); engine_register((struct engine *)vinyl); box_set_vinyl_max_tuple_size(); + box_set_vinyl_timeout(); } /** @@ -1502,11 +1483,8 @@ bootstrap_from_master(struct replica *master) * Process initial data (snapshot or dirty disk data). */ engine_begin_initial_recovery_xc(NULL); - struct join_journal join_journal; - join_journal_create(&join_journal); - journal_set(&join_journal.base); - applier_resume_to_state(applier, APPLIER_FINAL_JOIN, TIMEOUT_INFINITY); + /* * Process final data (WALs). 
*/ diff --git a/src/box/box.h b/src/box/box.h index bd4837925112a7db726cffc52ad543f0395293f0..b9f938cceed15ade159b21ace2971963d4462398 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -126,7 +126,7 @@ const char *box_status(void); } /* extern "C" */ void -box_process_auth(struct auth_request *request, struct obuf *out); +box_process_auth(struct auth_request *request); void box_process_join(struct ev_io *io, struct xrow_header *header); diff --git a/src/box/coll.c b/src/box/coll.c index cb2db58db767b7e28c70b22b6ec2c8f2155b3ce8..97e1c306e0673041a5daa0c5378018a235a24960 100644 --- a/src/box/coll.c +++ b/src/box/coll.c @@ -46,7 +46,7 @@ enum { */ static int coll_icu_cmp(const char *s, size_t slen, const char *t, size_t tlen, - struct coll *coll) + const struct coll *coll) { assert(coll->icu.collator != NULL); diff --git a/src/box/coll.h b/src/box/coll.h index fd99967591dd094d97a39d833917346cf308b026..aa0e4c01a5802aaa87d3f4f9353aead52dddcd8a 100644 --- a/src/box/coll.h +++ b/src/box/coll.h @@ -43,7 +43,7 @@ struct coll; typedef int (*coll_cmp_f)(const char *s, size_t s_len, const char *t, size_t t_len, - struct coll *coll); + const struct coll *coll); typedef uint32_t (*coll_hash_f)(const char *s, size_t s_len, uint32_t *ph, uint32_t *pcarry, diff --git a/src/box/coll_cache.c b/src/box/coll_cache.c index 137f5d717c25ce74de4034ae3bf8fda60423f619..b7eb3edb961021a074bd32a420e2200a0a4e6657 100644 --- a/src/box/coll_cache.c +++ b/src/box/coll_cache.c @@ -42,7 +42,7 @@ coll_cache_init() coll_cache_id = mh_i32ptr_new(); if (coll_cache_id == NULL) { diag_set(OutOfMemory, sizeof(*coll_cache_id), "malloc", - "coll_cache"); + "coll_cache_id"); return -1; } return 0; @@ -63,15 +63,16 @@ coll_cache_destroy() int coll_cache_replace(struct coll *coll, struct coll **replaced) { - const struct mh_i32ptr_node_t node = {coll->id, coll}; - struct mh_i32ptr_node_t repl_node = {0, NULL}; - struct mh_i32ptr_node_t *prepl_node = &repl_node; - if (mh_i32ptr_put(coll_cache_id, &node, 
&prepl_node, NULL) == + const struct mh_i32ptr_node_t id_node = {coll->id, coll}; + struct mh_i32ptr_node_t repl_id_node = {0, NULL}; + struct mh_i32ptr_node_t *prepl_id_node = &repl_id_node; + if (mh_i32ptr_put(coll_cache_id, &id_node, &prepl_id_node, NULL) == mh_end(coll_cache_id)) { - diag_set(OutOfMemory, sizeof(node), "malloc", "coll_cache"); + diag_set(OutOfMemory, sizeof(id_node), "malloc", "coll_cache_id"); return -1; } - *replaced = repl_node.val; + assert(repl_id_node.val == NULL); + *replaced = repl_id_node.val; return 0; } @@ -92,7 +93,7 @@ coll_cache_delete(const struct coll *coll) * Find a collation object by its id. */ struct coll * -coll_cache_find(uint32_t id) +coll_by_id(uint32_t id) { mh_int_t pos = mh_i32ptr_find(coll_cache_id, id, NULL); if (pos == mh_end(coll_cache_id)) diff --git a/src/box/coll_cache.h b/src/box/coll_cache.h index b982ec285059e450e6e1b972bc8a8edaf6a0e6c3..418de4e35c985c72caf4c929e7a00e3bc540940e 100644 --- a/src/box/coll_cache.h +++ b/src/box/coll_cache.h @@ -68,7 +68,7 @@ coll_cache_delete(const struct coll *coll); * Find a collation object by its id. */ struct coll * -coll_cache_find(uint32_t id); +coll_by_id(uint32_t id); #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/iproto.cc b/src/box/iproto.cc index a7d885fa98a2ff0a7a2a3eec05f9506b9964bddb..3cce83246561e6945de5e8643e9b4890693ecaac 100644 --- a/src/box/iproto.cc +++ b/src/box/iproto.cc @@ -95,13 +95,13 @@ struct iproto_msg: public cmsg struct xrow_header header; union { /* Box request, if this is a DML */ - struct request dml_request; + struct request dml; /* Box request, if this is misc (call, eval). */ - struct call_request call_request; + struct call_request call; /* Authentication request. */ - struct auth_request auth_request; + struct auth_request auth; /* SQL request, if this is the EXECUTE request. */ - struct sql_request sql_request; + struct sql_request sql; }; /** Output buffer to write response and flush. 
*/ struct obuf *p_obuf; @@ -140,6 +140,9 @@ iproto_msg_new(struct iproto_connection *con) static void iproto_resume(); +static void +iproto_msg_decode(struct iproto_msg *msg, const char **pos, const char *reqend, + bool *stop_input); static inline void iproto_msg_delete(struct cmsg *msg) @@ -148,10 +151,6 @@ iproto_msg_delete(struct cmsg *msg) iproto_resume(); } -/* }}} */ - -/* {{{ iproto connection and requests */ - /** * A single global queue for all requests in all connections. All * requests from all connections are processed concurrently. @@ -178,6 +177,21 @@ enum rmean_net_name { const char *rmean_net_strings[IPROTO_LAST] = { "SENT", "RECEIVED" }; +static void +tx_process_disconnect(struct cmsg *m); + +static void +net_finish_disconnect(struct cmsg *m); + +static const struct cmsg_hop disconnect_route[] = { + { tx_process_disconnect, &net_pipe }, + { net_finish_disconnect, NULL }, +}; + +/* }}} */ + +/* {{{ iproto_connection - declaration and definition */ + /** * Context of a single client connection. * Interaction scheme: @@ -344,183 +358,6 @@ iproto_write_error_blocking(int sock, const struct error *e, uint64_t sync) (void) fcntl(sock, F_SETFL, flags); } -static void -iproto_connection_on_input(ev_loop * /* loop */, struct ev_io *watcher, - int /* revents */); -static void -iproto_connection_on_output(ev_loop * /* loop */, struct ev_io *watcher, - int /* revents */); - -/** Recycle a connection. Never throws. */ -static inline void -iproto_connection_delete(struct iproto_connection *con) -{ - assert(iproto_connection_is_idle(con)); - assert(!evio_has_fd(&con->output)); - assert(!evio_has_fd(&con->input)); - assert(con->session == NULL); - /* - * The output buffers must have been deleted - * in tx thread. 
- */ - ibuf_destroy(&con->ibuf[0]); - ibuf_destroy(&con->ibuf[1]); - assert(con->obuf[0].pos == 0 && - con->obuf[0].iov[0].iov_base == NULL); - assert(con->obuf[1].pos == 0 && - con->obuf[1].iov[0].iov_base == NULL); - if (con->disconnect) - iproto_msg_delete(con->disconnect); - mempool_free(&iproto_connection_pool, con); -} - -static void -tx_process_misc(struct cmsg *msg); -static void -tx_process1(struct cmsg *msg); -static void -tx_process_select(struct cmsg *msg); -static void -tx_process_sql(struct cmsg *m); -static void -net_send_msg(struct cmsg *msg); - -static void -tx_process_join_subscribe(struct cmsg *msg); -static void -net_end_join(struct cmsg *msg); -static void -net_end_subscribe(struct cmsg *msg); - -static void -tx_fiber_init(struct session *session, uint64_t sync) -{ - session->sync = sync; - /* - * We do not cleanup fiber keys at the end of each request. - * This does not lead to privilege escalation as long as - * fibers used to serve iproto requests never mingle with - * fibers used to serve background tasks without going - * through the purification of fiber_recycle(), which - * resets the fiber local storage. Fibers, used to run - * background tasks clean up their session in on_stop - * trigger as well. - */ - fiber_set_session(fiber(), session); - fiber_set_user(fiber(), &session->credentials); -} - -/** - * Fire on_disconnect triggers in the tx - * thread and destroy the session object, - * as well as output buffers of the connection. - */ -static void -tx_process_disconnect(struct cmsg *m) -{ - struct iproto_msg *msg = (struct iproto_msg *) m; - struct iproto_connection *con = msg->connection; - if (con->session) { - tx_fiber_init(con->session, 0); - if (! rlist_empty(&session_on_disconnect)) - session_run_on_disconnect_triggers(con->session); - session_destroy(con->session); - con->session = NULL; /* safety */ - } - /* - * Got to be done in iproto thread since - * that's where the memory is allocated. 
- */ - obuf_destroy(&con->obuf[0]); - obuf_destroy(&con->obuf[1]); -} - -/** - * Cleanup the net thread resources of a connection - * and close the connection. - */ -static void -net_finish_disconnect(struct cmsg *m) -{ - struct iproto_msg *msg = (struct iproto_msg *) m; - /* Runs the trigger, which may yield. */ - iproto_connection_delete(msg->connection); - iproto_msg_delete(msg); -} - -static const struct cmsg_hop disconnect_route[] = { - { tx_process_disconnect, &net_pipe }, - { net_finish_disconnect, NULL }, -}; - -static const struct cmsg_hop misc_route[] = { - { tx_process_misc, &net_pipe }, - { net_send_msg, NULL }, -}; - -static const struct cmsg_hop select_route[] = { - { tx_process_select, &net_pipe }, - { net_send_msg, NULL }, -}; - -static const struct cmsg_hop process1_route[] = { - { tx_process1, &net_pipe }, - { net_send_msg, NULL }, -}; - -static const struct cmsg_hop sql_route[] = { - { tx_process_sql, &net_pipe }, - { net_send_msg, NULL }, -}; - -static const struct cmsg_hop *dml_route[IPROTO_TYPE_STAT_MAX] = { - NULL, /* IPROTO_OK */ - select_route, /* IPROTO_SELECT */ - process1_route, /* IPROTO_INSERT */ - process1_route, /* IPROTO_REPLACE */ - process1_route, /* IPROTO_UPDATE */ - process1_route, /* IPROTO_DELETE */ - misc_route, /* IPROTO_CALL_16 */ - misc_route, /* IPROTO_AUTH */ - misc_route, /* IPROTO_EVAL */ - process1_route, /* IPROTO_UPSERT */ - misc_route, /* IPROTO_CALL */ - sql_route, /* IPROTO_EXECUTE */ -}; - -static const struct cmsg_hop join_route[] = { - { tx_process_join_subscribe, &net_pipe }, - { net_end_join, NULL }, -}; - -static const struct cmsg_hop subscribe_route[] = { - { tx_process_join_subscribe, &net_pipe }, - { net_end_subscribe, NULL }, -}; - -static struct iproto_connection * -iproto_connection_new(int fd) -{ - struct iproto_connection *con = (struct iproto_connection *) - mempool_alloc_xc(&iproto_connection_pool); - con->input.data = con->output.data = con; - con->loop = loop(); - ev_io_init(&con->input, 
iproto_connection_on_input, fd, EV_READ); - ev_io_init(&con->output, iproto_connection_on_output, fd, EV_WRITE); - ibuf_create(&con->ibuf[0], cord_slab_cache(), iobuf_readahead); - ibuf_create(&con->ibuf[1], cord_slab_cache(), iobuf_readahead); - obuf_create(&con->obuf[0], &tx_cord->slabc, iobuf_readahead); - obuf_create(&con->obuf[1], &tx_cord->slabc, iobuf_readahead); - con->p_ibuf = &con->ibuf[0]; - con->parse_size = 0; - con->session = NULL; - rlist_create(&con->in_stop_list); - /* It may be very awkward to allocate at close. */ - con->disconnect = iproto_msg_new(con); - cmsg_init(con->disconnect, disconnect_route); - return con; -} - /** * Initiate a connection shutdown. This method may * be invoked many times, and does the internal @@ -684,65 +521,6 @@ iproto_connection_input_buffer(struct iproto_connection *con) return new_ibuf; } -static void -iproto_decode_msg(struct iproto_msg *msg, const char **pos, const char *reqend, - bool *stop_input) -{ - xrow_header_decode_xc(&msg->header, pos, reqend); - assert(*pos == reqend); - uint8_t type = msg->header.type; - - /* - * Parse request before putting it into the queue - * to save tx some CPU. More complicated requests are - * parsed in tx thread into request type-specific objects. 
- */ - switch (type) { - case IPROTO_SELECT: - case IPROTO_INSERT: - case IPROTO_REPLACE: - case IPROTO_UPDATE: - case IPROTO_DELETE: - case IPROTO_UPSERT: - xrow_decode_dml_xc(&msg->header, &msg->dml_request, - dml_request_key_map(type)); - assert(type < sizeof(dml_route)/sizeof(*dml_route)); - cmsg_init(msg, dml_route[type]); - break; - case IPROTO_CALL_16: - case IPROTO_CALL: - case IPROTO_EVAL: - xrow_decode_call_xc(&msg->header, &msg->call_request); - cmsg_init(msg, misc_route); - break; - case IPROTO_PING: - cmsg_init(msg, misc_route); - break; - case IPROTO_JOIN: - cmsg_init(msg, join_route); - *stop_input = true; - break; - case IPROTO_SUBSCRIBE: - cmsg_init(msg, subscribe_route); - *stop_input = true; - break; - case IPROTO_EXECUTE: - xrow_decode_sql_xc(&msg->header, &msg->sql_request, - &fiber()->gc); - cmsg_init(msg, sql_route); - break; - case IPROTO_AUTH: - xrow_decode_auth_xc(&msg->header, &msg->auth_request); - cmsg_init(msg, misc_route); - break; - default: - tnt_raise(ClientError, ER_UNKNOWN_REQUEST_TYPE, - (uint32_t) type); - break; - } - return; -} - /** Enqueue all requests which were read up. */ static inline void iproto_enqueue_batch(struct iproto_connection *con, struct ibuf *in) @@ -772,7 +550,7 @@ iproto_enqueue_batch(struct iproto_connection *con, struct ibuf *in) msg->len = reqend - reqstart; /* total request length */ try { - iproto_decode_msg(msg, &pos, reqend, &stop_input); + iproto_msg_decode(msg, &pos, reqend, &stop_input); /* * This can't throw, but should not be * done in case of exception. 
@@ -1001,6 +779,243 @@ iproto_connection_on_output(ev_loop *loop, struct ev_io *watcher, } } +static struct iproto_connection * +iproto_connection_new(int fd) +{ + struct iproto_connection *con = (struct iproto_connection *) + mempool_alloc_xc(&iproto_connection_pool); + con->input.data = con->output.data = con; + con->loop = loop(); + ev_io_init(&con->input, iproto_connection_on_input, fd, EV_READ); + ev_io_init(&con->output, iproto_connection_on_output, fd, EV_WRITE); + ibuf_create(&con->ibuf[0], cord_slab_cache(), iobuf_readahead); + ibuf_create(&con->ibuf[1], cord_slab_cache(), iobuf_readahead); + obuf_create(&con->obuf[0], &tx_cord->slabc, iobuf_readahead); + obuf_create(&con->obuf[1], &tx_cord->slabc, iobuf_readahead); + con->p_ibuf = &con->ibuf[0]; + con->parse_size = 0; + con->session = NULL; + rlist_create(&con->in_stop_list); + /* It may be very awkward to allocate at close. */ + con->disconnect = iproto_msg_new(con); + cmsg_init(con->disconnect, disconnect_route); + return con; +} + +/** Recycle a connection. Never throws. */ +static inline void +iproto_connection_delete(struct iproto_connection *con) +{ + assert(iproto_connection_is_idle(con)); + assert(!evio_has_fd(&con->output)); + assert(!evio_has_fd(&con->input)); + assert(con->session == NULL); + /* + * The output buffers must have been deleted + * in tx thread. 
+ */ + ibuf_destroy(&con->ibuf[0]); + ibuf_destroy(&con->ibuf[1]); + assert(con->obuf[0].pos == 0 && + con->obuf[0].iov[0].iov_base == NULL); + assert(con->obuf[1].pos == 0 && + con->obuf[1].iov[0].iov_base == NULL); + if (con->disconnect) + iproto_msg_delete(con->disconnect); + mempool_free(&iproto_connection_pool, con); +} + +/* }}} iproto_connection */ + +/* {{{ iproto_msg - methods and routes */ + +static void +tx_process_misc(struct cmsg *msg); + +static void +tx_process1(struct cmsg *msg); + +static void +tx_process_select(struct cmsg *msg); + +static void +tx_process_sql(struct cmsg *msg); + +static void +tx_reply_error(struct iproto_msg *msg); + +static void +net_send_msg(struct cmsg *msg); + +static void +tx_process_join_subscribe(struct cmsg *msg); + +static void +net_end_join(struct cmsg *msg); + +static void +net_end_subscribe(struct cmsg *msg); + +static const struct cmsg_hop misc_route[] = { + { tx_process_misc, &net_pipe }, + { net_send_msg, NULL }, +}; + +static const struct cmsg_hop select_route[] = { + { tx_process_select, &net_pipe }, + { net_send_msg, NULL }, +}; + +static const struct cmsg_hop process1_route[] = { + { tx_process1, &net_pipe }, + { net_send_msg, NULL }, +}; + +static const struct cmsg_hop sql_route[] = { + { tx_process_sql, &net_pipe }, + { net_send_msg, NULL }, +}; + +static const struct cmsg_hop *dml_route[IPROTO_TYPE_STAT_MAX] = { + NULL, /* IPROTO_OK */ + select_route, /* IPROTO_SELECT */ + process1_route, /* IPROTO_INSERT */ + process1_route, /* IPROTO_REPLACE */ + process1_route, /* IPROTO_UPDATE */ + process1_route, /* IPROTO_DELETE */ + misc_route, /* IPROTO_CALL_16 */ + misc_route, /* IPROTO_AUTH */ + misc_route, /* IPROTO_EVAL */ + process1_route, /* IPROTO_UPSERT */ + misc_route, /* IPROTO_CALL */ + sql_route, /* IPROTO_EXECUTE */ +}; + +static const struct cmsg_hop join_route[] = { + { tx_process_join_subscribe, &net_pipe }, + { net_end_join, NULL }, +}; + +static const struct cmsg_hop subscribe_route[] = { + { 
tx_process_join_subscribe, &net_pipe }, + { net_end_subscribe, NULL }, +}; + +static void +iproto_msg_decode(struct iproto_msg *msg, const char **pos, const char *reqend, + bool *stop_input) +{ + xrow_header_decode_xc(&msg->header, pos, reqend); + assert(*pos == reqend); + uint8_t type = msg->header.type; + + /* + * Parse request before putting it into the queue + * to save tx some CPU. More complicated requests are + * parsed in tx thread into request type-specific objects. + */ + switch (type) { + case IPROTO_SELECT: + case IPROTO_INSERT: + case IPROTO_REPLACE: + case IPROTO_UPDATE: + case IPROTO_DELETE: + case IPROTO_UPSERT: + xrow_decode_dml_xc(&msg->header, &msg->dml, + dml_request_key_map(type)); + assert(type < sizeof(dml_route)/sizeof(*dml_route)); + cmsg_init(msg, dml_route[type]); + break; + case IPROTO_CALL_16: + case IPROTO_CALL: + case IPROTO_EVAL: + xrow_decode_call_xc(&msg->header, &msg->call); + cmsg_init(msg, misc_route); + break; + case IPROTO_PING: + cmsg_init(msg, misc_route); + break; + case IPROTO_JOIN: + cmsg_init(msg, join_route); + *stop_input = true; + break; + case IPROTO_SUBSCRIBE: + cmsg_init(msg, subscribe_route); + *stop_input = true; + break; + case IPROTO_EXECUTE: + xrow_decode_sql_xc(&msg->header, &msg->sql, &fiber()->gc); + cmsg_init(msg, sql_route); + break; + case IPROTO_AUTH: + xrow_decode_auth_xc(&msg->header, &msg->auth); + cmsg_init(msg, misc_route); + break; + default: + tnt_raise(ClientError, ER_UNKNOWN_REQUEST_TYPE, + (uint32_t) type); + break; + } + return; +} + +static void +tx_fiber_init(struct session *session, uint64_t sync) +{ + session->sync = sync; + /* + * We do not cleanup fiber keys at the end of each request. + * This does not lead to privilege escalation as long as + * fibers used to serve iproto requests never mingle with + * fibers used to serve background tasks without going + * through the purification of fiber_recycle(), which + * resets the fiber local storage. 
Fibers, used to run + * background tasks clean up their session in on_stop + * trigger as well. + */ + fiber_set_session(fiber(), session); + fiber_set_user(fiber(), &session->credentials); +} + +/** + * Fire on_disconnect triggers in the tx + * thread and destroy the session object, + * as well as output buffers of the connection. + */ +static void +tx_process_disconnect(struct cmsg *m) +{ + struct iproto_msg *msg = (struct iproto_msg *) m; + struct iproto_connection *con = msg->connection; + if (con->session) { + tx_fiber_init(con->session, 0); + if (! rlist_empty(&session_on_disconnect)) + session_run_on_disconnect_triggers(con->session); + session_destroy(con->session); + con->session = NULL; /* safety */ + } + /* + * Got to be done in iproto thread since + * that's where the memory is allocated. + */ + obuf_destroy(&con->obuf[0]); + obuf_destroy(&con->obuf[1]); +} + +/** + * Cleanup the net thread resources of a connection + * and close the connection. + */ +static void +net_finish_disconnect(struct cmsg *m) +{ + struct iproto_msg *msg = (struct iproto_msg *) m; + /* Runs the trigger, which may yield. */ + iproto_connection_delete(msg->connection); + iproto_msg_delete(msg); +} + + static int tx_check_schema(uint32_t new_schema_version) { @@ -1012,6 +1027,18 @@ tx_check_schema(uint32_t new_schema_version) return 0; } +/** + * Write error message to the output buffer and advance + * write position. Doesn't throw. 
+ */ +static void +tx_reply_error(struct iproto_msg *msg) +{ + iproto_reply_error(msg->p_obuf, diag_last_error(&fiber()->diag), + msg->header.sync, ::schema_version); + msg->write_end = obuf_create_svp(msg->p_obuf); +} + static void tx_process1(struct cmsg *m) { @@ -1024,7 +1051,7 @@ tx_process1(struct cmsg *m) struct tuple *tuple; struct obuf_svp svp; - if (box_process1(&msg->dml_request, &tuple) || + if (box_process1(&msg->dml, &tuple) || iproto_prepare_select(out, &svp)) goto error; if (tuple && tuple_to_obuf(tuple, out)) @@ -1034,9 +1061,7 @@ tx_process1(struct cmsg *m) msg->write_end = obuf_create_svp(out); return; error: - iproto_reply_error(out, diag_last_error(&fiber()->diag), - msg->header.sync, ::schema_version); - msg->write_end = obuf_create_svp(out); + tx_reply_error(msg); } static void @@ -1047,7 +1072,7 @@ tx_process_select(struct cmsg *m) struct obuf_svp svp; struct port port; int rc; - struct request *req = &msg->dml_request; + struct request *req = &msg->dml; tx_fiber_init(msg->connection->session, msg->header.sync); @@ -1073,9 +1098,7 @@ tx_process_select(struct cmsg *m) msg->write_end = obuf_create_svp(out); return; error: - iproto_reply_error(out, diag_last_error(&fiber()->diag), - msg->header.sync, ::schema_version); - msg->write_end = obuf_create_svp(out); + tx_reply_error(msg); } static void @@ -1093,13 +1116,15 @@ tx_process_misc(struct cmsg *m) switch (msg->header.type) { case IPROTO_CALL: case IPROTO_CALL_16: - box_process_call(&msg->call_request, out); + box_process_call(&msg->call, out); break; case IPROTO_EVAL: - box_process_eval(&msg->call_request, out); + box_process_eval(&msg->call, out); break; case IPROTO_AUTH: - box_process_auth(&msg->auth_request, out); + box_process_auth(&msg->auth); + iproto_reply_ok_xc(out, msg->header.sync, + ::schema_version); break; case IPROTO_PING: iproto_reply_ok_xc(out, msg->header.sync, @@ -1108,16 +1133,13 @@ tx_process_misc(struct cmsg *m) default: unreachable(); } + msg->write_end = 
obuf_create_svp(out); } catch (Exception *e) { - iproto_reply_error(out, diag_last_error(&fiber()->diag), - msg->header.sync, ::schema_version); + tx_reply_error(msg); } - msg->write_end = obuf_create_svp(out); return; error: - iproto_reply_error(out, diag_last_error(&fiber()->diag), - msg->header.sync, ::schema_version); - msg->write_end = obuf_create_svp(out); + tx_reply_error(msg); } static void @@ -1132,8 +1154,7 @@ tx_process_sql(struct cmsg *m) if (tx_check_schema(msg->header.schema_version)) goto error; assert(msg->header.type == IPROTO_EXECUTE); - if (sql_prepare_and_execute(&msg->sql_request, out, - &fiber()->gc) == 0) { + if (sql_prepare_and_execute(&msg->sql, out, &fiber()->gc) == 0) { msg->write_end = obuf_create_svp(out); return; } @@ -1257,8 +1278,7 @@ tx_process_connect(struct cmsg *m) } msg->write_end = obuf_create_svp(out); } catch (Exception *e) { - /* zero sync for connect errors */ - iproto_reply_error(out, e, 0, ::schema_version); + tx_reply_error(msg); msg->close_connection = true; } } diff --git a/src/box/key_def.cc b/src/box/key_def.cc index b6b0f21f96bc1dce5a5a8646075fef03a57f7df2..d71cff3b6c8fd8eae188c6453b225e0bff61355b 100644 --- a/src/box/key_def.cc +++ b/src/box/key_def.cc @@ -140,7 +140,7 @@ key_def_new_with_parts(struct key_part_def *parts, uint32_t part_count) struct key_part_def *part = &parts[i]; struct coll *coll = NULL; if (part->coll_id != COLL_NONE) { - coll = coll_cache_find(part->coll_id); + coll = coll_by_id(part->coll_id); if (coll == NULL) { diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, i + 1, "collation was not found by ID"); diff --git a/src/box/lua/checkpoint_daemon.lua b/src/box/lua/checkpoint_daemon.lua index 0783c11a806593ca411e824d1b8989a76760f0f9..e7ef05fdb12cbbb248a1a95a5bd88dda7682e878 100644 --- a/src/box/lua/checkpoint_daemon.lua +++ b/src/box/lua/checkpoint_daemon.lua @@ -44,10 +44,6 @@ local function process(self) local checkpoints = box.internal.gc.info().checkpoints local last_checkpoint = 
checkpoints[#checkpoints] - if last_checkpoint.signature == box.info.signature then - log.debug('snapshot %d already exists', last_checkpoint.signature) - return false - end local last_snap = fio.pathjoin(box.cfg.memtx_dir, string.format('%020d.snap', last_checkpoint.signature)) @@ -56,7 +52,7 @@ local function process(self) log.error("can't stat %s: %s", last_snap, errno.strerror()) return false end - if snstat.mtime <= fiber.time() + daemon.checkpoint_interval then + if snstat.mtime + daemon.checkpoint_interval <= fiber.time() then return snapshot() end end diff --git a/src/box/lua/info.c b/src/box/lua/info.c index f3343752a6daf4353ada84e7257286ee8116ccfd..25e538f26627f2308f5587d0e3044a2921c3462d 100644 --- a/src/box/lua/info.c +++ b/src/box/lua/info.c @@ -45,14 +45,14 @@ #include "box/wal.h" #include "box/replication.h" #include "box/info.h" +#include "box/engine.h" +#include "box/vinyl.h" #include "main.h" #include "version.h" #include "box/box.h" #include "lua/utils.h" #include "fiber.h" -#include "box/vinyl.h" - static void lbox_pushvclock(struct lua_State *L, const struct vclock *vclock) { @@ -363,7 +363,10 @@ lbox_info_vinyl_call(struct lua_State *L) { struct info_handler h; luaT_info_handler_create(&h, L); - vy_info(vinyl_engine_get_env(), &h); + struct vinyl_engine *vinyl; + vinyl = (struct vinyl_engine *)engine_by_name("vinyl"); + assert(vinyl != NULL); + vinyl_engine_info(vinyl, &h); return 1; } diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 4159b635dff7c28b3decf1e73634c50ef53d593d..feded3436f18298657df5e66309f2115cd0641ae 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -45,18 +45,26 @@ #include "vy_stat.h" #include <math.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + #include <small/lsregion.h> -#include <coio_file.h> +#include <small/region.h> +#include <small/mempool.h> +#include "coio_file.h" #include "coio_task.h" #include "cbus.h" #include "histogram.h" - #include "tuple_update.h" #include 
"txn.h" #include "xrow.h" #include "xlog.h" +#include "engine.h" #include "space.h" +#include "index.h" #include "xstream.h" #include "info.h" #include "column_mask.h" @@ -92,8 +100,8 @@ struct vy_env { struct tx_manager *xm; /** Upsert squash queue */ struct vy_squash_queue *squash_queue; - /** Mempool for struct vy_cursor */ - struct mempool cursor_pool; + /** Memory pool for index iterator. */ + struct mempool iterator_pool; /** Memory quota */ struct vy_quota quota; /** Timer for updating quota watermark. */ @@ -135,16 +143,34 @@ struct vy_env { struct vy_recovery *recovery; /** Local recovery vclock. */ const struct vclock *recovery_vclock; + /** + * LSN to assign to the next statement received during + * initial join. + * + * We can't use original statements' LSNs, because we + * send statements not in the chronological order while + * the receiving end expects LSNs to grow monotonically + * due to the design of the lsregion allocator, which is + * used for storing statements in memory. + */ + int64_t join_lsn; /** Path to the data directory. */ char *path; /** Max size of the memory level. */ size_t memory; /** Max time a transaction may wait for memory. */ double timeout; + /** + * If read of a single statement takes longer than + * the given value, warn about it in the log. + */ + double too_long_threshold; /** Max number of threads used for reading. */ int read_threads; /** Max number of threads used for writing. */ int write_threads; + /** Try to recover corrupted data if set. */ + bool force_recovery; }; enum { @@ -167,6 +193,32 @@ vy_dump_bandwidth(struct vy_env *env) return histogram_percentile(env->dump_bw, 10); } +struct vinyl_engine { + struct engine base; + /** Vinyl environment. */ + struct vy_env *env; +}; + +/** Extract vy_env from an engine object. */ +static inline struct vy_env * +vy_env(struct engine *engine) +{ + return ((struct vinyl_engine *)engine)->env; +} + +struct vinyl_index { + struct index base; + /** Vinyl index implementation. 
*/ + struct vy_index *db; +}; + +/** Extract vy_index from an index object. */ +struct vy_index * +vy_index(struct index *index) +{ + return ((struct vinyl_index *)index)->db; +} + /** Mask passed to vy_gc(). */ enum { /** Delete incomplete runs. */ @@ -179,31 +231,35 @@ static void vy_gc(struct vy_env *env, struct vy_recovery *recovery, unsigned int gc_mask, int64_t gc_lsn); -/** Cursor. */ -struct vy_cursor { +struct vinyl_iterator { + struct iterator base; + /** Vinyl environment. */ + struct vy_env *env; + /** Vinyl index this iterator is for. */ + struct vy_index *index; /** - * A built-in transaction created when a cursor is open - * in autocommit mode. + * Points either to tx_autocommit for autocommit mode + * or to a multi-statement transaction active when the + * iterator was created. */ - struct vy_tx tx_autocommit; - struct vy_index *index; + struct vy_tx *tx; + /** Search key. */ struct tuple *key; + /** Vinyl read iterator. */ + struct vy_read_iterator iterator; /** - * Points either to tx_autocommit for autocommit mode or - * to a multi-statement transaction active when the cursor - * was created. + * Built-in transaction created when iterator is opened + * in autocommit mode. */ - struct vy_tx *tx; - /** The number of vy_cursor_next() invocations. */ - int n_reads; - /** Cursor creation time, used for statistics. */ - ev_tstamp start; - /** Trigger invoked when tx ends to close the cursor. */ + struct vy_tx tx_autocommit; + /** Trigger invoked when tx ends to close the iterator. 
*/ struct trigger on_tx_destroy; - /** Iterator over index */ - struct vy_read_iterator iterator; }; +static const struct engine_vtab vinyl_engine_vtab; +static const struct space_vtab vinyl_space_vtab; +static const struct index_vtab vinyl_index_vtab; + /** * A quick intro into Vinyl cosmology and file format * -------------------------------------------------- @@ -295,8 +351,10 @@ vy_info_append_tx(struct vy_env *env, struct info_handler *h) } void -vy_info(struct vy_env *env, struct info_handler *h) +vinyl_engine_info(struct vinyl_engine *vinyl, struct info_handler *h) { + struct vy_env *env = vinyl->env; + info_begin(h); vy_info_append_quota(env, h); vy_info_append_cache(env, h); @@ -341,10 +399,11 @@ vy_info_append_compact_stat(struct info_handler *h, const char *name, info_table_end(h); } -void -vy_index_info(struct vy_index *index, struct info_handler *h) +static void +vinyl_index_info(struct index *base, struct info_handler *h) { char buf[1024]; + struct vy_index *index = vy_index(base); struct vy_index_stat *stat = &index->stat; struct vy_cache_stat *cache_stat = &index->cache.stat; @@ -463,24 +522,141 @@ vy_index_find_unique(struct space *space, uint32_t index_id) return index; } -struct vy_index * -vy_new_index(struct vy_env *env, struct index_def *index_def, - struct tuple_format *format, struct vy_index *pk) +static int +vinyl_engine_check_space_def(struct space_def *def) { - return vy_index_new(&env->index_env, &env->cache_env, - index_def, format, pk); + if (def->opts.temporary) { + diag_set(ClientError, ER_ALTER_SPACE, + def->name, "engine does not support temporary flag"); + return -1; + } + return 0; } -void -vy_delete_index(struct vy_env *env, struct vy_index *index) +static struct space * +vinyl_engine_create_space(struct engine *engine, struct space_def *def, + struct rlist *key_list) +{ + struct space *space = malloc(sizeof(*space)); + if (space == NULL) { + diag_set(OutOfMemory, sizeof(*space), + "malloc", "struct space"); + return NULL; + } 
+ + /* Create a format from key and field definitions. */ + int key_count = 0; + struct index_def *index_def; + rlist_foreach_entry(index_def, key_list, link) + key_count++; + struct key_def **keys = region_alloc(&fiber()->gc, + sizeof(*keys) * key_count); + if (keys == NULL) { + free(space); + return NULL; + } + key_count = 0; + rlist_foreach_entry(index_def, key_list, link) + keys[key_count++] = index_def->key_def; + + struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, + keys, key_count, 0, def->fields, def->field_count); + if (format == NULL) { + free(space); + return NULL; + } + format->exact_field_count = def->exact_field_count; + tuple_format_ref(format); + + if (space_create(space, engine, &vinyl_space_vtab, + def, key_list, format) != 0) { + tuple_format_unref(format); + free(space); + return NULL; + } + + /* Format is now referenced by the space. */ + tuple_format_unref(format); + return space; +} + +static void +vinyl_space_destroy(struct space *space) { - (void)env; + free(space); +} + +static int +vinyl_space_check_index_def(struct space *space, struct index_def *index_def) +{ + if (index_def->type != TREE) { + diag_set(ClientError, ER_INDEX_TYPE, + index_def->name, space_name(space)); + return -1; + } + if (index_def->key_def->is_nullable && index_def->iid == 0) { + diag_set(ClientError, ER_NULLABLE_PRIMARY, space_name(space)); + return -1; + } + /* Check that there are no ANY, ARRAY, MAP parts */ + for (uint32_t i = 0; i < index_def->key_def->part_count; i++) { + struct key_part *part = &index_def->key_def->parts[i]; + if (part->type <= FIELD_TYPE_ANY || + part->type >= FIELD_TYPE_ARRAY) { + diag_set(ClientError, ER_MODIFY_INDEX, + index_def->name, space_name(space), + tt_sprintf("field type '%s' is not supported", + field_type_strs[part->type])); + return -1; + } + } + return 0; +} + +static struct index * +vinyl_space_create_index(struct space *space, struct index_def *index_def) +{ + assert(index_def->type == TREE); + struct 
vinyl_engine *vinyl = (struct vinyl_engine *)space->engine; + struct vinyl_index *index = calloc(1, sizeof(*index)); + if (index == NULL) { + diag_set(OutOfMemory, sizeof(*index), + "malloc", "struct vinyl_index"); + return NULL; + } + struct vy_env *env = vinyl->env; + struct vy_index *pk = NULL; + if (index_def->iid > 0) { + pk = vy_index(space_index(space, 0)); + assert(pk != NULL); + } + struct vy_index *db = vy_index_new(&env->index_env, &env->cache_env, + index_def, space->format, pk); + if (db == NULL) { + free(index); + return NULL; + } + if (index_create(&index->base, (struct engine *)vinyl, + &vinyl_index_vtab, index_def) != 0) { + vy_index_delete(db); + free(index); + return NULL; + } + index->db = db; + return &index->base; +} + +static void +vinyl_index_destroy(struct index *base) +{ + struct vy_index *index = vy_index(base); /* * There still may be a task scheduled for this index * so postpone actual deletion until the last reference * is gone. */ vy_index_unref(index); + free(base); } /** @@ -489,8 +665,8 @@ vy_delete_index(struct vy_env *env, struct vy_index *index) * create a new index. Take the current recovery status into * account. */ -int -vy_index_open(struct vy_env *env, struct vy_index *index, bool force_recovery) +static int +vy_index_open(struct vy_env *env, struct vy_index *index) { /* Ensure vinyl data directory exists. 
*/ if (access(env->path, F_OK) != 0) { @@ -531,7 +707,7 @@ vy_index_open(struct vy_env *env, struct vy_index *index, bool force_recovery) rc = vy_index_recover(index, env->recovery, vclock_sum(env->recovery_vclock), env->status == VINYL_INITIAL_RECOVERY_LOCAL, - force_recovery); + env->force_recovery); break; default: unreachable(); @@ -539,9 +715,12 @@ vy_index_open(struct vy_env *env, struct vy_index *index, bool force_recovery) return rc; } -void -vy_index_commit_create(struct vy_env *env, struct vy_index *index, int64_t lsn) +static void +vinyl_index_commit_create(struct index *base, int64_t lsn) { + struct vy_env *env = vy_env(base->engine); + struct vy_index *index = vy_index(base); + if (env->status == VINYL_INITIAL_RECOVERY_LOCAL || env->status == VINYL_FINAL_RECOVERY_LOCAL) { /* @@ -558,6 +737,15 @@ vy_index_commit_create(struct vy_env *env, struct vy_index *index, int64_t lsn) } } + if (env->status == VINYL_INITIAL_RECOVERY_REMOTE) { + /* + * Records received during initial join do not + * have LSNs so we use a fake one to identify + * the index in vylog. 
+ */ + lsn = ++env->join_lsn; + } + /* * Backward compatibility fixup: historically, we used * box.info.signature for LSN of index creation, which @@ -620,9 +808,12 @@ vy_log_index_prune(struct vy_index *index, int64_t gc_lsn) } } -void -vy_index_commit_drop(struct vy_env *env, struct vy_index *index) +static void +vinyl_index_commit_drop(struct index *base) { + struct vy_env *env = vy_env(base->engine); + struct vy_index *index = vy_index(base); + vy_scheduler_remove_index(&env->scheduler, index); /* @@ -646,10 +837,18 @@ vy_index_commit_drop(struct vy_env *env, struct vy_index *index) diag_last_error(diag_get())->errmsg); } -int -vy_prepare_truncate_space(struct vy_env *env, struct space *old_space, - struct space *new_space) +static void +vinyl_init_system_space(struct space *space) +{ + (void)space; + unreachable(); +} + +static int +vinyl_space_prepare_truncate(struct space *old_space, struct space *new_space) { + struct vy_env *env = vy_env(old_space->engine); + if (vinyl_check_wal(env, "DDL") != 0) return -1; @@ -710,10 +909,11 @@ vy_prepare_truncate_space(struct vy_env *env, struct space *old_space, return 0; } -void -vy_commit_truncate_space(struct vy_env *env, struct space *old_space, - struct space *new_space) +static void +vinyl_space_commit_truncate(struct space *old_space, struct space *new_space) { + struct vy_env *env = vy_env(old_space->engine); + assert(old_space->index_count == new_space->index_count); uint32_t index_count = new_space->index_count; if (index_count == 0) @@ -778,10 +978,11 @@ vy_commit_truncate_space(struct vy_env *env, struct space *old_space, } } -int -vy_prepare_alter_space(struct vy_env *env, struct space *old_space, - struct space *new_space) +static int +vinyl_space_prepare_alter(struct space *old_space, struct space *new_space) { + struct vy_env *env = vy_env(old_space->engine); + if (vinyl_check_wal(env, "DDL") != 0) return -1; /* @@ -838,9 +1039,11 @@ vy_prepare_alter_space(struct vy_env *env, struct space *old_space, 
return 0; } -int -vy_check_format(struct vy_env *env, struct space *old_space) +static int +vinyl_space_check_format(struct space *new_space, struct space *old_space) { + (void)new_space; + struct vy_env *env = vy_env(old_space->engine); /* @sa vy_prepare_alter_space for checks below. */ if (old_space->index_count == 0) return 0; @@ -854,11 +1057,14 @@ vy_check_format(struct vy_env *env, struct space *old_space) return -1; } -int -vy_commit_alter_space(struct vy_env *env, struct space *new_space, - struct tuple_format *new_format) +static void +vinyl_space_commit_alter(struct space *old_space, struct space *new_space) { - (void) env; + (void)old_space; + if (new_space == NULL || new_space->index_count == 0) + return; /* space drop */ + + struct tuple_format *new_format = new_space->format; struct vy_index *pk = vy_index(new_space->index[0]); struct index_def *new_index_def = space_index_def(new_space, 0); @@ -868,14 +1074,14 @@ vy_commit_alter_space(struct vy_env *env, struct space *new_space, struct tuple_format *format = vy_tuple_format_new_with_colmask(new_format); if (format == NULL) - return -1; + goto fail; /* Update the upsert format. */ struct tuple_format *upsert_format = vy_tuple_format_new_upsert(new_format); if (upsert_format == NULL) { tuple_format_delete(format); - return -1; + goto fail; } /* Set possibly changed opts. */ @@ -915,12 +1121,69 @@ vy_commit_alter_space(struct vy_env *env, struct space *new_space, tuple_format_ref(index->upsert_format); vy_index_validate_formats(index); } + return; +fail: + /* FIXME: space_vtab::commit_alter() must not fail. 
*/ + diag_log(); + unreachable(); + panic("failed to alter space"); +} + +static int +vinyl_space_add_primary_key(struct space *space) +{ + return vy_index_open(vy_env(space->engine), + vy_index(space->index[0])); +} + +static void +vinyl_space_drop_primary_key(struct space *space) +{ + (void)space; +} + +static int +vinyl_space_build_secondary_key(struct space *old_space, + struct space *new_space, + struct index *new_index) +{ + (void)old_space; + (void)new_space; + /* + * Unlike Memtx, Vinyl does not need building of a secondary index. + * This is true because of two things: + * 1) Vinyl does not support alter of non-empty spaces + * 2) During recovery a Vinyl index already has all needed data on disk. + * And there are 3 cases: + * I. The secondary index is added in snapshot. Then Vinyl was + * snapshotted too and all necessary for that moment data is on disk. + * II. The secondary index is added in WAL. That means that vinyl + * space had no data at that point and had nothing to build. The + * index actually could contain recovered data, but it will handle it + * by itself during WAL recovery. + * III. Vinyl is online. The space is definitely empty and there's + * nothing to build. + * + * When we start to implement alter of non-empty vinyl spaces, it + * seems that we should call here: + * Engine::buildSecondaryKey(old_space, new_space, new_index_arg); + * but aware of three cases mentioned above. 
+ */ + return vy_index_open(vy_env(new_index->engine), + vy_index(new_index)); +} + +static size_t +vinyl_space_bsize(struct space *space) +{ + (void)space; return 0; } -size_t -vy_index_bsize(struct vy_index *index) +static ssize_t +vinyl_index_bsize(struct index *base) { + struct vy_index *index = vy_index(base); return index->stat.memory.count.bytes; } @@ -1008,8 +1271,8 @@ vy_index_get(struct vy_env *env, struct vy_tx *tx, struct vy_index *index, } struct vy_read_iterator itr; - vy_read_iterator_open(&itr, &env->run_env, index, tx, - ITER_EQ, vykey, p_read_view); + vy_read_iterator_open(&itr, &env->run_env, index, tx, ITER_EQ, vykey, + p_read_view, env->too_long_threshold); int rc = vy_read_iterator_next(&itr, result); tuple_unref(vykey); if (*result != NULL) @@ -1038,6 +1301,12 @@ vy_check_dup_key(struct vy_env *env, struct vy_tx *tx, struct space *space, { struct tuple *found; (void) part_count; + /* + * During recovery we apply rows that were successfully + * applied before restart so no conflict is possible. + */ + if (env->status != VINYL_ONLINE) + return 0; /* * Expect a full tuple as input (secondary key || primary key) * but use only the secondary key fields (partial key look @@ -1159,8 +1428,7 @@ vy_replace_one(struct vy_env *env, struct vy_tx *tx, struct space *space, return -1; /** * If the space has triggers, then we need to fetch the - * old tuple to pass it to the trigger. Use vy_get to - * fetch it. + * old tuple to pass it to the trigger. 
*/ if (stmt != NULL && !rlist_empty(&space->on_replace)) { const char *key; @@ -1168,7 +1436,8 @@ vy_replace_one(struct vy_env *env, struct vy_tx *tx, struct space *space, if (key == NULL) goto error_unref; uint32_t part_count = mp_decode_array(&key); - if (vy_get(env, tx, pk, key, part_count, &stmt->old_tuple) != 0) + if (vy_index_get(env, tx, pk, key, part_count, + &stmt->old_tuple) != 0) goto error_unref; } if (vy_tx_set(tx, pk, new_tuple)) @@ -1429,7 +1698,20 @@ vy_delete_impl(struct vy_env *env, struct vy_tx *tx, struct space *space, return -1; } -int +/** + * Execute DELETE in a vinyl space. + * @param env Vinyl environment. + * @param tx Current transaction. + * @param stmt Statement for triggers filled with deleted + * statement. + * @param space Vinyl space. + * @param request Request with the tuple data. + * + * @retval 0 Success + * @retval -1 Memory error OR the index is not found OR a tuple + * reference increment error. + */ +static int vy_delete(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, struct space *space, struct request *request) { @@ -1531,7 +1813,20 @@ vy_update_changes_all_indexes(const struct space *space, uint64_t column_mask) return true; } -int +/** + * Execute UPDATE in a vinyl space. + * @param env Vinyl environment. + * @param tx Current transaction. + * @param stmt Statement for triggers filled with old and new + * statements. + * @param space Vinyl space. + * @param request Request with the tuple data. + * + * @retval 0 Success + * @retval -1 Memory error OR the index is not found OR a tuple + * reference increment error. + */ +static int vy_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, struct space *space, struct request *request) { @@ -1766,7 +2061,21 @@ request_normalize_ops(struct request *request) return 0; } -int +/** + * Execute UPSERT in a vinyl space. + * @param env Vinyl environment. + * @param tx Current transaction. 
+ * @param stmt Statement for triggers filled with old and new + * statements. + * @param space Vinyl space. + * @param request Request with the tuple data and update + * operations. + * + * @retval 0 Success + * @retval -1 Memory error OR the index is not found OR a tuple + * reference increment error. + */ +static int vy_upsert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, struct space *space, struct request *request) { @@ -1936,6 +2245,8 @@ vy_insert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, /* The space hasn't the primary index. */ return -1; assert(pk->id == 0); + /* Primary key is dumped last. */ + assert(!vy_is_committed_one(env, space, pk)); if (tuple_validate_raw(pk->mem_format, request->tuple)) return -1; /* First insert into the primary index. */ @@ -1949,6 +2260,8 @@ vy_insert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, for (uint32_t iid = 1; iid < space->index_count; ++iid) { struct vy_index *index = vy_index(space->index[iid]); + if (vy_is_committed_one(env, space, index)) + continue; if (vy_insert_secondary(env, tx, space, index, stmt->new_tuple) != 0) return -1; @@ -1956,13 +2269,27 @@ vy_insert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, return 0; } -int +/** + * Execute REPLACE in a vinyl space. + * @param env Vinyl environment. + * @param tx Current transaction. + * @param stmt Statement for triggers filled with old + * statement. + * @param space Vinyl space. + * @param request Request with the tuple data. + * + * @retval 0 Success + * @retval -1 Memory error OR duplicate key error OR the primary + * index is not found OR a tuple reference increment + * error. 
+ */ +static int vy_replace(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, struct space *space, struct request *request) { if (vy_is_committed(env, space)) return 0; - if (request->type == IPROTO_INSERT && env->status == VINYL_ONLINE) + if (request->type == IPROTO_INSERT) return vy_insert(env, tx, stmt, space, request); if (space->index_count == 1) { @@ -1974,15 +2301,89 @@ vy_replace(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, } } -struct vy_tx * -vy_begin(struct vy_env *env) +static int +vinyl_space_execute_replace(struct space *space, struct txn *txn, + struct request *request, struct tuple **result) +{ + assert(request->index_id == 0); + struct vy_env *env = vy_env(space->engine); + struct vy_tx *tx = txn->engine_tx; + struct txn_stmt *stmt = txn_current_stmt(txn); + if (vy_replace(env, tx, stmt, space, request)) + return -1; + *result = stmt->new_tuple; + return 0; +} + +static int +vinyl_space_execute_delete(struct space *space, struct txn *txn, + struct request *request, struct tuple **result) +{ + struct vy_env *env = vy_env(space->engine); + struct vy_tx *tx = txn->engine_tx; + struct txn_stmt *stmt = txn_current_stmt(txn); + if (vy_delete(env, tx, stmt, space, request)) + return -1; + /* + * Delete may or may not set stmt->old_tuple, + * but we always return NULL. 
+ */ + *result = NULL; + return 0; +} + +static int +vinyl_space_execute_update(struct space *space, struct txn *txn, + struct request *request, struct tuple **result) +{ + struct vy_env *env = vy_env(space->engine); + struct vy_tx *tx = txn->engine_tx; + struct txn_stmt *stmt = txn_current_stmt(txn); + if (vy_update(env, tx, stmt, space, request) != 0) + return -1; + *result = stmt->new_tuple; + return 0; +} + +static int +vinyl_space_execute_upsert(struct space *space, struct txn *txn, + struct request *request) +{ + struct vy_env *env = vy_env(space->engine); + struct vy_tx *tx = txn->engine_tx; + struct txn_stmt *stmt = txn_current_stmt(txn); + return vy_upsert(env, tx, stmt, space, request); +} + +static inline void +txn_stmt_unref_tuples(struct txn_stmt *stmt) +{ + if (stmt->old_tuple) + tuple_unref(stmt->old_tuple); + if (stmt->new_tuple) + tuple_unref(stmt->new_tuple); + stmt->old_tuple = NULL; + stmt->new_tuple = NULL; +} + +static int +vinyl_engine_begin(struct engine *engine, struct txn *txn) { - return vy_tx_begin(env->xm); + struct vy_env *env = vy_env(engine); + assert(txn->engine_tx == NULL); + txn->engine_tx = vy_tx_begin(env->xm); + if (txn->engine_tx == NULL) + return -1; + return 0; } -int -vy_prepare(struct vy_env *env, struct vy_tx *tx) +static int +vinyl_engine_prepare(struct engine *engine, struct txn *txn) { + struct vy_env *env = vy_env(engine); + struct vy_tx *tx = txn->engine_tx; + assert(tx != NULL); + if (tx->write_size > 0 && vinyl_check_wal(env, "DML") != 0) return -1; @@ -2036,9 +2437,13 @@ vy_prepare(struct vy_env *env, struct vy_tx *tx) return 0; } -void -vy_commit(struct vy_env *env, struct vy_tx *tx, int64_t lsn) +static void +vinyl_engine_commit(struct engine *engine, struct txn *txn) { + struct vy_env *env = vy_env(engine); + struct vy_tx *tx = txn->engine_tx; + assert(tx != NULL); + /* * vy_tx_commit() may trigger an upsert squash. 
* If there is no memory for a created statement, @@ -2047,53 +2452,58 @@ vy_commit(struct vy_env *env, struct vy_tx *tx, int64_t lsn) */ size_t mem_used_before = lsregion_used(&env->stmt_env.allocator); - vy_tx_commit(tx, lsn); + vy_tx_commit(tx, txn->signature); size_t mem_used_after = lsregion_used(&env->stmt_env.allocator); assert(mem_used_after >= mem_used_before); /* We can't abort the transaction at this point, use force. */ vy_quota_force_use(&env->quota, mem_used_after - mem_used_before); + + struct txn_stmt *stmt; + stailq_foreach_entry(stmt, &txn->stmts, next) + txn_stmt_unref_tuples(stmt); + txn->engine_tx = NULL; } -void -vy_rollback(struct vy_env *env, struct vy_tx *tx) +static void +vinyl_engine_rollback(struct engine *engine, struct txn *txn) { - (void)env; + (void)engine; + struct vy_tx *tx = txn->engine_tx; + if (tx == NULL) + return; + vy_tx_rollback(tx); -} -void * -vy_savepoint(struct vy_env *env, struct vy_tx *tx) -{ - (void)env; - return vy_tx_savepoint(tx); + struct txn_stmt *stmt; + stailq_foreach_entry(stmt, &txn->stmts, next) + txn_stmt_unref_tuples(stmt); + txn->engine_tx = NULL; } -void -vy_rollback_to_savepoint(struct vy_env *env, struct vy_tx *tx, void *svp) +static int +vinyl_engine_begin_statement(struct engine *engine, struct txn *txn) { - (void)env; - vy_tx_rollback_to_savepoint(tx, svp); + (void)engine; + struct vy_tx *tx = txn->engine_tx; + struct txn_stmt *stmt = txn_current_stmt(txn); + assert(tx != NULL); + stmt->engine_savepoint = vy_tx_savepoint(tx); + return 0; } -/* }}} Public API of transaction control */ - -int -vy_get(struct vy_env *env, struct vy_tx *tx, struct vy_index *index, - const char *key, uint32_t part_count, struct tuple **result) +static void +vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn, + struct txn_stmt *stmt) { - assert(tx == NULL || tx->state == VINYL_TX_READY); - assert(result != NULL); - struct tuple *vyresult = NULL; - assert(part_count <= index->cmp_def->part_count); - if 
(vy_index_full_by_key(env, tx, index, key, part_count, &vyresult)) - return -1; - if (vyresult == NULL) - return 0; - *result = vyresult; - return 0; + (void)engine; + struct vy_tx *tx = txn->engine_tx; + assert(tx != NULL); + vy_tx_rollback_to_savepoint(tx, stmt->engine_savepoint); + txn_stmt_unref_tuples(stmt); } +/* }}} Public API of transaction control */ /** {{{ Environment */ @@ -2194,9 +2604,9 @@ static void vy_squash_schedule(struct vy_index *index, struct tuple *stmt, void /* struct vy_env */ *arg); -struct vy_env * -vy_env_new(const char *path, size_t memory, size_t cache, int read_threads, - int write_threads, double timeout) +static struct vy_env * +vy_env_new(const char *path, size_t memory, size_t cache, + int read_threads, int write_threads, bool force_recovery) { enum { KB = 1000, MB = 1000 * 1000 }; static int64_t dump_bandwidth_buckets[] = { @@ -2221,9 +2631,11 @@ vy_env_new(const char *path, size_t memory, size_t cache, int read_threads, memset(e, 0, sizeof(*e)); e->status = VINYL_OFFLINE; e->memory = memory; - e->timeout = timeout; + e->timeout = TIMEOUT_INFINITY; + e->too_long_threshold = TIMEOUT_INFINITY; e->read_threads = read_threads; e->write_threads = write_threads; + e->force_recovery = force_recovery; e->path = strdup(path); if (e->path == NULL) { diag_set(OutOfMemory, strlen(path), @@ -2263,8 +2675,8 @@ vy_env_new(const char *path, size_t memory, size_t cache, int read_threads, goto error_index_env; struct slab_cache *slab_cache = cord_slab_cache(); - mempool_create(&e->cursor_pool, slab_cache, - sizeof(struct vy_cursor)); + mempool_create(&e->iterator_pool, slab_cache, + sizeof(struct vinyl_iterator)); vy_quota_create(&e->quota, vy_env_quota_exceeded_cb); ev_timer_init(&e->quota_timer, vy_env_quota_timer_cb, 0, VY_QUOTA_UPDATE_INTERVAL); @@ -2289,7 +2701,7 @@ vy_env_new(const char *path, size_t memory, size_t cache, int read_threads, return NULL; } -void +static void vy_env_delete(struct vy_env *e) { ev_timer_stop(loop(), 
&e->quota_timer); @@ -2298,7 +2710,7 @@ vy_env_delete(struct vy_env *e) tx_manager_delete(e->xm); free(e->path); histogram_delete(e->dump_bw); - mempool_destroy(&e->cursor_pool); + mempool_destroy(&e->iterator_pool); vy_run_env_destroy(&e->run_env); vy_index_env_destroy(&e->index_env); vy_stmt_env_destroy(&e->stmt_env); @@ -2311,26 +2723,65 @@ vy_env_delete(struct vy_env *e) free(e); } +struct vinyl_engine * +vinyl_engine_new(const char *dir, size_t memory, size_t cache, + int read_threads, int write_threads, bool force_recovery) +{ + struct vinyl_engine *vinyl = calloc(1, sizeof(*vinyl)); + if (vinyl == NULL) { + diag_set(OutOfMemory, sizeof(*vinyl), + "malloc", "struct vinyl_engine"); + return NULL; + } + + vinyl->env = vy_env_new(dir, memory, cache, read_threads, + write_threads, force_recovery); + if (vinyl->env == NULL) { + free(vinyl); + return NULL; + } + + vinyl->base.vtab = &vinyl_engine_vtab; + vinyl->base.name = "vinyl"; + return vinyl; +} + +static void +vinyl_engine_shutdown(struct engine *engine) +{ + struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; + vy_env_delete(vinyl->env); + free(vinyl); +} + void -vy_set_max_tuple_size(struct vy_env *env, size_t max_size) +vinyl_engine_set_max_tuple_size(struct vinyl_engine *vinyl, size_t max_size) { - (void) env; + (void)vinyl; vy_max_tuple_size = max_size; } void -vy_set_timeout(struct vy_env *env, double timeout) +vinyl_engine_set_timeout(struct vinyl_engine *vinyl, double timeout) { - env->timeout = timeout; + vinyl->env->timeout = timeout; +} + +void +vinyl_engine_set_too_long_threshold(struct vinyl_engine *vinyl, + double too_long_threshold) +{ + vinyl->env->too_long_threshold = too_long_threshold; } /** }}} Environment */ /* {{{ Checkpoint */ -int -vy_begin_checkpoint(struct vy_env *env) +static int +vinyl_engine_begin_checkpoint(struct engine *engine) { + struct vy_env *env = vy_env(engine); assert(env->status == VINYL_ONLINE); /* * The scheduler starts worker threads upon the first wakeup. 
@@ -2344,9 +2795,10 @@ vy_begin_checkpoint(struct vy_env *env) return 0; } -int -vy_wait_checkpoint(struct vy_env *env, struct vclock *vclock) +static int +vinyl_engine_wait_checkpoint(struct engine *engine, struct vclock *vclock) { + struct vy_env *env = vy_env(engine); assert(env->status == VINYL_ONLINE); if (vy_scheduler_wait_checkpoint(&env->scheduler) != 0) return -1; @@ -2355,17 +2807,19 @@ vy_wait_checkpoint(struct vy_env *env, struct vclock *vclock) return 0; } -void -vy_commit_checkpoint(struct vy_env *env, struct vclock *vclock) +static void +vinyl_engine_commit_checkpoint(struct engine *engine, struct vclock *vclock) { (void)vclock; + struct vy_env *env = vy_env(engine); assert(env->status == VINYL_ONLINE); vy_scheduler_end_checkpoint(&env->scheduler); } -void -vy_abort_checkpoint(struct vy_env *env) +static void +vinyl_engine_abort_checkpoint(struct engine *engine) { + struct vy_env *env = vy_env(engine); assert(env->status == VINYL_ONLINE); vy_scheduler_end_checkpoint(&env->scheduler); } @@ -2374,9 +2828,10 @@ vy_abort_checkpoint(struct vy_env *env) /** {{{ Recovery */ -int -vy_bootstrap(struct vy_env *e) +static int +vinyl_engine_bootstrap(struct engine *engine) { + struct vy_env *e = vy_env(engine); assert(e->status == VINYL_OFFLINE); if (vy_log_bootstrap() != 0) return -1; @@ -2385,10 +2840,11 @@ vy_bootstrap(struct vy_env *e) return 0; } -int -vy_begin_initial_recovery(struct vy_env *e, - const struct vclock *recovery_vclock) +static int +vinyl_engine_begin_initial_recovery(struct engine *engine, + const struct vclock *recovery_vclock) { + struct vy_env *e = vy_env(engine); assert(e->status == VINYL_OFFLINE); if (recovery_vclock != NULL) { e->xm->lsn = vclock_sum(recovery_vclock); @@ -2406,9 +2862,10 @@ vy_begin_initial_recovery(struct vy_env *e, return 0; } -int -vy_begin_final_recovery(struct vy_env *e) +static int +vinyl_engine_begin_final_recovery(struct engine *engine) { + struct vy_env *e = vy_env(engine); switch (e->status) { case 
VINYL_INITIAL_RECOVERY_LOCAL: e->status = VINYL_FINAL_RECOVERY_LOCAL; @@ -2422,9 +2879,10 @@ vy_begin_final_recovery(struct vy_env *e) return 0; } -int -vy_end_recovery(struct vy_env *e) +static int +vinyl_engine_end_recovery(struct engine *engine) { + struct vy_env *e = vy_env(engine); switch (e->status) { case VINYL_FINAL_RECOVERY_LOCAL: if (vy_log_end_recovery() != 0) @@ -2504,16 +2962,6 @@ struct vy_join_ctx { * is to the head of the list. */ struct rlist slices; - /** - * LSN to assign to the next statement. - * - * We can't use original statements' LSNs, because we - * send statements not in the chronological order while - * the receiving end expects LSNs to grow monotonically - * due to the design of the lsregion allocator, which is - * used for storing statements in memory. - */ - int64_t lsn; }; static int @@ -2532,8 +2980,11 @@ vy_send_range_f(struct cbus_call_msg *cmsg) ctx->space_id, &xrow); if (rc != 0) break; - /* See comment to vy_join_ctx::lsn. */ - xrow.lsn = ++ctx->lsn; + /* + * Reset the LSN as the replica will ignore it + * anyway - see comment to vy_env::join_lsn. + */ + xrow.lsn = 0; rc = xstream_write(ctx->stream, &xrow); if (rc != 0) break; @@ -2700,9 +3151,11 @@ vy_join_f(va_list ap) return 0; } -int -vy_join(struct vy_env *env, struct vclock *vclock, struct xstream *stream) +static int +vinyl_engine_join(struct engine *engine, struct vclock *vclock, + struct xstream *stream) { + struct vy_env *env = vy_env(engine); int rc = -1; /* Allocate the relay context. 
*/ @@ -2759,6 +3212,69 @@ vy_join(struct vy_env *env, struct vclock *vclock, struct xstream *stream) return rc; } +static int +vinyl_space_apply_initial_join_row(struct space *space, struct request *request) +{ + assert(request->header != NULL); + struct vy_env *env = vy_env(space->engine); + + struct vy_tx *tx = vy_tx_begin(env->xm); + if (tx == NULL) + return -1; + + struct txn_stmt stmt; + memset(&stmt, 0, sizeof(stmt)); + + int rc = -1; + switch (request->type) { + case IPROTO_REPLACE: + rc = vy_replace(env, tx, &stmt, space, request); + break; + case IPROTO_UPSERT: + rc = vy_upsert(env, tx, &stmt, space, request); + break; + case IPROTO_DELETE: + rc = vy_delete(env, tx, &stmt, space, request); + break; + default: + diag_set(ClientError, ER_UNKNOWN_REQUEST_TYPE, request->type); + break; + } + if (rc != 0) { + vy_tx_rollback(tx); + return -1; + } + + /* + * Account memory quota, see vinyl_engine_prepare() + * and vinyl_engine_commit() for more details about + * quota accounting. + */ + size_t reserved = tx->write_size; + if (vy_quota_use(&env->quota, reserved, TIMEOUT_INFINITY) != 0) + unreachable(); + + size_t mem_used_before = lsregion_used(&env->stmt_env.allocator); + + rc = vy_tx_prepare(tx); + if (rc == 0) + vy_tx_commit(tx, ++env->join_lsn); + else + vy_tx_rollback(tx); + + txn_stmt_unref_tuples(&stmt); + + size_t mem_used_after = lsregion_used(&env->stmt_env.allocator); + assert(mem_used_after >= mem_used_before); + size_t used = mem_used_after - mem_used_before; + if (used >= reserved) + vy_quota_force_use(&env->quota, used - reserved); + else + vy_quota_release(&env->quota, reserved - used); + + return rc; +} + /* }}} Replication */ /* {{{ Garbage collection */ @@ -2862,9 +3378,11 @@ vy_gc(struct vy_env *env, struct vy_recovery *recovery, vy_recovery_iterate(recovery, vy_gc_cb, &arg); } -void -vy_collect_garbage(struct vy_env *env, int64_t lsn) +static int +vinyl_engine_collect_garbage(struct engine *engine, int64_t lsn) { + struct vy_env *env = 
vy_env(engine); + /* Cleanup old metadata log files. */ vy_log_collect_garbage(lsn); @@ -2874,10 +3392,11 @@ vy_collect_garbage(struct vy_env *env, int64_t lsn) if (recovery == NULL) { say_warn("vinyl garbage collection failed: %s", diag_last_error(diag_get())->errmsg); - return; + return 0; } vy_gc(env, recovery, VY_GC_DROPPED, lsn); vy_recovery_delete(recovery); + return 0; } /* }}} Garbage collection */ @@ -2930,10 +3449,12 @@ vy_backup_cb(const struct vy_log_record *record, void *cb_arg) return 0; } -int -vy_backup(struct vy_env *env, struct vclock *vclock, - int (*cb)(const char *, void *), void *cb_arg) +static int +vinyl_engine_backup(struct engine *engine, struct vclock *vclock, + engine_backup_cb cb, void *cb_arg) { + struct vy_env *env = vy_env(engine); + /* Backup the metadata log. */ const char *path = vy_log_backup_path(vclock); if (path == NULL) @@ -3033,7 +3554,8 @@ vy_squash_process(struct vy_squash *squash) * prepared, but not committed statements. */ vy_read_iterator_open(&itr, &env->run_env, index, NULL, ITER_EQ, - squash->stmt, &env->xm->p_committed_read_view); + squash->stmt, &env->xm->p_committed_read_view, + env->too_long_threshold); struct tuple *result; int rc = vy_read_iterator_next(&itr, &result); if (rc == 0 && result != NULL) @@ -3276,110 +3798,245 @@ vy_squash_schedule(struct vy_index *index, struct tuple *stmt, void *arg) /* {{{ Cursor */ static void -vy_cursor_on_tx_destroy(struct trigger *trigger, void *event) +vinyl_iterator_on_tx_destroy(struct trigger *trigger, void *event) { (void)event; - struct vy_cursor *c = container_of(trigger, struct vy_cursor, - on_tx_destroy); - c->tx = NULL; + struct vinyl_iterator *it = container_of(trigger, + struct vinyl_iterator, on_tx_destroy); + it->tx = NULL; } -struct vy_cursor * -vy_cursor_new(struct vy_env *env, struct vy_tx *tx, struct vy_index *index, - const char *key, uint32_t part_count, enum iterator_type type) +static int +vinyl_iterator_last(struct iterator *base, struct tuple **ret) { 
- struct vy_cursor *c = mempool_alloc(&env->cursor_pool); - if (c == NULL) { - diag_set(OutOfMemory, sizeof(*c), "cursor", "cursor pool"); - return NULL; - } - assert(part_count <= index->cmp_def->part_count); - c->key = vy_stmt_new_select(index->env->key_format, key, part_count); - if (c->key == NULL) { - mempool_free(&env->cursor_pool, c); - return NULL; - } - c->index = index; - c->n_reads = 0; - trigger_create(&c->on_tx_destroy, vy_cursor_on_tx_destroy, NULL, NULL); - if (tx == NULL) { - tx = &c->tx_autocommit; - vy_tx_create(env->xm, tx); - } else { + (void)base; + *ret = NULL; + return 0; +} + +static void +vinyl_iterator_close(struct vinyl_iterator *it) +{ + vy_read_iterator_close(&it->iterator); + vy_index_unref(it->index); + it->index = NULL; + tuple_unref(it->key); + it->key = NULL; + if (it->tx == &it->tx_autocommit) { /* - * Register a trigger that will abort this cursor - * when the transaction ends. + * Rollback the automatic transaction. + * Use vy_tx_destroy() so as not to spoil + * the statistics of rollbacks issued by + * user transactions. 
*/ - trigger_add(&tx->on_destroy, &c->on_tx_destroy); + vy_tx_destroy(it->tx); + } else { + trigger_clear(&it->on_tx_destroy); } - c->tx = tx; - vy_read_iterator_open(&c->iterator, &env->run_env, index, tx, - type, c->key, - (const struct vy_read_view **)&tx->read_view); - vy_index_ref(c->index); - return c; + it->tx = NULL; + it->base.next = vinyl_iterator_last; } -int -vy_cursor_next(struct vy_env *env, struct vy_cursor *c, struct tuple **result) +static int +vinyl_iterator_next(struct iterator *base, struct tuple **ret) { - struct tuple *vyresult = NULL; - struct vy_index *index = c->index; - *result = NULL; + assert(base->next == vinyl_iterator_next); + struct vinyl_iterator *it = (struct vinyl_iterator *)base; + struct tuple *tuple; - if (c->tx == NULL) { + if (it->tx == NULL) { diag_set(ClientError, ER_CURSOR_NO_TRANSACTION); - return -1; + goto fail; } - if (c->tx->state == VINYL_TX_ABORT || c->tx->read_view->is_aborted) { + if (it->tx->state == VINYL_TX_ABORT || it->tx->read_view->is_aborted) { diag_set(ClientError, ER_READ_VIEW_ABORTED); - return -1; + goto fail; } - assert(c->key != NULL); - int rc = vy_read_iterator_next(&c->iterator, &vyresult); - if (rc) - return -1; - c->n_reads++; - if (vyresult == NULL) + if (vy_read_iterator_next(&it->iterator, &tuple) != 0) + goto fail; + + if (tuple == NULL) { + /* EOF. Close the iterator immediately. */ + vinyl_iterator_close(it); + *ret = NULL; return 0; - if (index->id > 0 && vy_index_full_by_stmt(env, c->tx, index, vyresult, - &vyresult)) - return -1; - *result = vyresult; - /** - * If the index is not primary (def->iid != 0) then no - * need to reference the tuple, because it is returned - * from vy_index_full_by_stmt() as new statement with 1 - * reference. - */ - if (index->id == 0) - tuple_ref(vyresult); - return *result != NULL ? 0 : -1; + } + + if (it->index->id > 0) { + /* Get the full tuple from the primary index. 
*/ + if (vy_index_full_by_stmt(it->env, it->tx, it->index, + tuple, &tuple) != 0) + goto fail; + } else { + tuple_ref(tuple); + } + *ret = tuple_bless(tuple); + tuple_unref(tuple); + if (*ret == NULL) + goto fail; + return 0; +fail: + vinyl_iterator_close(it); + return -1; } -void -vy_cursor_delete(struct vy_env *env, struct vy_cursor *c) +static void +vinyl_iterator_free(struct iterator *base) { - vy_read_iterator_close(&c->iterator); - if (c->tx != NULL) { - if (c->tx == &c->tx_autocommit) { - /* - * Rollback the automatic transaction, - * use vy_tx_destroy() to not spoil - * the statistics of rollbacks issued - * by user transactions. - */ - vy_tx_destroy(c->tx); - } else { - trigger_clear(&c->on_tx_destroy); - } + assert(base->free == vinyl_iterator_free); + struct vinyl_iterator *it = (struct vinyl_iterator *)base; + if (base->next != vinyl_iterator_last) + vinyl_iterator_close(it); + mempool_free(&it->env->iterator_pool, it); +} + +static struct iterator * +vinyl_index_create_iterator(struct index *base, enum iterator_type type, + const char *key, uint32_t part_count) +{ + struct vy_index *index = vy_index(base); + struct vy_env *env = vy_env(base->engine); + + if (type > ITER_GT) { + diag_set(UnsupportedIndexFeature, base->def, + "requested iterator type"); + return NULL; } - if (c->key) - tuple_unref(c->key); - vy_index_unref(c->index); - TRASH(c); - mempool_free(&env->cursor_pool, c); + + struct vinyl_iterator *it = mempool_alloc(&env->iterator_pool); + if (it == NULL) { + diag_set(OutOfMemory, sizeof(struct vinyl_iterator), + "mempool", "struct vinyl_iterator"); + return NULL; + } + it->key = vy_stmt_new_select(index->env->key_format, key, part_count); + if (it->key == NULL) { + mempool_free(&env->iterator_pool, it); + return NULL; + } + + iterator_create(&it->base, base); + it->base.next = vinyl_iterator_next; + it->base.free = vinyl_iterator_free; + + it->env = env; + it->index = index; + vy_index_ref(index); + + struct vy_tx *tx = in_txn() ? 
in_txn()->engine_tx : NULL; + assert(tx == NULL || tx->state == VINYL_TX_READY); + if (tx != NULL) { + /* + * Register a trigger that will abort this iterator + * when the transaction ends. + */ + trigger_create(&it->on_tx_destroy, + vinyl_iterator_on_tx_destroy, NULL, NULL); + trigger_add(&tx->on_destroy, &it->on_tx_destroy); + } else { + tx = &it->tx_autocommit; + vy_tx_create(env->xm, tx); + } + it->tx = tx; + + vy_read_iterator_open(&it->iterator, &env->run_env, + index, tx, type, it->key, + (const struct vy_read_view **)&tx->read_view, + env->too_long_threshold); + return (struct iterator *)it; +} + +static int +vinyl_index_get(struct index *base, const char *key, + uint32_t part_count, struct tuple **ret) +{ + assert(base->def->opts.is_unique); + assert(base->def->key_def->part_count == part_count); + + struct vy_index *index = vy_index(base); + struct vy_env *env = vy_env(base->engine); + struct vy_tx *tx = in_txn() ? in_txn()->engine_tx : NULL; + assert(tx == NULL || tx->state == VINYL_TX_READY); + + struct tuple *tuple; + if (vy_index_full_by_key(env, tx, index, + key, part_count, &tuple) != 0) + return -1; + + if (tuple != NULL) { + *ret = tuple_bless(tuple); + tuple_unref(tuple); + return *ret == NULL ? 
-1 : 0; + } + *ret = NULL; + return 0; } /*** }}} Cursor */ + +static const struct engine_vtab vinyl_engine_vtab = { + /* .shutdown = */ vinyl_engine_shutdown, + /* .create_space = */ vinyl_engine_create_space, + /* .join = */ vinyl_engine_join, + /* .begin = */ vinyl_engine_begin, + /* .begin_statement = */ vinyl_engine_begin_statement, + /* .prepare = */ vinyl_engine_prepare, + /* .commit = */ vinyl_engine_commit, + /* .rollback_statement = */ vinyl_engine_rollback_statement, + /* .rollback = */ vinyl_engine_rollback, + /* .bootstrap = */ vinyl_engine_bootstrap, + /* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery, + /* .begin_final_recovery = */ vinyl_engine_begin_final_recovery, + /* .end_recovery = */ vinyl_engine_end_recovery, + /* .begin_checkpoint = */ vinyl_engine_begin_checkpoint, + /* .wait_checkpoint = */ vinyl_engine_wait_checkpoint, + /* .commit_checkpoint = */ vinyl_engine_commit_checkpoint, + /* .abort_checkpoint = */ vinyl_engine_abort_checkpoint, + /* .collect_garbage = */ vinyl_engine_collect_garbage, + /* .backup = */ vinyl_engine_backup, + /* .check_space_def = */ vinyl_engine_check_space_def, +}; + +static const struct space_vtab vinyl_space_vtab = { + /* .destroy = */ vinyl_space_destroy, + /* .bsize = */ vinyl_space_bsize, + /* .apply_initial_join_row = */ vinyl_space_apply_initial_join_row, + /* .execute_replace = */ vinyl_space_execute_replace, + /* .execute_delete = */ vinyl_space_execute_delete, + /* .execute_update = */ vinyl_space_execute_update, + /* .execute_upsert = */ vinyl_space_execute_upsert, + /* .init_system_space = */ vinyl_init_system_space, + /* .check_index_def = */ vinyl_space_check_index_def, + /* .create_index = */ vinyl_space_create_index, + /* .add_primary_key = */ vinyl_space_add_primary_key, + /* .drop_primary_key = */ vinyl_space_drop_primary_key, + /* .check_format = */ vinyl_space_check_format, + /* .build_secondary_key = */ vinyl_space_build_secondary_key, + /* .prepare_truncate = */ 
vinyl_space_prepare_truncate, + /* .commit_truncate = */ vinyl_space_commit_truncate, + /* .prepare_alter = */ vinyl_space_prepare_alter, + /* .commit_alter = */ vinyl_space_commit_alter, +}; + +static const struct index_vtab vinyl_index_vtab = { + /* .destroy = */ vinyl_index_destroy, + /* .commit_create = */ vinyl_index_commit_create, + /* .commit_drop = */ vinyl_index_commit_drop, + /* .size = */ generic_index_size, + /* .bsize = */ vinyl_index_bsize, + /* .min = */ generic_index_min, + /* .max = */ generic_index_max, + /* .random = */ generic_index_random, + /* .count = */ generic_index_count, + /* .get = */ vinyl_index_get, + /* .replace = */ generic_index_replace, + /* .create_iterator = */ vinyl_index_create_iterator, + /* .create_snapshot_iterator = */ + generic_index_create_snapshot_iterator, + /* .info = */ vinyl_index_info, + /* .begin_build = */ generic_index_begin_build, + /* .reserve = */ generic_index_reserve, + /* .build_next = */ generic_index_build_next, + /* .end_build = */ generic_index_end_build, +}; diff --git a/src/box/vinyl.h b/src/box/vinyl.h index 1138c2a716c4de9566212a134b9c9aded21648be..63add146b60c8e73d448518b3aebf62b9927e429 100644 --- a/src/box/vinyl.h +++ b/src/box/vinyl.h @@ -31,380 +31,62 @@ * SUCH DAMAGE. 
*/ -#include <stdlib.h> -#include <stdint.h> #include <stdbool.h> +#include <stddef.h> #ifdef __cplusplus extern "C" { -#endif - -struct vy_env; -struct vy_tx; -struct vy_cursor; -struct vy_index; -struct index_def; -struct tuple; -struct tuple_format; -struct vclock; -struct request; -struct space; -struct index; -struct txn_stmt; -struct xrow_header; -struct xstream; -enum iterator_type; - -/* - * Environment - */ - -struct vy_env * -vy_env_new(const char *path, size_t memory, size_t cache, int read_threads, - int write_threads, double timeout); - -void -vy_env_delete(struct vy_env *e); - -/* - * Recovery - */ - -int -vy_bootstrap(struct vy_env *e); - -int -vy_begin_initial_recovery(struct vy_env *e, - const struct vclock *recovery_vclock); - -int -vy_begin_final_recovery(struct vy_env *e); - -int -vy_end_recovery(struct vy_env *e); - -/* - * Checkpoint - */ - -int -vy_begin_checkpoint(struct vy_env *env); - -int -vy_wait_checkpoint(struct vy_env *env, struct vclock *vclock); - -void -vy_commit_checkpoint(struct vy_env *env, struct vclock *vclock); - -void -vy_abort_checkpoint(struct vy_env *env); - -/* - * Introspection - */ +#endif /* defined(__cplusplus) */ struct info_handler; +struct vinyl_engine; -/* - * Engine introspection (box.info.vinyl()) - * - * @param env environment - * @param handler info handler - */ -void -vy_info(struct vy_env *env, struct info_handler *handler); +struct vinyl_engine * +vinyl_engine_new(const char *dir, size_t memory, size_t cache, + int read_threads, int write_threads, bool force_recovery); /** - * Index introspection (index:info()) - * - * @param index index - * @param handler info handler - */ -void -vy_index_info(struct vy_index *index, struct info_handler *handler); - -/* - * Transaction - */ - -struct vy_tx * -vy_begin(struct vy_env *e); - -/** - * Get a tuple from the vinyl index. - * @param env Vinyl environment. - * @param tx Current transaction. - * @param index Vinyl index. 
- * @param key MessagePack'ed data, the array without a - * header. - * @param part_count Part count of the key - * @param[out] result Is set to the the found tuple. - * - * @retval 0 Success. - * @retval -1 Memory or read error. - */ -int -vy_get(struct vy_env *env, struct vy_tx *tx, struct vy_index *index, - const char *key, uint32_t part_count, struct tuple **result); - -/** - * Execute REPLACE in a vinyl space. - * @param env Vinyl environment. - * @param tx Current transaction. - * @param stmt Statement for triggers filled with old - * statement. - * @param space Vinyl space. - * @param request Request with the tuple data. - * - * @retval 0 Success - * @retval -1 Memory error OR duplicate key error OR the primary - * index is not found OR a tuple reference increment - * error. - */ -int -vy_replace(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, - struct space *space, struct request *request); - -/** - * Execute DELETE in a vinyl space. - * @param env Vinyl environment. - * @param tx Current transaction. - * @param stmt Statement for triggers filled with deleted - * statement. - * @param space Vinyl space. - * @param request Request with the tuple data. - * - * @retval 0 Success - * @retval -1 Memory error OR the index is not found OR a tuple - * reference increment error. - */ -int -vy_delete(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, - struct space *space, struct request *request); - -/** - * Execute UPDATE in a vinyl space. - * @param env Vinyl environment. - * @param tx Current transaction. - * @param stmt Statement for triggers filled with old and new - * statements. - * @param space Vinyl space. - * @param request Request with the tuple data. - * - * @retval 0 Success - * @retval -1 Memory error OR the index is not found OR a tuple - * reference increment error. 
- */ -int -vy_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, - struct space *space, struct request *request); - -/** - * Execute UPSERT in a vinyl space. - * @param env Vinyl environment. - * @param tx Current transaction. - * @param stmt Statement for triggers filled with old and new - * statements. - * @param space Vinyl space. - * @param request Request with the tuple data and update - * operations. - * - * @retval 0 Success - * @retval -1 Memory error OR the index is not found OR a tuple - * reference increment error. - */ -int -vy_upsert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, - struct space *space, struct request *request); - -int -vy_prepare(struct vy_env *env, struct vy_tx *tx); - -void -vy_commit(struct vy_env *env, struct vy_tx *tx, int64_t lsn); - -void -vy_rollback(struct vy_env *env, struct vy_tx *tx); - -void * -vy_savepoint(struct vy_env *env, struct vy_tx *tx); - -void -vy_rollback_to_savepoint(struct vy_env *env, struct vy_tx *tx, void *svp); - -/* - * Index - */ - -/** - * Extract vy_index from a VinylIndex object. - * Defined in vinyl_index.cc - */ -struct vy_index * -vy_index(struct index *index); - -struct field_def; -/** - * Create a new vinyl index object without opening it. - * @param env Vinyl environment. - * @param index_def Index definition. - * @param format Space format. - * @param pk Primary index. - */ -struct vy_index * -vy_new_index(struct vy_env *env, struct index_def *index_def, - struct tuple_format *format, struct vy_index *pk); - -/** - * Delete a vinyl index object. - * @param env Vinyl environment. - * @param index Index object. - */ -void -vy_delete_index(struct vy_env *env, struct vy_index *index); - -/** - * Handle vinyl space truncation. - * - * This function initializes indexes of the new space - * so that it can replace the old space on truncation. 
- */ -int -vy_prepare_truncate_space(struct vy_env *env, struct space *old_space, - struct space *new_space); - -/** - * Commit space truncation in the metadata log. - */ -void -vy_commit_truncate_space(struct vy_env *env, struct space *old_space, - struct space *new_space); - -/** - * Hook on an preparation of space alter event. - * @param env Vinyl environment. - * @param old_space Old space. - * @param new_space New space. - * - * @retval 0 Success. - * @retval -1 Error. - */ -int -vy_prepare_alter_space(struct vy_env *env, struct space *old_space, - struct space *new_space); - -/** - * Check that new fields of a space format are - * compatible with existing tuples. - * @param env Vinyl environment. - * @param old_space Old space. - * - * @retval 0 Success. - * @retval -1 Client error. - */ -int -vy_check_format(struct vy_env *env, struct space *old_space); - -/** - * Hook on an alter space commit event. It is called on each - * create_index(), drop_index() and is used for update - * vy_index.space attribute. - * @param env Vinyl environment. - * @param old_space Old space. - * @param new_space New space. - * - * @retval 0 Success. - * @retval -1 Memory or new format register error. - */ -int -vy_commit_alter_space(struct vy_env *env, struct space *new_space, - struct tuple_format *new_format); - -/** - * Open a vinyl index. - * - * During recovery, this function loads run files from - * the index directory. After recovery is complete, it - * creates the index directory. - */ -int -vy_index_open(struct vy_env *env, struct vy_index *index, bool force_recovery); - -/** - * Commit index creation in the metadata log. + * Engine introspection (box.info.vinyl()) */ void -vy_index_commit_create(struct vy_env *env, struct vy_index *index, int64_t lsn); +vinyl_engine_info(struct vinyl_engine *vinyl, struct info_handler *handler); /** - * Commit index drop in the metadata log. 
- */ -void -vy_index_commit_drop(struct vy_env *env, struct vy_index *index); - -size_t -vy_index_bsize(struct vy_index *index); - -/* - * Index Cursor - */ - -/** - * Create a cursor. If tx is not NULL, the cursor life time is - * bound by the transaction life time. Otherwise, the cursor - * allocates its own transaction. - */ -struct vy_cursor * -vy_cursor_new(struct vy_env *env, struct vy_tx *tx, struct vy_index *index, - const char *key, uint32_t part_count, enum iterator_type type); - -void -vy_cursor_delete(struct vy_env *env, struct vy_cursor *cursor); - -int -vy_cursor_next(struct vy_env *env, struct vy_cursor *cursor, - struct tuple **result); - -/* - * Replication - */ - -int -vy_join(struct vy_env *env, struct vclock *vclock, struct xstream *stream); - -/* - * Garbage collection + * Update max tuple size. */ - void -vy_collect_garbage(struct vy_env *env, int64_t lsn); - -/* - * Backup - */ - -int -vy_backup(struct vy_env *env, struct vclock *vclock, - int (*cb)(const char *, void *), void *cb_arg); - -/* - * Configuration - */ +vinyl_engine_set_max_tuple_size(struct vinyl_engine *vinyl, size_t max_size); /** - * Update max tuple size. + * Update query timeout. */ void -vy_set_max_tuple_size(struct vy_env *env, size_t max_size); +vinyl_engine_set_timeout(struct vinyl_engine *vinyl, double timeout); /** - * Update query timeout. + * Update too_long_threshold. 
*/ void -vy_set_timeout(struct vy_env *env, double timeout); +vinyl_engine_set_too_long_threshold(struct vinyl_engine *vinyl, + double too_long_threshold); #ifdef __cplusplus +} /* extern "C" */ + +#include "diag.h" + +static inline struct vinyl_engine * +vinyl_engine_new_xc(const char *dir, size_t memory, size_t cache, + int read_threads, int write_threads, bool force_recovery) +{ + struct vinyl_engine *vinyl; + vinyl = vinyl_engine_new(dir, memory, cache, read_threads, + write_threads, force_recovery); + if (vinyl == NULL) + diag_raise(); + return vinyl; } -#endif + +#endif /* defined(__cplusplus) */ #endif /* INCLUDES_TARANTOOL_BOX_VINYL_H */ diff --git a/src/box/vinyl_engine.c b/src/box/vinyl_engine.c deleted file mode 100644 index 1abd9353cfc670f60c5b4375413a72b98f0c62e4..0000000000000000000000000000000000000000 --- a/src/box/vinyl_engine.c +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Copyright 2010-2016, Tarantool AUTHORS, please see AUTHORS file. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include "vinyl_engine.h" - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <small/mempool.h> - -#include "trivia/util.h" -#include "vinyl_space.h" -#include "xrow.h" -#include "tuple.h" -#include "txn.h" -#include "space.h" -#include "vinyl.h" - -/* Used by lua/info.c */ -struct vy_env * -vinyl_engine_get_env(void) -{ - struct vinyl_engine *vinyl; - vinyl = (struct vinyl_engine *)engine_by_name("vinyl"); - assert(vinyl != NULL); - return vinyl->env; -} - -static void -vinyl_engine_shutdown(struct engine *engine) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - if (mempool_is_initialized(&vinyl->iterator_pool)) - mempool_destroy(&vinyl->iterator_pool); - vy_env_delete(vinyl->env); - free(vinyl); -} - -static int -vinyl_engine_bootstrap(struct engine *engine) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - return vy_bootstrap(vinyl->env); -} - -static int -vinyl_engine_begin_initial_recovery(struct engine *engine, - const struct vclock *recovery_vclock) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - return vy_begin_initial_recovery(vinyl->env, recovery_vclock); -} - -static int -vinyl_engine_begin_final_recovery(struct engine *engine) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - return vy_begin_final_recovery(vinyl->env); -} - -static int -vinyl_engine_end_recovery(struct engine *engine) -{ - struct vinyl_engine *vinyl = 
(struct vinyl_engine *)engine; - return vy_end_recovery(vinyl->env); -} - -static struct space * -vinyl_engine_create_space(struct engine *engine, struct space_def *def, - struct rlist *key_list) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - return vinyl_space_new(vinyl, def, key_list); -} - -static int -vinyl_engine_join(struct engine *engine, struct vclock *vclock, - struct xstream *stream) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - return vy_join(vinyl->env, vclock, stream); -} - -static int -vinyl_engine_begin(struct engine *engine, struct txn *txn) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - assert(txn->engine_tx == NULL); - txn->engine_tx = vy_begin(vinyl->env); - if (txn->engine_tx == NULL) - return -1; - return 0; -} - -static int -vinyl_engine_begin_statement(struct engine *engine, struct txn *txn) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - struct vy_tx *tx = (struct vy_tx *)(txn->engine_tx); - struct txn_stmt *stmt = txn_current_stmt(txn); - stmt->engine_savepoint = vy_savepoint(vinyl->env, tx); - return 0; -} - -static int -vinyl_engine_prepare(struct engine *engine, struct txn *txn) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - struct vy_tx *tx = (struct vy_tx *) txn->engine_tx; - return vy_prepare(vinyl->env, tx); -} - -static inline void -txn_stmt_unref_tuples(struct txn_stmt *stmt) -{ - if (stmt->old_tuple) - tuple_unref(stmt->old_tuple); - if (stmt->new_tuple) - tuple_unref(stmt->new_tuple); - stmt->old_tuple = NULL; - stmt->new_tuple = NULL; -} - -static void -vinyl_engine_commit(struct engine *engine, struct txn *txn) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - struct vy_tx *tx = (struct vy_tx *) txn->engine_tx; - struct txn_stmt *stmt; - stailq_foreach_entry(stmt, &txn->stmts, next) { - txn_stmt_unref_tuples(stmt); - } - if (tx) { - vy_commit(vinyl->env, tx, txn->signature); - txn->engine_tx = NULL; - } -} - 
-static void -vinyl_engine_rollback(struct engine *engine, struct txn *txn) -{ - if (txn->engine_tx == NULL) - return; - - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - struct vy_tx *tx = (struct vy_tx *) txn->engine_tx; - vy_rollback(vinyl->env, tx); - txn->engine_tx = NULL; - struct txn_stmt *stmt; - stailq_foreach_entry(stmt, &txn->stmts, next) { - txn_stmt_unref_tuples(stmt); - } -} - -static void -vinyl_engine_rollback_statement(struct engine *engine, struct txn *txn, - struct txn_stmt *stmt) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - txn_stmt_unref_tuples(stmt); - vy_rollback_to_savepoint(vinyl->env, (struct vy_tx *)txn->engine_tx, - stmt->engine_savepoint); -} - -static int -vinyl_engine_begin_checkpoint(struct engine *engine) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - return vy_begin_checkpoint(vinyl->env); -} - -static int -vinyl_engine_wait_checkpoint(struct engine *engine, struct vclock *vclock) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - return vy_wait_checkpoint(vinyl->env, vclock); -} - -static void -vinyl_engine_commit_checkpoint(struct engine *engine, struct vclock *vclock) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - vy_commit_checkpoint(vinyl->env, vclock); -} - -static void -vinyl_engine_abort_checkpoint(struct engine *engine) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - vy_abort_checkpoint(vinyl->env); -} - -static int -vinyl_engine_collect_garbage(struct engine *engine, int64_t lsn) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - vy_collect_garbage(vinyl->env, lsn); - return 0; -} - -static int -vinyl_engine_backup(struct engine *engine, struct vclock *vclock, - engine_backup_cb cb, void *arg) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)engine; - return vy_backup(vinyl->env, vclock, cb, arg); -} - -static int -vinyl_engine_check_space_def(struct space_def *def) -{ - if 
(def->opts.temporary) { - diag_set(ClientError, ER_ALTER_SPACE, - def->name, "engine does not support temporary flag"); - return -1; - } - return 0; -} - -static const struct engine_vtab vinyl_engine_vtab = { - /* .shutdown = */ vinyl_engine_shutdown, - /* .create_space = */ vinyl_engine_create_space, - /* .join = */ vinyl_engine_join, - /* .begin = */ vinyl_engine_begin, - /* .begin_statement = */ vinyl_engine_begin_statement, - /* .prepare = */ vinyl_engine_prepare, - /* .commit = */ vinyl_engine_commit, - /* .rollback_statement = */ vinyl_engine_rollback_statement, - /* .rollback = */ vinyl_engine_rollback, - /* .bootstrap = */ vinyl_engine_bootstrap, - /* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery, - /* .begin_final_recovery = */ vinyl_engine_begin_final_recovery, - /* .end_recovery = */ vinyl_engine_end_recovery, - /* .begin_checkpoint = */ vinyl_engine_begin_checkpoint, - /* .wait_checkpoint = */ vinyl_engine_wait_checkpoint, - /* .commit_checkpoint = */ vinyl_engine_commit_checkpoint, - /* .abort_checkpoint = */ vinyl_engine_abort_checkpoint, - /* .collect_garbage = */ vinyl_engine_collect_garbage, - /* .backup = */ vinyl_engine_backup, - /* .check_space_def = */ vinyl_engine_check_space_def, -}; - -struct vinyl_engine * -vinyl_engine_new(const char *dir, size_t memory, size_t cache, - int read_threads, int write_threads, double timeout) -{ - struct vinyl_engine *vinyl = calloc(1, sizeof(*vinyl)); - if (vinyl == NULL) { - diag_set(OutOfMemory, sizeof(*vinyl), - "malloc", "struct vinyl_engine"); - return NULL; - } - - vinyl->env = vy_env_new(dir, memory, cache, read_threads, - write_threads, timeout); - if (vinyl->env == NULL) { - free(vinyl); - return NULL; - } - - vinyl->base.vtab = &vinyl_engine_vtab; - vinyl->base.name = "vinyl"; - return vinyl; -} - -void -vinyl_engine_set_max_tuple_size(struct vinyl_engine *vinyl, size_t max_size) -{ - vy_set_max_tuple_size(vinyl->env, max_size); -} - -void -vinyl_engine_set_timeout(struct 
vinyl_engine *vinyl, double timeout) -{ - vy_set_timeout(vinyl->env, timeout); -} diff --git a/src/box/vinyl_engine.h b/src/box/vinyl_engine.h deleted file mode 100644 index 486402ba7ff3b87386846759fe1edb22b151ed70..0000000000000000000000000000000000000000 --- a/src/box/vinyl_engine.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef TARANTOOL_BOX_VINYL_ENGINE_H_INCLUDED -#define TARANTOOL_BOX_VINYL_ENGINE_H_INCLUDED -/* - * Copyright 2010-2016, Tarantool AUTHORS, please see AUTHORS file. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -#include <stddef.h> -#include <small/mempool.h> - -#include "engine.h" - -#if defined(__cplusplus) -extern "C" { -#endif /* defined(__cplusplus) */ - -struct vy_env; - -struct vinyl_engine { - struct engine base; - struct vy_env *env; - /** Memory pool for index iterator. */ - struct mempool iterator_pool; -}; - -struct vinyl_engine * -vinyl_engine_new(const char *dir, size_t memory, size_t cache, - int read_threads, int write_threads, double timeout); - -void -vinyl_engine_set_max_tuple_size(struct vinyl_engine *vinyl, size_t max_size); - -void -vinyl_engine_set_timeout(struct vinyl_engine *vinyl, double timeout); - -#if defined(__cplusplus) -} /* extern "C" */ - -#include "diag.h" - -static inline struct vinyl_engine * -vinyl_engine_new_xc(const char *dir, size_t memory, size_t cache, - int read_threads, int write_threads, double timeout) -{ - struct vinyl_engine *vinyl; - vinyl = vinyl_engine_new(dir, memory, cache, read_threads, - write_threads, timeout); - if (vinyl == NULL) - diag_raise(); - return vinyl; -} - -#endif /* defined(__plusplus) */ - -#endif /* TARANTOOL_BOX_VINYL_ENGINE_H_INCLUDED */ diff --git a/src/box/vinyl_index.c b/src/box/vinyl_index.c deleted file mode 100644 index 11b9f522e7c25843c1ab37770fc591f57567a683..0000000000000000000000000000000000000000 --- a/src/box/vinyl_index.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2010-2016, Tarantool AUTHORS, please see AUTHORS file. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include "vinyl_index.h" -#include "vinyl_engine.h" - -#include <stdio.h> -#include <small/mempool.h> - -#include "trivia/util.h" -#include "fiber.h" -#include "schema.h" -#include "txn.h" -#include "vinyl.h" -#include "tuple.h" -#include "cfg.h" - -/** - * Get (struct vy_index *) by (struct index *). - * @param index vinyl_index to convert. - * @retval Pointer to index->db. - */ -struct vy_index * -vy_index(struct index *index) -{ - return ((struct vinyl_index *) index)->db; -} - -struct vinyl_iterator { - struct iterator base; - struct vy_env *env; - struct vy_cursor *cursor; - /** Memory pool the iterator was allocated from. 
*/ - struct mempool *pool; -}; - -static void -vinyl_index_destroy(struct index *base) -{ - struct vinyl_index *index = (struct vinyl_index *)base; - struct vinyl_engine *vinyl = (struct vinyl_engine *)base->engine; - vy_delete_index(vinyl->env, index->db); - free(index); -} - -static void -vinyl_index_commit_create(struct index *base, int64_t signature) -{ - struct vinyl_index *index = (struct vinyl_index *)base; - struct vinyl_engine *vinyl = (struct vinyl_engine *)base->engine; - vy_index_commit_create(vinyl->env, index->db, signature); -} - -static void -vinyl_index_commit_drop(struct index *base) -{ - struct vinyl_index *index = (struct vinyl_index *)base; - struct vinyl_engine *vinyl = (struct vinyl_engine *)base->engine; - vy_index_commit_drop(vinyl->env, index->db); -} - -static int -vinyl_index_get(struct index *base, const char *key, - uint32_t part_count, struct tuple **result) -{ - assert(base->def->opts.is_unique && - part_count == base->def->key_def->part_count); - struct vinyl_index *index = (struct vinyl_index *)base; - struct vinyl_engine *vinyl = (struct vinyl_engine *)base->engine; - /* - * engine_tx might be empty, even if we are in txn context. - * This can happen on a first-read statement. - */ - struct vy_tx *transaction = in_txn() ? 
- (struct vy_tx *) in_txn()->engine_tx : NULL; - struct tuple *tuple = NULL; - if (vy_get(vinyl->env, transaction, index->db, - key, part_count, &tuple) != 0) - return -1; - if (tuple != NULL) { - tuple = tuple_bless(tuple); - if (tuple == NULL) - return -1; - tuple_unref(tuple); - } - *result = tuple; - return 0; -} - -static ssize_t -vinyl_index_bsize(struct index *base) -{ - struct vinyl_index *index = (struct vinyl_index *)base; - return vy_index_bsize(index->db); -} - -static int -vinyl_iterator_last(MAYBE_UNUSED struct iterator *ptr, struct tuple **ret) -{ - *ret = NULL; - return 0; -} - -static int -vinyl_iterator_next(struct iterator *base_it, struct tuple **ret) -{ - struct vinyl_iterator *it = (struct vinyl_iterator *) base_it; - struct vy_env *env = it->env; - struct tuple *tuple; - - /* found */ - if (vy_cursor_next(env, it->cursor, &tuple) != 0) { - /* immediately close the cursor */ - vy_cursor_delete(env, it->cursor); - it->cursor = NULL; - it->base.next = vinyl_iterator_last; - return -1; - } - if (tuple != NULL) { - tuple = tuple_bless(tuple); - if (tuple == NULL) - return -1; - tuple_unref(tuple); - *ret = tuple; - return 0; - } - - /* immediately close the cursor */ - vy_cursor_delete(env, it->cursor); - it->cursor = NULL; - it->base.next = vinyl_iterator_last; - *ret = NULL; - return 0; -} - -static void -vinyl_iterator_free(struct iterator *ptr) -{ - assert(ptr->free == vinyl_iterator_free); - struct vinyl_iterator *it = (struct vinyl_iterator *) ptr; - if (it->cursor) { - vy_cursor_delete(it->env, it->cursor); - it->cursor = NULL; - } - mempool_free(it->pool, it); -} - -static struct iterator * -vinyl_index_create_iterator(struct index *base, enum iterator_type type, - const char *key, uint32_t part_count) -{ - struct vinyl_index *index = (struct vinyl_index *)base; - struct vinyl_engine *vinyl = (struct vinyl_engine *)base->engine; - - assert(part_count == 0 || key != NULL); - struct vy_tx *tx = in_txn() ? 
in_txn()->engine_tx : NULL; - if (type > ITER_GT) { - diag_set(UnsupportedIndexFeature, base->def, - "requested iterator type"); - return NULL; - } - struct vinyl_iterator *it = mempool_alloc(&vinyl->iterator_pool); - if (it == NULL) { - diag_set(OutOfMemory, sizeof(struct vinyl_iterator), - "mempool", "struct vinyl_iterator"); - return NULL; - } - iterator_create(&it->base, base); - it->pool = &vinyl->iterator_pool; - it->base.next = vinyl_iterator_next; - it->base.free = vinyl_iterator_free; - - it->env = vinyl->env; - it->cursor = vy_cursor_new(it->env, tx, index->db, - key, part_count, type); - if (it->cursor == NULL) { - mempool_free(&vinyl->iterator_pool, it); - return NULL; - } - return (struct iterator *)it; -} - -static void -vinyl_index_info(struct index *base, struct info_handler *handler) -{ - struct vinyl_index *index = (struct vinyl_index *)base; - vy_index_info(index->db, handler); -} - -static const struct index_vtab vinyl_index_vtab = { - /* .destroy = */ vinyl_index_destroy, - /* .commit_create = */ vinyl_index_commit_create, - /* .commit_drop = */ vinyl_index_commit_drop, - /* .size = */ generic_index_size, - /* .bsize = */ vinyl_index_bsize, - /* .min = */ generic_index_min, - /* .max = */ generic_index_max, - /* .random = */ generic_index_random, - /* .count = */ generic_index_count, - /* .get = */ vinyl_index_get, - /* .replace = */ generic_index_replace, - /* .create_iterator = */ vinyl_index_create_iterator, - /* .create_snapshot_iterator = */ - generic_index_create_snapshot_iterator, - /* .info = */ vinyl_index_info, - /* .begin_build = */ generic_index_begin_build, - /* .reserve = */ generic_index_reserve, - /* .build_next = */ generic_index_build_next, - /* .end_build = */ generic_index_end_build, -}; - -struct vinyl_index * -vinyl_index_new(struct vinyl_engine *vinyl, struct index_def *def, - struct tuple_format *format, struct vy_index *pk) -{ - if (!mempool_is_initialized(&vinyl->iterator_pool)) { - 
mempool_create(&vinyl->iterator_pool, cord_slab_cache(), - sizeof(struct vinyl_iterator)); - } - - struct vinyl_index *index = - (struct vinyl_index *)calloc(1, sizeof(*index)); - if (index == NULL) { - diag_set(OutOfMemory, sizeof(*index), - "malloc", "struct vinyl_index"); - return NULL; - } - struct vy_index *db = vy_new_index(vinyl->env, def, format, pk); - if (db == NULL) { - free(index); - return NULL; - } - if (index_create(&index->base, (struct engine *)vinyl, - &vinyl_index_vtab, def) != 0) { - vy_delete_index(vinyl->env, db); - free(index); - return NULL; - } - index->db = db; - return index; -} - -int -vinyl_index_open(struct vinyl_index *index) -{ - struct vinyl_engine *vinyl = (struct vinyl_engine *)index->base.engine; - return vy_index_open(vinyl->env, index->db, - cfg_geti("force_recovery")); -} diff --git a/src/box/vinyl_space.c b/src/box/vinyl_space.c deleted file mode 100644 index c92039cf90105d9cfb4863d78f5fddc30814c459..0000000000000000000000000000000000000000 --- a/src/box/vinyl_space.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright 2010-2016, Tarantool AUTHORS, please see AUTHORS file. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include "vinyl_space.h" -#include "vinyl_index.h" -#include "vinyl_engine.h" -#include "xrow.h" -#include "txn.h" -#include "vinyl.h" -#include "tuple.h" -#include "iproto_constants.h" -#include "vy_stmt.h" - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -static void -vinyl_space_destroy(struct space *space) -{ - free(space); -} - -static size_t -vinyl_space_bsize(struct space *space) -{ - (void)space; - return 0; -} - -/* {{{ DML */ - -static int -vinyl_space_apply_initial_join_row(struct space *space, struct request *request) -{ - assert(request->header != NULL); - struct vy_env *env = ((struct vinyl_engine *)space->engine)->env; - - struct vy_tx *tx = vy_begin(env); - if (tx == NULL) - return -1; - - int64_t signature = request->header->lsn; - - struct txn_stmt stmt; - memset(&stmt, 0, sizeof(stmt)); - - int rc; - switch (request->type) { - case IPROTO_REPLACE: - rc = vy_replace(env, tx, &stmt, space, request); - break; - case IPROTO_UPSERT: - rc = vy_upsert(env, tx, &stmt, space, request); - break; - case IPROTO_DELETE: - rc = vy_delete(env, tx, &stmt, space, request); - break; - default: - diag_set(ClientError, ER_UNKNOWN_REQUEST_TYPE, request->type); - return -1; - } - if (rc != 0) - return -1; - - if (stmt.old_tuple) - tuple_unref(stmt.old_tuple); - if (stmt.new_tuple) - tuple_unref(stmt.new_tuple); - - if (vy_prepare(env, tx)) { - vy_rollback(env, tx); - return -1; - } - vy_commit(env, tx, 
signature); - return 0; -} - -/* - * Four cases: - * - insert in one index - * - insert in multiple indexes - * - replace in one index - * - replace in multiple indexes. - */ -static int -vinyl_space_execute_replace(struct space *space, struct txn *txn, - struct request *request, struct tuple **result) -{ - assert(request->index_id == 0); - struct vinyl_engine *engine = (struct vinyl_engine *)space->engine; - struct vy_tx *tx = (struct vy_tx *)txn->engine_tx; - struct txn_stmt *stmt = txn_current_stmt(txn); - - if (vy_replace(engine->env, tx, stmt, space, request)) - return -1; - *result = stmt->new_tuple; - return 0; -} - -static int -vinyl_space_execute_delete(struct space *space, struct txn *txn, - struct request *request, struct tuple **result) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)space->engine; - struct txn_stmt *stmt = txn_current_stmt(txn); - struct vy_tx *tx = (struct vy_tx *) txn->engine_tx; - if (vy_delete(engine->env, tx, stmt, space, request)) - return -1; - /* - * Delete may or may not set stmt->old_tuple, but we - * always return NULL. 
- */ - *result = NULL; - return 0; -} - -static int -vinyl_space_execute_update(struct space *space, struct txn *txn, - struct request *request, struct tuple **result) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)space->engine; - struct vy_tx *tx = (struct vy_tx *)txn->engine_tx; - struct txn_stmt *stmt = txn_current_stmt(txn); - if (vy_update(engine->env, tx, stmt, space, request) != 0) - return -1; - *result = stmt->new_tuple; - return 0; -} - -static int -vinyl_space_execute_upsert(struct space *space, struct txn *txn, - struct request *request) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)space->engine; - struct vy_tx *tx = (struct vy_tx *)txn->engine_tx; - struct txn_stmt *stmt = txn_current_stmt(txn); - return vy_upsert(engine->env, tx, stmt, space, request); -} - -/* }}} DML */ - -/* {{{ DDL */ - -static void -vinyl_init_system_space(struct space *space) -{ - (void)space; - unreachable(); -} - -static int -vinyl_space_check_format(struct space *new_space, struct space *old_space) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)new_space->engine; - return vy_check_format(engine->env, old_space); -} - -static int -vinyl_space_check_index_def(struct space *space, struct index_def *index_def) -{ - if (index_def->type != TREE) { - diag_set(ClientError, ER_INDEX_TYPE, - index_def->name, space_name(space)); - return -1; - } - if (index_def->key_def->is_nullable && index_def->iid == 0) { - diag_set(ClientError, ER_NULLABLE_PRIMARY, space_name(space)); - return -1; - } - /* Check that there are no ANY, ARRAY, MAP parts */ - for (uint32_t i = 0; i < index_def->key_def->part_count; i++) { - struct key_part *part = &index_def->key_def->parts[i]; - if (part->type <= FIELD_TYPE_ANY || - part->type >= FIELD_TYPE_ARRAY) { - diag_set(ClientError, ER_MODIFY_INDEX, - index_def->name, space_name(space), - tt_sprintf("field type '%s' is not supported", - field_type_strs[part->type])); - return -1; - } - } - return 0; -} - -static struct index 
* -vinyl_space_create_index(struct space *space, struct index_def *index_def) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)space->engine; - if (index_def->type != TREE) { - unreachable(); - return NULL; - } - struct vy_index *pk = NULL; - if (index_def->iid > 0) { - pk = vy_index(space_index(space, 0)); - assert(pk != NULL); - } - return (struct index *)vinyl_index_new(engine, index_def, - space->format, pk); -} - -static int -vinyl_space_add_primary_key(struct space *space) -{ - struct index *pk = index_find(space, 0); - if (pk == NULL) - return -1; - return vinyl_index_open((struct vinyl_index *)pk); -} - -static void -vinyl_space_drop_primary_key(struct space *space) -{ - (void)space; -} - -static int -vinyl_space_build_secondary_key(struct space *old_space, - struct space *new_space, - struct index *new_index) -{ - (void)old_space; - (void)new_space; - /* - * Unlike Memtx, Vinyl does not need building of a secondary index. - * This is true because of two things: - * 1) Vinyl does not support alter of non-empty spaces - * 2) During recovery a Vinyl index already has all needed data on disk. - * And there are 3 cases: - * I. The secondary index is added in snapshot. Then Vinyl was - * snapshotted too and all necessary for that moment data is on disk. - * II. The secondary index is added in WAL. That means that vinyl - * space had no data at that point and had nothing to build. The - * index actually could contain recovered data, but it will handle it - * by itself during WAL recovery. - * III. Vinyl is online. The space is definitely empty and there's - * nothing to build. - * - * When we start to implement alter of non-empty vinyl spaces, it - * seems that we should call here: - * Engine::buildSecondaryKey(old_space, new_space, new_index_arg); - * but aware of three cases mentioned above. 
- */ - return vinyl_index_open((struct vinyl_index *)new_index); -} - -static int -vinyl_space_prepare_truncate(struct space *old_space, - struct space *new_space) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)old_space->engine; - return vy_prepare_truncate_space(engine->env, old_space, new_space); -} - -static void -vinyl_space_commit_truncate(struct space *old_space, - struct space *new_space) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)old_space->engine; - vy_commit_truncate_space(engine->env, old_space, new_space); -} - -static int -vinyl_space_prepare_alter(struct space *old_space, struct space *new_space) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)old_space->engine; - return vy_prepare_alter_space(engine->env, old_space, new_space); -} - -static void -vinyl_space_commit_alter(struct space *old_space, struct space *new_space) -{ - struct vinyl_engine *engine = (struct vinyl_engine *)old_space->engine; - if (new_space == NULL || new_space->index_count == 0) { - /* This is a drop space. */ - return; - } - if (vy_commit_alter_space(engine->env, new_space, - new_space->format) != 0) { - /* FIXME: space_vtab::commit_alter() must not fail. 
*/ - diag_log(); - unreachable(); - panic("failed to alter space"); - } -} - -/* }}} DDL */ - -static const struct space_vtab vinyl_space_vtab = { - /* .destroy = */ vinyl_space_destroy, - /* .bsize = */ vinyl_space_bsize, - /* .apply_initial_join_row = */ vinyl_space_apply_initial_join_row, - /* .execute_replace = */ vinyl_space_execute_replace, - /* .execute_delete = */ vinyl_space_execute_delete, - /* .execute_update = */ vinyl_space_execute_update, - /* .execute_upsert = */ vinyl_space_execute_upsert, - /* .init_system_space = */ vinyl_init_system_space, - /* .check_index_def = */ vinyl_space_check_index_def, - /* .create_index = */ vinyl_space_create_index, - /* .add_primary_key = */ vinyl_space_add_primary_key, - /* .drop_primary_key = */ vinyl_space_drop_primary_key, - /* .check_format = */ vinyl_space_check_format, - /* .build_secondary_key = */ vinyl_space_build_secondary_key, - /* .prepare_truncate = */ vinyl_space_prepare_truncate, - /* .commit_truncate = */ vinyl_space_commit_truncate, - /* .prepare_alter = */ vinyl_space_prepare_alter, - /* .commit_alter = */ vinyl_space_commit_alter, -}; - -struct space * -vinyl_space_new(struct vinyl_engine *vinyl, - struct space_def *def, struct rlist *key_list) -{ - struct space *space = malloc(sizeof(*space)); - if (space == NULL) { - diag_set(OutOfMemory, sizeof(*space), - "malloc", "struct space"); - return NULL; - } - - /* Create a format from key and field definitions. 
*/ - int key_count = 0; - struct index_def *index_def; - rlist_foreach_entry(index_def, key_list, link) - key_count++; - struct key_def **keys = region_alloc(&fiber()->gc, - sizeof(*keys) * key_count); - if (keys == NULL) { - free(space); - return NULL; - } - key_count = 0; - rlist_foreach_entry(index_def, key_list, link) - keys[key_count++] = index_def->key_def; - - struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, - keys, key_count, 0, def->fields, def->field_count); - if (format == NULL) { - free(space); - return NULL; - } - format->exact_field_count = def->exact_field_count; - tuple_format_ref(format); - - if (space_create(space, (struct engine *)vinyl, - &vinyl_space_vtab, def, key_list, format) != 0) { - tuple_format_unref(format); - free(space); - return NULL; - } - - /* Format is now referenced by the space. */ - tuple_format_unref(format); - return space; -} diff --git a/src/box/vinyl_space.h b/src/box/vinyl_space.h deleted file mode 100644 index ad023b3d8b3e737bd45d02c20ace819ca604eaee..0000000000000000000000000000000000000000 --- a/src/box/vinyl_space.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef TARANTOOL_BOX_VINYL_SPACE_H_INCLUDED -#define TARANTOOL_BOX_VINYL_SPACE_H_INCLUDED -/* - * Copyright 2010-2016, Tarantool AUTHORS, please see AUTHORS file. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include "space.h" - -#if defined(__cplusplus) -extern "C" { -#endif /* defined(__cplusplus) */ - -struct vinyl_engine; - -struct space * -vinyl_space_new(struct vinyl_engine *vinyl, - struct space_def *def, struct rlist *key_list); - -#if defined(__cplusplus) -} /* extern "C" */ -#endif /* defined(__cplusplus) */ - -#endif /* TARANTOOL_BOX_VINYL_SPACE_H_INCLUDED */ diff --git a/src/box/vy_cache.c b/src/box/vy_cache.c index 24ce2fa348287dc4100f2b8c56e11e86f9c8136d..2c265bb103ebe175ed06858d2f38356708d8b026 100644 --- a/src/box/vy_cache.c +++ b/src/box/vy_cache.c @@ -578,6 +578,23 @@ vy_cache_iterator_step(struct vy_cache_iterator *itr, struct tuple **ret) return vy_cache_iterator_is_stop(itr, entry); } +/** + * Skip all statements that are invisible in the read view + * associated with the iterator. + */ +static void +vy_cache_iterator_skip_to_read_view(struct vy_cache_iterator *itr, bool *stop) +{ + while (itr->curr_stmt != NULL && + vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn) { + /* + * The cache stores the latest tuple of the key, + * but there could be older tuples in runs. 
+ */ + *stop = false; + vy_cache_iterator_step(itr, &itr->curr_stmt); + } +} /** * Position the iterator to the first cache entry satisfying @@ -618,21 +635,18 @@ vy_cache_iterator_seek(struct vy_cache_iterator *itr, *entry = *vy_cache_tree_iterator_get_elem(tree, &itr->curr_pos); } -NODISCARD int -vy_cache_iterator_next_key(struct vy_stmt_iterator *vitr, - struct tuple **ret, bool *stop) +void +vy_cache_iterator_next(struct vy_cache_iterator *itr, + struct tuple **ret, bool *stop) { - assert(vitr->iface->next_key == vy_cache_iterator_next_key); - struct vy_cache_iterator *itr = (struct vy_cache_iterator *) vitr; - *ret = NULL; *stop = false; /* disable cache for errinj test - let it try to read from disk */ ERROR_INJECT(ERRINJ_VY_READ_PAGE, - { itr->search_started = true; return 0; }); + { itr->search_started = true; return; }); ERROR_INJECT(ERRINJ_VY_READ_PAGE_TIMEOUT, - { itr->search_started = true; return 0; }); + { itr->search_started = true; return; }); if (!itr->search_started) { assert(itr->curr_stmt == NULL); @@ -642,67 +656,102 @@ vy_cache_iterator_next_key(struct vy_stmt_iterator *vitr, vy_cache_iterator_seek(itr, itr->iterator_type, itr->key, &entry); if (entry == NULL) - return 0; + return; itr->curr_stmt = entry->stmt; *stop = vy_cache_iterator_is_stop(itr, entry); } else { assert(itr->version == itr->cache->version); if (itr->curr_stmt == NULL) - return 0; + return; tuple_unref(itr->curr_stmt); *stop = vy_cache_iterator_step(itr, &itr->curr_stmt); } - while (itr->curr_stmt != NULL && - vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn) { - /* - * The cache stores the latest tuple of the key, - * but there could be older tuples in runs. 
- */ - *stop = false; - vy_cache_iterator_step(itr, &itr->curr_stmt); - } + + vy_cache_iterator_skip_to_read_view(itr, stop); if (itr->curr_stmt != NULL) { *ret = itr->curr_stmt; tuple_ref(itr->curr_stmt); vy_stmt_counter_acct_tuple(&itr->cache->stat.get, itr->curr_stmt); } - return 0; } -/** - * This should never be called, because ->next_key() may only - * return REPLACE statements. - */ -NODISCARD int -vy_cache_iterator_next_lsn(struct vy_stmt_iterator *vitr, struct tuple **ret) +void +vy_cache_iterator_skip(struct vy_cache_iterator *itr, + const struct tuple *last_stmt, + struct tuple **ret, bool *stop) { - assert(vitr->iface->next_lsn == vy_cache_iterator_next_lsn); - (void)vitr; - (void)ret; - unreachable(); - return 0; + *ret = NULL; + *stop = false; + + /* disable cache for errinj test - let it try to read from disk */ + ERROR_INJECT(ERRINJ_VY_READ_PAGE, + { itr->search_started = true; return; }); + ERROR_INJECT(ERRINJ_VY_READ_PAGE_TIMEOUT, + { itr->search_started = true; return; }); + + assert(!itr->search_started || itr->version == itr->cache->version); + + /* + * Check if the iterator is already positioned + * at the statement following last_stmt. 
+ */ + if (itr->search_started && + (itr->curr_stmt == NULL || last_stmt == NULL || + iterator_direction(itr->iterator_type) * + vy_stmt_compare(itr->curr_stmt, last_stmt, + itr->cache->cmp_def) > 0)) { + if (itr->curr_stmt == NULL) + return; + struct vy_cache_tree *tree = &itr->cache->cache_tree; + struct vy_cache_entry *entry = + *vy_cache_tree_iterator_get_elem(tree, &itr->curr_pos); + *ret = itr->curr_stmt; + *stop = vy_cache_iterator_is_stop(itr, entry); + return; + } + + itr->search_started = true; + itr->version = itr->cache->version; + if (itr->curr_stmt != NULL) + tuple_unref(itr->curr_stmt); + itr->curr_stmt = NULL; + + const struct tuple *key = itr->key; + enum iterator_type iterator_type = itr->iterator_type; + if (last_stmt != NULL) { + key = last_stmt; + iterator_type = iterator_direction(iterator_type) > 0 ? + ITER_GT : ITER_LT; + } + + struct vy_cache_entry *entry; + vy_cache_iterator_seek(itr, iterator_type, key, &entry); + + if (itr->iterator_type == ITER_EQ && last_stmt != NULL && + entry != NULL && vy_stmt_compare(itr->key, entry->stmt, + itr->cache->cmp_def) != 0) + entry = NULL; + + if (entry != NULL) { + *stop = vy_cache_iterator_is_stop(itr, entry); + itr->curr_stmt = entry->stmt; + } + + vy_cache_iterator_skip_to_read_view(itr, stop); + if (itr->curr_stmt != NULL) { + *ret = itr->curr_stmt; + tuple_ref(itr->curr_stmt); + vy_stmt_counter_acct_tuple(&itr->cache->stat.get, + itr->curr_stmt); + } } -/** - * Restore the current position (if necessary). - * @sa struct vy_stmt_iterator comments. 
- * - * @param last_stmt the key the iterator was positioned on - * - * @retval 0 nothing changed - * @retval 1 iterator position was changed - */ int -vy_cache_iterator_restore(struct vy_stmt_iterator *vitr, - const struct tuple *last_stmt, struct tuple **ret, - bool *stop) +vy_cache_iterator_restore(struct vy_cache_iterator *itr, + const struct tuple *last_stmt, + struct tuple **ret, bool *stop) { - assert(vitr->iface->restore == vy_cache_iterator_restore); - struct vy_cache_iterator *itr = (struct vy_cache_iterator *) vitr; - - assert(itr->search_started); - /* disable cache for errinj test - let it try to read from disk */ if ((errinj(ERRINJ_VY_READ_PAGE, ERRINJ_BOOL) != NULL && errinj(ERRINJ_VY_READ_PAGE, ERRINJ_BOOL)->bparam) || @@ -716,7 +765,7 @@ vy_cache_iterator_restore(struct vy_stmt_iterator *vitr, struct key_def *def = itr->cache->cmp_def; int dir = iterator_direction(itr->iterator_type); - if (itr->version == itr->cache->version) + if (!itr->search_started || itr->version == itr->cache->version) return 0; itr->version = itr->cache->version; @@ -751,16 +800,7 @@ vy_cache_iterator_restore(struct vy_stmt_iterator *vitr, *stop = vy_cache_iterator_is_stop(itr, entry); itr->curr_stmt = entry->stmt; } - - while (itr->curr_stmt != NULL && - vy_stmt_lsn(itr->curr_stmt) > (**itr->read_view).vlsn) { - /* - * The cache stores the latest tuple of the key, - * but there could be older tuples in runs. - */ - *stop = false; - vy_cache_iterator_step(itr, &itr->curr_stmt); - } + vy_cache_iterator_skip_to_read_view(itr, stop); } else { /* * The iterator position is still valid, but new @@ -806,40 +846,24 @@ vy_cache_iterator_restore(struct vy_stmt_iterator *vitr, return 0; } -/** - * Close the iterator and free resources. 
- */ void -vy_cache_iterator_close(struct vy_stmt_iterator *vitr) +vy_cache_iterator_close(struct vy_cache_iterator *itr) { - assert(vitr->iface->close == vy_cache_iterator_close); - struct vy_cache_iterator *itr = (struct vy_cache_iterator *) vitr; if (itr->curr_stmt != NULL) { tuple_unref(itr->curr_stmt); itr->curr_stmt = NULL; } - tuple_unref(itr->key); TRASH(itr); } -static struct vy_stmt_iterator_iface vy_cache_iterator_iface = { - .next_key = vy_cache_iterator_next_key, - .next_lsn = vy_cache_iterator_next_lsn, - .restore = vy_cache_iterator_restore, - .close = vy_cache_iterator_close, -}; - void vy_cache_iterator_open(struct vy_cache_iterator *itr, struct vy_cache *cache, - enum iterator_type iterator_type, struct tuple *key, - const struct vy_read_view **rv) + enum iterator_type iterator_type, + const struct tuple *key, const struct vy_read_view **rv) { - itr->base.iface = &vy_cache_iterator_iface; - itr->cache = cache; itr->iterator_type = iterator_type; itr->key = key; - tuple_ref(key); itr->read_view = rv; itr->curr_stmt = NULL; diff --git a/src/box/vy_cache.h b/src/box/vy_cache.h index b3ad3d0da272a4b728602ad1e2bab1fcbeebdd43..d1a80ec57911de7183fb5b5eb805265729111c1f 100644 --- a/src/box/vy_cache.h +++ b/src/box/vy_cache.h @@ -38,7 +38,7 @@ #include "iterator_type.h" #include "vy_stmt.h" /* for comparators */ -#include "vy_stmt_iterator.h" /* struct vy_stmt_iterator */ +#include "vy_read_view.h" #include "vy_stat.h" #include "small/mempool.h" @@ -214,8 +214,6 @@ vy_cache_on_write(struct vy_cache *cache, const struct tuple *stmt, * Cache iterator */ struct vy_cache_iterator { - /** Parent class, must be the first member */ - struct vy_stmt_iterator base; /* The cache */ struct vy_cache *cache; @@ -226,7 +224,7 @@ struct vy_cache_iterator { */ enum iterator_type iterator_type; /* Search key data in terms of vinyl, vy_stmt_compare_raw argument */ - struct tuple *key; + const struct tuple *key; /* LSN visibility, iterator shows values with lsn <= vlsn */ 
const struct vy_read_view **read_view; @@ -253,7 +251,43 @@ struct vy_cache_iterator { void vy_cache_iterator_open(struct vy_cache_iterator *itr, struct vy_cache *cache, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv); + const struct tuple *key, const struct vy_read_view **rv); + +/** + * Advance a cache iterator to the next statement. + * The next statement is returned in @ret (NULL if EOF). + * @stop flag is set if a chain was found in the cache + * and so there shouldn't be statements preceding the + * returned statement in memory or on disk. + */ +void +vy_cache_iterator_next(struct vy_cache_iterator *itr, + struct tuple **ret, bool *stop); + +/** + * Advance a cache iterator to the statement following @last_stmt. + * The statement is returned in @ret (NULL if EOF). + */ +void +vy_cache_iterator_skip(struct vy_cache_iterator *itr, + const struct tuple *last_stmt, + struct tuple **ret, bool *stop); + +/** + * Check if a cache iterator was invalidated and needs to be restored. + * If it does, set the iterator position to the statement following + * @last_stmt and return 1, otherwise return 0. + */ +int +vy_cache_iterator_restore(struct vy_cache_iterator *itr, + const struct tuple *last_stmt, + struct tuple **ret, bool *stop); + +/** + * Close a cache iterator. + */ +void +vy_cache_iterator_close(struct vy_cache_iterator *itr); #if defined(__cplusplus) } /* extern "C" { */ diff --git a/src/box/vy_index.c b/src/box/vy_index.c index b99fcbcc38d4756194011a40b0ebd92b83126518..a1f19fb2fc76468caa66c7fc5f5d153f34e3ece1 100644 --- a/src/box/vy_index.c +++ b/src/box/vy_index.c @@ -999,8 +999,6 @@ vy_index_split_range(struct vy_index *index, struct vy_range *range) } part->compact_priority = range->compact_priority; } - tuple_unref(split_key); - split_key = NULL; /* * Log change in metadata. 
@@ -1036,11 +1034,12 @@ vy_index_split_range(struct vy_index *index, struct vy_range *range) index->range_tree_version++; say_info("%s: split range %s by key %s", vy_index_name(index), - vy_range_str(range), vy_key_str(split_key_raw)); + vy_range_str(range), tuple_str(split_key)); rlist_foreach_entry(slice, &range->slices, in_range) vy_slice_wait_pinned(slice); vy_range_delete(range); + tuple_unref(split_key); return true; fail: for (int i = 0; i < n_parts; i++) { diff --git a/src/box/vy_log.c b/src/box/vy_log.c index 381446cd1de6f7056a7a87907ead9d63f9689963..95a28e847ebfe2a64324f5945ff984bf11e8ae6d 100644 --- a/src/box/vy_log.c +++ b/src/box/vy_log.c @@ -272,6 +272,15 @@ struct vy_slice_recovery_info { char *end; }; +/** + * Return the name of the vylog file that has the given signature. + */ +static inline const char * +vy_log_filename(int64_t signature) +{ + return xdir_format_filename(&vy_log.dir, signature, NONE); +} + /** * Return the lsn of the checkpoint that was taken * before the given lsn. @@ -663,7 +672,7 @@ vy_log_record_decode(struct vy_log_record *record, fail: buf = tt_static_buf(); mp_snprint(buf, TT_STATIC_BUF_LEN, req.tuple); - say_error("invalid record in metadata log: %s", buf); + say_error("failed to decode vylog record: %s", buf); return -1; } @@ -813,8 +822,7 @@ vy_log_open(struct xlog *xlog) * Open the current log file or create a new one * if it doesn't exist. */ - char *path = xdir_format_filename(&vy_log.dir, - vclock_sum(&vy_log.last_checkpoint), NONE); + const char *path = vy_log_filename(vclock_sum(&vy_log.last_checkpoint)); if (access(path, F_OK) == 0) return xlog_open(xlog, path); @@ -925,8 +933,11 @@ vy_log_end_recovery(void) assert(vy_log.recovery != NULL); /* Flush all pending records. 
*/ - if (vy_log_flush() < 0) + if (vy_log_flush() < 0) { + diag_log(); + say_error("failed to flush vylog after recovery"); return -1; + } /* * On backup we copy files corresponding to the most recent @@ -949,8 +960,12 @@ vy_log_end_recovery(void) } vclock_copy(vclock, &vy_log.last_checkpoint); xdir_add_vclock(&vy_log.dir, vclock); - if (vy_log_create(vclock, vy_log.recovery) < 0) + if (vy_log_create(vclock, vy_log.recovery) < 0) { + diag_log(); + say_error("failed to write `%s'", + vy_log_filename(vclock_sum(vclock))); return -1; + } } vy_log.recovery = NULL; @@ -972,6 +987,8 @@ vy_log_rotate_cb_func(const struct vy_log_record *record, void *cb_arg) struct xlog *xlog = arg->xlog; struct xrow_header row; + say_verbose("save vylog record: %s", vy_log_record_str(record)); + /* Create the log file on the first write. */ if (!xlog_is_open(xlog) && xdir_create_xlog(arg->dir, xlog, arg->vclock) < 0) @@ -996,6 +1013,8 @@ vy_log_create(const struct vclock *vclock, struct vy_recovery *recovery) struct xlog xlog; xlog_clear(&xlog); + say_verbose("saving vylog %lld", (long long)vclock_sum(vclock)); + struct vy_log_rotate_cb_arg arg = { .xlog = &xlog, .dir = &vy_log.dir, @@ -1005,7 +1024,7 @@ vy_log_create(const struct vclock *vclock, struct vy_recovery *recovery) goto err_write_xlog; if (!xlog_is_open(&xlog)) - return 0; /* nothing written */ + goto done; /* nothing written */ /* Mark the end of the snapshot. 
*/ struct xrow_header row; @@ -1023,6 +1042,8 @@ vy_log_create(const struct vclock *vclock, struct vy_recovery *recovery) goto err_write_xlog; xlog_close(&xlog, false); +done: + say_verbose("done saving vylog"); return 0; err_write_xlog: @@ -1066,8 +1087,9 @@ vy_log_rotate(const struct vclock *vclock) } vclock_copy(new_vclock, vclock); - say_debug("%s: signature %lld", __func__, - (long long)vclock_sum(vclock)); + say_verbose("rotating vylog %lld => %lld", + (long long)vclock_sum(&vy_log.last_checkpoint), + (long long)vclock_sum(vclock)); struct vy_recovery *recovery; recovery = vy_recovery_new(vclock_sum(&vy_log.last_checkpoint), false); @@ -1089,6 +1111,9 @@ vy_log_rotate(const struct vclock *vclock) int rc = coio_call(vy_log_rotate_f, recovery, vclock); vy_recovery_delete(recovery); if (rc < 0) { + diag_log(); + say_error("failed to write `%s'", + vy_log_filename(vclock_sum(vclock))); latch_unlock(&vy_log.latch); goto fail; } @@ -1104,13 +1129,9 @@ vy_log_rotate(const struct vclock *vclock) xdir_add_vclock(&vy_log.dir, new_vclock); latch_unlock(&vy_log.latch); - say_debug("%s: complete", __func__); + say_verbose("done rotating vylog"); return 0; - fail: - say_debug("%s: failed", __func__); - say_error("failed to rotate metadata log: %s", - diag_last_error(diag_get())->errmsg); free(new_vclock); return -1; } @@ -1136,7 +1157,7 @@ vy_log_backup_path(struct vclock *vclock) int64_t lsn = vy_log_prev_checkpoint(vclock_sum(vclock)); if (lsn < 0) return NULL; - const char *path = xdir_format_filename(&vy_log.dir, lsn, NONE); + const char *path = vy_log_filename(lsn); if (access(path, F_OK) == -1 && errno == ENOENT) return NULL; /* vinyl not used */ return path; @@ -1148,7 +1169,7 @@ vy_log_tx_begin(void) latch_lock(&vy_log.latch); vy_log.tx_begin = NULL; vy_log.tx_failed = false; - say_debug("%s", __func__); + say_verbose("begin vylog transaction"); } /** @@ -1162,7 +1183,6 @@ static int vy_log_tx_do_commit(bool no_discard) { struct stailq rollback; - int rc = 0; 
assert(latch_owner(&vy_log.latch) == fiber()); @@ -1171,12 +1191,11 @@ vy_log_tx_do_commit(bool no_discard) * vy_log_write() failed to append a record to tx. * @no_discard transactions can't handle this. */ + diag_move(&vy_log.tx_diag, diag_get()); if (no_discard) { - error_log(diag_last_error(&vy_log.tx_diag)); - panic("vinyl log write failed"); + diag_log(); + panic("non-discardable vylog transaction failed"); } - diag_move(&vy_log.tx_diag, diag_get()); - rc = -1; goto rollback; } @@ -1187,22 +1206,19 @@ vy_log_tx_do_commit(bool no_discard) * recovery completion. */ if (vy_log.recovery != NULL) - goto out; - - rc = vy_log_flush(); + goto done; /* * Rollback the transaction on failure unless * we were explicitly told not to. */ - if (rc != 0 && !no_discard) + if (vy_log_flush() != 0 && !no_discard) goto rollback; -out: - say_debug("%s(no_discard=%d): %s", __func__, no_discard, - rc == 0 ? "success" : "fail"); +done: + say_verbose("commit vylog transaction"); latch_unlock(&vy_log.latch); - return rc; + return 0; rollback: stailq_create(&rollback); @@ -1211,7 +1227,9 @@ vy_log_tx_do_commit(bool no_discard) vy_log.tx_size = 0; vy_log.tx_svp = 0; vy_log.tx_begin = NULL; - goto out; + say_verbose("rollback vylog transaction"); + latch_unlock(&vy_log.latch); + return -1; } int @@ -1241,7 +1259,7 @@ vy_log_write(const struct vy_log_record *record) return; } - say_debug("%s: %s", __func__, vy_log_record_str(tx_record)); + say_verbose("write vylog record: %s", vy_log_record_str(tx_record)); stailq_add_tail_entry(&vy_log.tx, tx_record, in_tx); vy_log.tx_size++; @@ -1928,8 +1946,6 @@ static int vy_recovery_process_record(struct vy_recovery *recovery, const struct vy_log_record *record) { - say_debug("%s: %s", __func__, vy_log_record_str(record)); - int rc; switch (record->type) { case VY_LOG_CREATE_INDEX: @@ -1981,6 +1997,9 @@ vy_recovery_process_record(struct vy_recovery *recovery, default: unreachable(); } + if (rc != 0) + say_error("failed to process vylog record: %s", + 
vy_log_record_str(record)); return rc; } @@ -1991,6 +2010,8 @@ vy_recovery_new_f(va_list ap) bool only_checkpoint = va_arg(ap, int); struct vy_recovery **p_recovery = va_arg(ap, struct vy_recovery **); + say_verbose("loading vylog %lld", (long long)signature); + struct vy_recovery *recovery = malloc(sizeof(*recovery)); if (recovery == NULL) { diag_set(OutOfMemory, sizeof(*recovery), @@ -2024,8 +2045,7 @@ vy_recovery_new_f(va_list ap) * be stored in it, so if the log doesn't exist, assume * the recovery context is empty. */ - const char *path = xdir_format_filename(&vy_log.dir, - signature, NONE); + const char *path = vy_log_filename(signature); if (access(path, F_OK) < 0 && errno == ENOENT) goto out; @@ -2040,6 +2060,8 @@ vy_recovery_new_f(va_list ap) rc = vy_log_record_decode(&record, &row); if (rc < 0) break; + say_verbose("load vylog record: %s", + vy_log_record_str(&record)); if (record.type == VY_LOG_SNAPSHOT) { if (only_checkpoint) break; @@ -2056,6 +2078,7 @@ vy_recovery_new_f(va_list ap) xlog_cursor_close(&cursor, false); out: + say_verbose("done loading vylog"); *p_recovery = recovery; return 0; @@ -2080,12 +2103,19 @@ vy_recovery_new(int64_t signature, bool only_checkpoint) * pending records have been flushed out. */ rc = vy_log_flush(); - if (rc != 0) + if (rc != 0) { + diag_log(); + say_error("failed to flush vylog for recovery"); goto out; + } /* Load the log from coio so as not to stall tx thread. */ rc = coio_call(vy_recovery_new_f, signature, (int)only_checkpoint, &recovery); + if (rc != 0) { + diag_log(); + say_error("failed to load `%s'", vy_log_filename(signature)); + } out: latch_unlock(&vy_log.latch); return rc == 0 ? recovery : NULL; @@ -2128,15 +2158,6 @@ vy_recovery_delete(struct vy_recovery *recovery) free(recovery); } -/** Helper to call a recovery callback and log the event if debugging. 
*/ -static int -vy_recovery_cb_call(vy_recovery_cb cb, void *cb_arg, - const struct vy_log_record *record) -{ - say_debug("%s: %s", __func__, vy_log_record_str(record)); - return cb(record, cb_arg); -} - static int vy_recovery_iterate_index(struct vy_index_recovery_info *index, vy_recovery_cb cb, void *cb_arg) @@ -2153,7 +2174,7 @@ vy_recovery_iterate_index(struct vy_index_recovery_info *index, record.space_id = index->space_id; record.key_parts = index->key_parts; record.key_part_count = index->key_part_count; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; if (index->truncate_count > 0) { @@ -2161,7 +2182,7 @@ vy_recovery_iterate_index(struct vy_index_recovery_info *index, record.type = VY_LOG_TRUNCATE_INDEX; record.index_lsn = index->index_lsn; record.truncate_count = index->truncate_count; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; } @@ -2170,7 +2191,7 @@ vy_recovery_iterate_index(struct vy_index_recovery_info *index, record.type = VY_LOG_DUMP_INDEX; record.index_lsn = index->index_lsn; record.dump_lsn = index->dump_lsn; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; } @@ -2185,7 +2206,7 @@ vy_recovery_iterate_index(struct vy_index_recovery_info *index, record.index_lsn = index->index_lsn; record.run_id = run->id; record.is_dropped = run->is_dropped; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; if (!run->is_dropped) @@ -2195,7 +2216,7 @@ vy_recovery_iterate_index(struct vy_index_recovery_info *index, record.type = VY_LOG_DROP_RUN; record.run_id = run->id; record.gc_lsn = run->gc_lsn; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; } @@ -2206,7 +2227,7 @@ vy_recovery_iterate_index(struct vy_index_recovery_info *index, record.range_id = range->id; record.begin = range->begin; record.end = range->end; - if 
(vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; /* * Newer slices are stored closer to the head of the list, @@ -2221,7 +2242,7 @@ vy_recovery_iterate_index(struct vy_index_recovery_info *index, record.run_id = slice->run->id; record.begin = slice->begin; record.end = slice->end; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; } } @@ -2230,7 +2251,7 @@ vy_recovery_iterate_index(struct vy_index_recovery_info *index, vy_log_record_init(&record); record.type = VY_LOG_DROP_INDEX; record.index_lsn = index->index_lsn; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; } return 0; @@ -2281,12 +2302,12 @@ vy_recovery_load_index(struct vy_recovery *recovery, record.index_id = index->index_id; record.space_id = index->space_id; record.index_lsn = index_lsn; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; vy_log_record_init(&record); record.type = VY_LOG_DROP_INDEX; record.index_lsn = index_lsn; - if (vy_recovery_cb_call(cb, cb_arg, &record) != 0) + if (cb(&record, cb_arg) != 0) return -1; return 0; } else if (is_checkpoint_recovery || index_lsn == index->index_lsn) { diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c index 453c194f99ae2493ae8ba3567d5f5b78e73e0dd0..e07395c4e0ed18daa43411d3cd25a798105476f2 100644 --- a/src/box/vy_mem.c +++ b/src/box/vy_mem.c @@ -417,15 +417,11 @@ vy_mem_iterator_start(struct vy_mem_iterator *itr) /* {{{ vy_mem_iterator API implementation */ -/* Declared below */ -static const struct vy_stmt_iterator_iface vy_mem_iterator_iface; - void vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *stat, struct vy_mem *mem, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv) + const struct tuple *key, const struct vy_read_view **rv) { - itr->base.iface = &vy_mem_iterator_iface; itr->stat = stat; assert(key != NULL); 
@@ -433,7 +429,6 @@ vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *s itr->iterator_type = iterator_type; itr->key = key; - tuple_ref(key); itr->read_view = rv; itr->curr_pos = vy_mem_tree_invalid_iterator(); @@ -476,19 +471,10 @@ vy_mem_iterator_next_key_impl(struct vy_mem_iterator *itr) return vy_mem_iterator_find_lsn(itr, itr->iterator_type, itr->key); } -/** - * Find the next record with different key as current and visible lsn. - * @retval 0 success or EOF (*ret == NULL) - */ -static NODISCARD int -vy_mem_iterator_next_key(struct vy_stmt_iterator *vitr, struct tuple **ret, - bool *stop) +NODISCARD int +vy_mem_iterator_next_key(struct vy_mem_iterator *itr, struct tuple **ret) { - (void)stop; - assert(vitr->iface->next_key == vy_mem_iterator_next_key); - struct vy_mem_iterator *itr = (struct vy_mem_iterator *) vitr; *ret = NULL; - if (vy_mem_iterator_next_key_impl(itr) == 0) return vy_mem_iterator_copy_to(itr, ret); return 0; @@ -525,42 +511,62 @@ vy_mem_iterator_next_lsn_impl(struct vy_mem_iterator *itr) return 1; } -/** - * Find next (lower, older) record with the same key as current - * @retval 0 success or EOF (*ret == NULL) - */ -static NODISCARD int -vy_mem_iterator_next_lsn(struct vy_stmt_iterator *vitr, struct tuple **ret) +NODISCARD int +vy_mem_iterator_next_lsn(struct vy_mem_iterator *itr, struct tuple **ret) { - assert(vitr->iface->next_lsn == vy_mem_iterator_next_lsn); - struct vy_mem_iterator *itr = (struct vy_mem_iterator *) vitr; *ret = NULL; if (vy_mem_iterator_next_lsn_impl(itr) == 0) return vy_mem_iterator_copy_to(itr, ret); return 0; } -/** - * Restore the current position (if necessary). - * @sa struct vy_stmt_iterator comments. - * - * @param last_stmt the key the the read iterator was positioned on. 
- * - * @retval 0 nothing changed - * @retval 1 iterator position was changed - */ -static NODISCARD int -vy_mem_iterator_restore(struct vy_stmt_iterator *vitr, - const struct tuple *last_stmt, - struct tuple **ret, bool *stop) +NODISCARD int +vy_mem_iterator_skip(struct vy_mem_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret) { - (void)stop; + *ret = NULL; + assert(!itr->search_started || itr->version == itr->mem->version); - assert(vitr->iface->restore == vy_mem_iterator_restore); - struct vy_mem_iterator *itr = (struct vy_mem_iterator *) vitr; + /* + * Check if the iterator is already positioned + * at the statement following last_stmt. + */ + if (itr->search_started && + (itr->curr_stmt == NULL || last_stmt == NULL || + iterator_direction(itr->iterator_type) * + vy_stmt_compare(itr->curr_stmt, last_stmt, + itr->mem->cmp_def) > 0)) { + if (itr->curr_stmt != NULL) + *ret = itr->last_stmt; + return 0; + } - assert(itr->search_started); - if (itr->version == itr->mem->version) + const struct tuple *key = itr->key; + enum iterator_type iterator_type = itr->iterator_type; + if (last_stmt != NULL) { + key = last_stmt; + iterator_type = iterator_direction(iterator_type) > 0 ? + ITER_GT : ITER_LT; + } + + itr->search_started = true; + vy_mem_iterator_seek(itr, iterator_type, key); + + if (itr->iterator_type == ITER_EQ && last_stmt != NULL && + itr->curr_stmt != NULL && vy_stmt_compare(itr->key, + itr->curr_stmt, itr->mem->cmp_def) != 0) + itr->curr_stmt = NULL; + + if (itr->curr_stmt != NULL) + return vy_mem_iterator_copy_to(itr, ret); + return 0; +} + +NODISCARD int +vy_mem_iterator_restore(struct vy_mem_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret) +{ + if (!itr->search_started || itr->version == itr->mem->version) return 0; const struct tuple *key = itr->key; @@ -588,27 +594,14 @@ vy_mem_iterator_restore(struct vy_stmt_iterator *vitr, return 1; } -/** - * Close the iterator and free resources. 
- */ -static void -vy_mem_iterator_close(struct vy_stmt_iterator *vitr) +void +vy_mem_iterator_close(struct vy_mem_iterator *itr) { - assert(vitr->iface->close == vy_mem_iterator_close); - struct vy_mem_iterator *itr = (struct vy_mem_iterator *) vitr; if (itr->last_stmt != NULL) tuple_unref(itr->last_stmt); - tuple_unref(itr->key); TRASH(itr); } -static const struct vy_stmt_iterator_iface vy_mem_iterator_iface = { - .next_key = vy_mem_iterator_next_key, - .next_lsn = vy_mem_iterator_next_lsn, - .restore = vy_mem_iterator_restore, - .close = vy_mem_iterator_close -}; - static NODISCARD int vy_mem_stream_next(struct vy_stmt_stream *virt_stream, struct tuple **ret) { diff --git a/src/box/vy_mem.h b/src/box/vy_mem.h index a03f965aa62c95bfde58b5d486724817a57c1170..13919588ba182747684b8faddba39a676611dd84 100644 --- a/src/box/vy_mem.h +++ b/src/box/vy_mem.h @@ -39,7 +39,8 @@ #include "fiber_cond.h" #include "iterator_type.h" #include "vy_stmt.h" /* for comparators */ -#include "vy_stmt_iterator.h" /* struct vy_stmt_iterator */ +#include "vy_stmt_stream.h" +#include "vy_read_view.h" #include "vy_stat.h" #if defined(__cplusplus) @@ -299,9 +300,6 @@ vy_mem_rollback_stmt(struct vy_mem *mem, const struct tuple *stmt); * key. */ struct vy_mem_iterator { - /** Parent class, must be the first member */ - struct vy_stmt_iterator base; - /** Usage statistics */ struct vy_mem_iterator_stat *stat; @@ -316,7 +314,7 @@ struct vy_mem_iterator { */ enum iterator_type iterator_type; /** Key to search. */ - struct tuple *key; + const struct tuple *key; /* LSN visibility, iterator shows values with lsn <= than that */ const struct vy_read_view **read_view; @@ -344,12 +342,53 @@ struct vy_mem_iterator { }; /** - * Open the iterator. + * Open an iterator over in-memory tree. 
*/ void vy_mem_iterator_open(struct vy_mem_iterator *itr, struct vy_mem_iterator_stat *stat, struct vy_mem *mem, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv); + const struct tuple *key, const struct vy_read_view **rv); + +/** + * Advance a mem iterator to the newest statement for the next key. + * The statement is returned in @ret (NULL if EOF). + * Returns 0 on success, -1 on memory allocation error. + */ +NODISCARD int +vy_mem_iterator_next_key(struct vy_mem_iterator *itr, struct tuple **ret); + +/** + * Advance a mem iterator to the older statement for the same key. + * The statement is returned in @ret (NULL if EOF). + * Returns 0 on success, -1 on memory allocation error. + */ +NODISCARD int +vy_mem_iterator_next_lsn(struct vy_mem_iterator *itr, struct tuple **ret); + +/** + * Advance a mem iterator to the newest statement for the first key + * following @last_stmt. The statement is returned in @ret (NULL if EOF). + * Returns 0 on success, -1 on memory allocation error. + */ +NODISCARD int +vy_mem_iterator_skip(struct vy_mem_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret); + +/** + * Check if a mem iterator was invalidated and needs to be restored. + * If it does, set the iterator position to the newest statement for + * the key following @last_stmt and return 1, otherwise return 0. + * Returns -1 on memory allocation error. + */ +NODISCARD int +vy_mem_iterator_restore(struct vy_mem_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret); + +/** + * Close a mem iterator. + */ +void +vy_mem_iterator_close(struct vy_mem_iterator *itr); /** * Simple stream over a mem. @see vy_stmt_stream. 
diff --git a/src/box/vy_point_iterator.c b/src/box/vy_point_iterator.c index cf4d41660eed6e21e55e85b8e46b290db9c5058b..94445926476fef36fd27b3007ec6ab866987e3d4 100644 --- a/src/box/vy_point_iterator.c +++ b/src/box/vy_point_iterator.c @@ -51,7 +51,6 @@ vy_point_iterator_open(struct vy_point_iterator *itr, struct vy_run_env *run_env itr->tx = tx; itr->p_read_view = rv; itr->key = key; - tuple_ref(key); itr->curr_stmt = NULL; } @@ -91,7 +90,6 @@ vy_point_iterator_close(struct vy_point_iterator *itr) if (itr->curr_stmt != NULL) tuple_unref(itr->curr_stmt); vy_index_unref(itr->index); - tuple_unref(itr->key); TRASH(itr); } @@ -266,9 +264,8 @@ vy_point_iterator_scan_slice(struct vy_point_iterator *itr, itr->p_read_view, index->cmp_def, index->key_def, index->disk_format, index->upsert_format, index->id == 0); - bool unused; struct tuple *stmt; - rc = run_itr.base.iface->next_key(&run_itr.base, &stmt, &unused); + rc = vy_run_iterator_next_key(&run_itr, &stmt); while (rc == 0 && stmt != NULL) { struct vy_stmt_history_node *node = vy_point_iterator_new_node(); if (node == NULL) { @@ -283,9 +280,9 @@ vy_point_iterator_scan_slice(struct vy_point_iterator *itr, *terminal_found = true; break; } - rc = run_itr.base.iface->next_lsn(&run_itr.base, &stmt); + rc = vy_run_iterator_next_lsn(&run_itr, &stmt); } - run_itr.base.iface->close(&run_itr.base); + vy_run_iterator_close(&run_itr); return rc; } diff --git a/src/box/vy_range.c b/src/box/vy_range.c index 68f1cae83aaa254ef6a9d9df6e3452bf6d4d7018..9f29a5e2c7fce00dea5495814d06c8efb19dbf51 100644 --- a/src/box/vy_range.c +++ b/src/box/vy_range.c @@ -224,14 +224,12 @@ vy_range_snprint(char *buf, int size, const struct vy_range *range) int total = 0; SNPRINT(total, snprintf, buf, size, "("); if (range->begin != NULL) - SNPRINT(total, vy_key_snprint, buf, size, - tuple_data(range->begin)); + SNPRINT(total, tuple_snprint, buf, size, range->begin); else SNPRINT(total, snprintf, buf, size, "-inf"); SNPRINT(total, snprintf, buf, size, 
".."); if (range->end != NULL) - SNPRINT(total, vy_key_snprint, buf, size, - tuple_data(range->end)); + SNPRINT(total, tuple_snprint, buf, size, range->end); else SNPRINT(total, snprintf, buf, size, "inf"); SNPRINT(total, snprintf, buf, size, ")"); diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c index f9b85802d9ebdafdd4f12eac38d003061ec4a8e8..cd33360493d268b719344a14b05af1b454ec4f84 100644 --- a/src/box/vy_read_iterator.c +++ b/src/box/vy_read_iterator.c @@ -50,10 +50,7 @@ struct vy_read_src { struct vy_mem_iterator mem_iterator; struct vy_txw_iterator txw_iterator; struct vy_cache_iterator cache_iterator; - struct vy_stmt_iterator iterator; }; - /** Set if the source can change after yield. */ - bool is_mutable; /** Set if the iterator was started. */ bool is_started; /** See vy_read_iterator->front_id. */ @@ -90,308 +87,555 @@ vy_read_iterator_reserve(struct vy_read_iterator *itr, uint32_t capacity) /** * Add another source to read iterator. Must be called before actual * iteration start and must not be called after. - * The resulting vy_stmt_iterator must be properly initialized before merge - * iteration start. - * param is_mutable - Source can change during merge iteration */ static struct vy_read_src * -vy_read_iterator_add_src(struct vy_read_iterator *itr, bool is_mutable) +vy_read_iterator_add_src(struct vy_read_iterator *itr) { if (itr->src_count == itr->src_capacity) { if (vy_read_iterator_reserve(itr, itr->src_count + 1) != 0) return NULL; } - if (is_mutable) { - if (itr->mutable_start == itr->mutable_end) - itr->mutable_start = itr->src_count; - itr->mutable_end = itr->src_count + 1; - } itr->src[itr->src_count].front_id = 0; struct vy_read_src *src = &itr->src[itr->src_count++]; memset(src, 0, sizeof(*src)); - src->is_mutable = is_mutable; return src; } /** - * Check if the read iterator needs to be restored. + * Pin all slices open by the read iterator. 
+ * Used to make sure no run slice is invalidated by + * compaction while we are fetching data from disk. + */ +static void +vy_read_iterator_pin_slices(struct vy_read_iterator *itr) +{ + for (uint32_t i = itr->disk_src; i < itr->src_count; i++) { + struct vy_read_src *src = &itr->src[i]; + vy_slice_pin(src->run_iterator.slice); + } +} + +/** + * Unpin all slices open by the read iterator. + * See also: vy_read_iterator_pin_slices(). + */ +static void +vy_read_iterator_unpin_slices(struct vy_read_iterator *itr) +{ + for (uint32_t i = itr->disk_src; i < itr->src_count; i++) { + struct vy_read_src *src = &itr->src[i]; + vy_slice_unpin(src->run_iterator.slice); + } +} + +/** + * Return true if the current statement is outside the current + * range and hence we should move to the next range. + */ +static bool +vy_read_iterator_range_is_done(struct vy_read_iterator *itr) +{ + struct tuple *stmt = itr->curr_stmt; + struct vy_range *range = itr->curr_range; + struct key_def *cmp_def = itr->index->cmp_def; + int dir = iterator_direction(itr->iterator_type); + + if (dir > 0 && range->end != NULL && + (stmt == NULL || vy_tuple_compare_with_key(stmt, + range->end, cmp_def) >= 0)) + return true; + + if (dir < 0 && range->begin != NULL && + (stmt == NULL || vy_tuple_compare_with_key(stmt, + range->begin, cmp_def) < 0)) + return true; + + return false; +} + +/** + * Compare two tuples from the read iterator perspective. * - * @retval 0 if position did not change (iterator started) - * @retval -2 iterator is no more valid + * Returns: + * -1 if statement @a precedes statement @b in the iterator output + * 0 if statements @a and @b are at the same position + * 1 if statement @a supersedes statement @b + * + * NULL denotes the statement following the last one. 
*/ -static NODISCARD int -vy_read_iterator_check_version(struct vy_read_iterator *itr) +static inline int +vy_read_iterator_cmp_stmt(struct vy_read_iterator *itr, + const struct tuple *a, const struct tuple *b) { - if (itr->index->mem_list_version != itr->mem_list_version) - return -2; - if (itr->index->range_tree_version != itr->range_tree_version) - return -2; - if (itr->curr_range != NULL && - itr->curr_range->version != itr->range_version) - return -2; - return 0; + if (a == NULL && b != NULL) + return 1; + if (a != NULL && b == NULL) + return -1; + if (a == NULL && b == NULL) + return 0; + return iterator_direction(itr->iterator_type) * + vy_tuple_compare(a, b, itr->index->cmp_def); } /** - * Iterate to the next key - * @retval 0 success or EOF (*ret == NULL) - * @retval -1 read error - * @retval -2 iterator is not valid anymore + * Return true if the statement matches search criteria + * and older sources don't need to be scanned. */ -static NODISCARD int -vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret) +static bool +vy_read_iterator_is_exact_match(struct vy_read_iterator *itr, + struct tuple *stmt) { - *ret = NULL; - const struct key_def *def = itr->index->cmp_def; - if (itr->curr_stmt != NULL && (itr->iterator_type == ITER_EQ || - itr->iterator_type == ITER_REQ) && - tuple_field_count(itr->key) >= def->part_count) { + struct tuple *key = itr->key; + enum iterator_type type = itr->iterator_type; + struct key_def *cmp_def = itr->index->cmp_def; + + /* + * If the index is unique and the search key is full, + * we can avoid disk accesses on the first iteration + * in case the key is found in memory. 
+ */ + return itr->last_stmt == NULL && stmt != NULL && + (type == ITER_EQ || type == ITER_REQ || + type == ITER_GE || type == ITER_LE) && + tuple_field_count(key) >= cmp_def->part_count && + vy_stmt_compare(stmt, key, cmp_def) == 0; +} + +/** + * Check if the statement at which the given read source + * is positioned precedes the current candidate for the + * next key ('curr_stmt') and update the latter if so. + * The 'stop' flag is set if the next key is found and + * older sources don't need to be evaluated. + */ +static void +vy_read_iterator_evaluate_src(struct vy_read_iterator *itr, + struct vy_read_src *src, bool *stop) +{ + int cmp; + uint32_t src_id = src - itr->src; + + if (vy_read_iterator_is_exact_match(itr, src->stmt)) { /* - * There may be one statement at max satisfying - * EQ with a full key. + * If we got an exact match, we can skip a tuple + * comparison, because this source must be on top + * of the heap, otherwise 'curr_stmt' would be an + * exact match as well and so we would not have + * scanned this source at all. */ - return 0; + assert(vy_read_iterator_cmp_stmt(itr, src->stmt, + itr->curr_stmt) < 0); + cmp = -1; + *stop = true; + } else { + cmp = vy_read_iterator_cmp_stmt(itr, src->stmt, + itr->curr_stmt); } - if (vy_read_iterator_check_version(itr)) - return -2; - int dir = iterator_direction(itr->iterator_type); - uint32_t prev_front_id = itr->front_id; - itr->front_id++; - itr->curr_src = UINT32_MAX; - struct tuple *min_stmt = NULL; - int rc = 0; + if (cmp < 0) { + assert(src->stmt != NULL); + tuple_ref(src->stmt); + if (itr->curr_stmt != NULL) + tuple_unref(itr->curr_stmt); + itr->curr_stmt = src->stmt; + itr->curr_src = src_id; + itr->front_id++; + } + if (cmp <= 0) + src->front_id = itr->front_id; + if (*stop || src_id >= itr->skipped_src) + itr->skipped_src = src_id + 1; +} + +/* + * Each of the functions from the vy_read_iterator_scan_* family + * is used by vy_read_iterator_next_key() to: + * + * 1. 
Update the position of a read source, which implies: + * + * - Starting iteration over the source if it has not been done + * yet or restoring the iterator position in case the source + * has been modified since the last iteration. + * + * - Advancing the iterator position to the first statement + * following the one returned on the previous iteration. + * To avoid an extra tuple comparison, we maintain front_id + * for each source: all sources with front_id equal to the + * front_id of the read iterator were used on the previous + * iteration and hence need to be advanced. + * + * 2. Update the candidate for the next key ('curr_stmt') if the + * statement at which the source is positioned precedes it. + * The 'stop' flag is set if older sources do not need to be + * scanned (e.g. because a chain was found in the cache). + * See also vy_read_iterator_evaluate_src(). + */ - bool was_yield_possible = false; - for (uint32_t i = 0; i < itr->src_count; i++) { - bool is_yield_possible = i >= itr->mutable_end; - was_yield_possible = was_yield_possible || is_yield_possible; +static void +vy_read_iterator_scan_txw(struct vy_read_iterator *itr, bool *stop) +{ + struct vy_read_src *src = &itr->src[itr->txw_src]; + struct vy_txw_iterator *src_itr = &src->txw_iterator; - struct vy_read_src *src = &itr->src[i]; - bool stop = false; + if (itr->tx == NULL) + return; + assert(itr->txw_src < itr->skipped_src); + + int rc = vy_txw_iterator_restore(src_itr, itr->last_stmt, &src->stmt); + if (rc == 0) { if (!src->is_started) { - /* - * This is the first time the source is used. - * Start the iterator. - */ - src->is_started = true; - rc = src->iterator.iface->next_key(&src->iterator, - &src->stmt, &stop); - } else { - /* - * The source might have changed since the last time - * it was used, so the iterator needs to be restored. 
- */ - rc = src->iterator.iface->restore(&src->iterator, - itr->curr_stmt, - &src->stmt, &stop); - if (rc == 0 && src->front_id == prev_front_id) { - /* - * The source was used on the previous iteration. - * Advance the iterator to the next key unless it - * was restored. - */ - assert(itr->curr_stmt != NULL); - assert(i < itr->skipped_start); - rc = src->iterator.iface->next_key(&src->iterator, - &src->stmt, &stop); - } + vy_txw_iterator_skip(src_itr, itr->last_stmt, + &src->stmt); + } else if (src->front_id == itr->prev_front_id) { + vy_txw_iterator_next(src_itr, &src->stmt); } - if (rc < 0) - return -1; - if (vy_read_iterator_check_version(itr)) - return -2; - if (i >= itr->skipped_start && itr->curr_stmt != NULL) { - /* - * If the source was not used on the last iteration, - * it might have lagged behind the current merge key. - * Advance it until it is up-to-date. - */ - while (src->stmt != NULL && - dir * vy_tuple_compare(src->stmt, itr->curr_stmt, - def) <= 0) { - rc = src->iterator.iface->next_key(&src->iterator, - &src->stmt, - &stop); - if (vy_read_iterator_check_version(itr)) - return -2; - if (rc != 0) - return rc; - } - } - if (i >= itr->skipped_start) - itr->skipped_start++; - - if (stop && src->stmt == NULL && min_stmt == NULL) { - itr->front_id++; - itr->curr_src = i; - src->front_id = itr->front_id; - itr->skipped_start = i + 1; - break; - } - if (src->stmt == NULL) - continue; + src->is_started = true; + } + vy_read_iterator_evaluate_src(itr, src, stop); +} - if (itr->curr_stmt == NULL && (itr->iterator_type == ITER_EQ || - itr->iterator_type == ITER_REQ || - itr->iterator_type == ITER_GE || - itr->iterator_type == ITER_LE) && - tuple_field_count(itr->key) >= def->part_count && - vy_stmt_compare(src->stmt, itr->key, def) == 0) { - /** - * If the index is unique and the search key - * is full, we can avoid disk accesses on the - * first iteration in case the key is found - * in memory. 
- */ - stop = true; +static void +vy_read_iterator_scan_cache(struct vy_read_iterator *itr, bool *stop) +{ + bool is_interval = false; + struct vy_read_src *src = &itr->src[itr->cache_src]; + struct vy_cache_iterator *src_itr = &src->cache_iterator; + + int rc = vy_cache_iterator_restore(src_itr, itr->last_stmt, + &src->stmt, &is_interval); + if (rc == 0) { + if (!src->is_started || itr->cache_src >= itr->skipped_src) { + vy_cache_iterator_skip(src_itr, itr->last_stmt, + &src->stmt, &is_interval); + } else if (src->front_id == itr->prev_front_id) { + vy_cache_iterator_next(src_itr, &src->stmt, + &is_interval); } + src->is_started = true; + } + vy_read_iterator_evaluate_src(itr, src, stop); - int cmp = min_stmt == NULL ? -1 : - dir * vy_tuple_compare(src->stmt, min_stmt, def); - if (cmp < 0) { - itr->front_id++; - if (min_stmt) - tuple_unref(min_stmt); - min_stmt = src->stmt; - tuple_ref(min_stmt); - itr->curr_src = i; - } - if (cmp <= 0) - src->front_id = itr->front_id; + if (is_interval) { + itr->skipped_src = itr->cache_src + 1; + *stop = true; + } +} - if (stop) { - itr->skipped_start = i + 1; - break; +static NODISCARD int +vy_read_iterator_scan_mem(struct vy_read_iterator *itr, + uint32_t mem_src, bool *stop) +{ + int rc; + struct vy_read_src *src = &itr->src[mem_src]; + struct vy_mem_iterator *src_itr = &src->mem_iterator; + + assert(mem_src >= itr->mem_src && mem_src < itr->disk_src); + + rc = vy_mem_iterator_restore(src_itr, itr->last_stmt, &src->stmt); + if (rc == 0) { + if (!src->is_started || mem_src >= itr->skipped_src) { + rc = vy_mem_iterator_skip(src_itr, itr->last_stmt, + &src->stmt); + } else if (src->front_id == itr->prev_front_id) { + rc = vy_mem_iterator_next_key(src_itr, &src->stmt); } + src->is_started = true; } + if (rc < 0) + return -1; - for (int i = MIN(itr->skipped_start, itr->mutable_end) - 1; - was_yield_possible && i >= (int) itr->mutable_start; i--) { - struct vy_read_src *src = &itr->src[i]; - bool stop; - rc = 
src->iterator.iface->restore(&src->iterator, - itr->curr_stmt, - &src->stmt, &stop); - if (vy_read_iterator_check_version(itr)) - return -2; - if (rc < 0) - return rc; - if (rc == 0) - continue; + vy_read_iterator_evaluate_src(itr, src, stop); + return 0; +} - int cmp = min_stmt == NULL ? -1 : - dir * vy_tuple_compare(src->stmt, min_stmt, def); - if (cmp > 0) { - /* - * The iterator could have been positioned at - * min_stmt before the restoration, which was - * removed from the source during the yield. - * Make sure, we won't advance it on the next - * iteration, possibly skipping a statement. - */ - src->front_id = 0; - continue; - } +static NODISCARD int +vy_read_iterator_scan_disk(struct vy_read_iterator *itr, + uint32_t disk_src, bool *stop) +{ + int rc = 0; + struct vy_read_src *src = &itr->src[disk_src]; + struct vy_run_iterator *src_itr = &src->run_iterator; - if (cmp < 0 || vy_stmt_lsn(src->stmt) > vy_stmt_lsn(min_stmt)) { - if (min_stmt) - tuple_unref(min_stmt); - min_stmt = src->stmt; - tuple_ref(min_stmt); - } + assert(disk_src >= itr->disk_src && disk_src < itr->src_count); - if (cmp < 0) { - itr->front_id++; - itr->curr_src = i; - } else - itr->curr_src = MIN(itr->curr_src, (uint32_t)i); + if (!src->is_started || disk_src >= itr->skipped_src) + rc = vy_run_iterator_skip(src_itr, itr->last_stmt, &src->stmt); + else if (src->front_id == itr->prev_front_id) + rc = vy_run_iterator_next_key(src_itr, &src->stmt); + src->is_started = true; - src->front_id = itr->front_id; + if (rc < 0) + return -1; + + vy_read_iterator_evaluate_src(itr, src, stop); + return 0; +} + +/** + * Restore the position of the active in-memory tree iterator + * after a yield caused by a disk read and update 'curr_stmt' + * if necessary. 
+ */ +static NODISCARD int +vy_read_iterator_restore_mem(struct vy_read_iterator *itr) +{ + int rc; + int cmp; + struct vy_read_src *src = &itr->src[itr->mem_src]; + + rc = vy_mem_iterator_restore(&src->mem_iterator, + itr->last_stmt, &src->stmt); + if (rc < 0) + return -1; /* memory allocation error */ + if (rc == 0) + return 0; /* nothing changed */ + + cmp = vy_read_iterator_cmp_stmt(itr, src->stmt, itr->curr_stmt); + if (cmp > 0) { + /* + * Memory trees are append-only so if the + * source is not on top of the heap after + * restoration, it was not before. + */ + assert(src->front_id < itr->front_id); + return 0; } + if (cmp < 0 || itr->curr_src != itr->txw_src) { + /* + * The new statement precedes the current + * candidate for the next key or it is a + * newer version of the same key. + */ + tuple_ref(src->stmt); + if (itr->curr_stmt != NULL) + tuple_unref(itr->curr_stmt); + itr->curr_stmt = src->stmt; + itr->curr_src = itr->mem_src; + } else { + /* + * Make sure we don't read the old value + * from the cache while applying UPSERTs. 
+ */ + itr->src[itr->cache_src].front_id = 0; + } + if (cmp < 0) + itr->front_id++; + src->front_id = itr->front_id; + return 0; +} - if (itr->curr_stmt != NULL && min_stmt != NULL) - assert(dir * vy_tuple_compare(min_stmt, itr->curr_stmt, def) > 0); +static void +vy_read_iterator_restore(struct vy_read_iterator *itr); +static void +vy_read_iterator_next_range(struct vy_read_iterator *itr); + +static int +vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt); + +/** + * Iterate to the next key + * @retval 0 success or EOF (*ret == NULL) + * @retval -1 read error + */ +static NODISCARD int +vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret) +{ + uint32_t i; + bool stop = false; + + if (itr->last_stmt != NULL && (itr->iterator_type == ITER_EQ || + itr->iterator_type == ITER_REQ) && + tuple_field_count(itr->key) >= itr->index->cmp_def->part_count) { + /* + * There may be one statement at max satisfying + * EQ with a full key. + */ + *ret = NULL; + return 0; + } + /* + * Restore the iterator position if the index has changed + * since the last iteration. + */ + if (itr->mem_list_version != itr->index->mem_list_version || + itr->range_tree_version != itr->index->range_tree_version || + itr->range_version != itr->curr_range->version) { + vy_read_iterator_restore(itr); + } +restart: if (itr->curr_stmt != NULL) tuple_unref(itr->curr_stmt); - itr->curr_stmt = min_stmt; - *ret = itr->curr_stmt; + itr->curr_stmt = NULL; + itr->curr_src = UINT32_MAX; + itr->prev_front_id = itr->front_id; + + /* + * Look up the next key in read sources starting + * from the one that stores newest data. 
+ */ + vy_read_iterator_scan_txw(itr, &stop); + if (stop) + goto done; + vy_read_iterator_scan_cache(itr, &stop); + if (stop) + goto done; + + for (i = itr->mem_src; i < itr->disk_src; i++) { + if (vy_read_iterator_scan_mem(itr, i, &stop) != 0) + return -1; + if (stop) + goto done; + } +rescan_disk: + /* The following code may yield as it needs to access disk. */ + vy_read_iterator_pin_slices(itr); + for (i = itr->disk_src; i < itr->src_count; i++) { + if (vy_read_iterator_scan_disk(itr, i, &stop) != 0) + goto err_disk; + if (stop) + break; + } + vy_read_iterator_unpin_slices(itr); + /* + * The list of in-memory indexes and/or the range tree could + * have been modified by dump/compaction while we were fetching + * data from disk. Restart the iterator if this is the case. + * Note, we don't need to check the current range's version, + * because all slices were pinned and hence could not be + * removed. + */ + if (itr->mem_list_version != itr->index->mem_list_version || + itr->range_tree_version != itr->index->range_tree_version) { + vy_read_iterator_restore(itr); + goto restart; + } + /* + * The transaction write set couldn't change during the yield + * as it is owned exclusively by the current fiber so the only + * source to check is the active in-memory tree. + */ + if (vy_read_iterator_restore_mem(itr) != 0) + return -1; + /* + * Scan the next range in case we transgressed the current + * range's boundaries. 
+ */ + if (vy_read_iterator_range_is_done(itr)) { + vy_read_iterator_next_range(itr); + goto rescan_disk; + } +done: + if (itr->last_stmt != NULL && itr->curr_stmt != NULL) + assert(vy_read_iterator_cmp_stmt(itr, itr->curr_stmt, + itr->last_stmt) > 0); + + if (itr->need_check_eq && itr->curr_stmt != NULL && + vy_tuple_compare_with_key(itr->curr_stmt, itr->key, + itr->index->cmp_def) != 0) + itr->curr_stmt = NULL; + if (vy_read_iterator_track_read(itr, itr->curr_stmt) != 0) + return -1; + + *ret = itr->curr_stmt; return 0; + +err_disk: + vy_read_iterator_unpin_slices(itr); + return -1; } /** * Iterate to the next (elder) version of the same key - * - * Note, we don't need to restore individual sources in this - * function, because sources that may yield (i.e. runs) are - * immutable and iterated last (after txw, cache, and mems) - * as they contain the oldest data. - * * @retval 0 success or EOF (*ret == NULL) * @retval -1 read error - * @retval -2 iterator is not valid anymore */ static NODISCARD int vy_read_iterator_next_lsn(struct vy_read_iterator *itr, struct tuple **ret) { - *ret = NULL; - if (itr->curr_src == UINT32_MAX) - return 0; + uint32_t i; + bool unused; + struct vy_read_src *src; + assert(itr->curr_stmt != NULL); - const struct key_def *def = itr->index->cmp_def; - struct vy_read_src *src = &itr->src[itr->curr_src]; - struct vy_stmt_iterator *sub_itr = &src->iterator; - int rc = sub_itr->iface->next_lsn(sub_itr, &src->stmt); - if (vy_read_iterator_check_version(itr)) - return -2; - if (rc != 0) - return rc; - if (src->stmt != NULL) { - tuple_unref(itr->curr_stmt); - itr->curr_stmt = src->stmt; - tuple_ref(itr->curr_stmt); - *ret = itr->curr_stmt; - return 0; + assert(itr->curr_src < itr->skipped_src); + + /* Cache stores only terminal statements. */ + assert(itr->curr_src != itr->cache_src); + + if (itr->curr_src == itr->txw_src) { + /* + * Write set does not store statement history. 
+ * Look up the older statement in the cache and + * if it isn't there proceed to mems and runs. + */ + src = &itr->src[itr->cache_src]; + if (itr->cache_src >= itr->skipped_src) + vy_read_iterator_scan_cache(itr, &unused); + if (src->front_id == itr->front_id) + goto found; } - for (uint32_t i = itr->curr_src + 1; i < itr->src_count; i++) { - src = &itr->src[i]; - if (i >= itr->skipped_start) { - itr->skipped_start++; - bool stop = false; - int cmp = -1; - while (true) { - rc = src->iterator.iface->next_key(&src->iterator, - &src->stmt, - &stop); - if (vy_read_iterator_check_version(itr)) - return -2; - if (rc != 0) - return rc; - if (src->stmt == NULL) - break; - cmp = vy_tuple_compare(src->stmt, itr->curr_stmt, - def); - if (cmp >= 0) - break; - } - if (cmp == 0) - itr->src[i].front_id = itr->front_id; - } + /* Look up the older statement in in-memory trees. */ + for (i = MAX(itr->curr_src, itr->mem_src); i < itr->disk_src; i++) { + src = &itr->src[i]; + if (i >= itr->skipped_src && + vy_read_iterator_scan_mem(itr, i, &unused) != 0) + return -1; + if (src->front_id != itr->front_id) + continue; + if (i == itr->curr_src && + vy_mem_iterator_next_lsn(&src->mem_iterator, + &src->stmt) != 0) + return -1; + if (src->stmt != NULL) + goto found; + } - if (itr->src[i].front_id == itr->front_id) { - itr->curr_src = i; - tuple_unref(itr->curr_stmt); - itr->curr_stmt = itr->src[i].stmt; - tuple_ref(itr->curr_stmt); - *ret = itr->curr_stmt; - return 0; - } + /* + * Look up the older statement in on-disk runs. + * + * Note, we don't need to check the index version after the yield + * caused by the disk read, because once we've come to this point, + * we won't read any source except run slices, which are pinned + * and hence cannot be removed during the yield. 
+ */ + vy_read_iterator_pin_slices(itr); + for (i = MAX(itr->curr_src, itr->disk_src); i < itr->src_count; i++) { + src = &itr->src[i]; + if (i >= itr->skipped_src && + vy_read_iterator_scan_disk(itr, i, &unused) != 0) + goto err_disk; + if (src->front_id != itr->front_id) + continue; + if (i == itr->curr_src && + vy_run_iterator_next_lsn(&src->run_iterator, + &src->stmt) != 0) + goto err_disk; + if (src->stmt != NULL) + break; } - itr->curr_src = UINT32_MAX; + vy_read_iterator_unpin_slices(itr); + + if (i < itr->src_count) + goto found; + + /* Searched everywhere, found nothing. */ + *ret = NULL; + return 0; +found: + tuple_ref(src->stmt); + if (itr->curr_stmt != NULL) + tuple_unref(itr->curr_stmt); + itr->curr_stmt = src->stmt; + itr->curr_src = src - itr->src; + *ret = itr->curr_stmt; return 0; + +err_disk: + vy_read_iterator_unpin_slices(itr); + return -1; } /** @@ -399,7 +643,6 @@ vy_read_iterator_next_lsn(struct vy_read_iterator *itr, struct tuple **ret) * * @retval 0 success * @retval -1 error - * @retval -2 invalid iterator */ static NODISCARD int vy_read_iterator_squash_upsert(struct vy_read_iterator *itr, @@ -435,56 +678,60 @@ vy_read_iterator_squash_upsert(struct vy_read_iterator *itr, } static void -vy_read_iterator_add_tx(struct vy_read_iterator *itr, - enum iterator_type iterator_type, struct tuple *key) +vy_read_iterator_add_tx(struct vy_read_iterator *itr) { assert(itr->tx != NULL); + enum iterator_type iterator_type = (itr->iterator_type != ITER_REQ ? 
+ itr->iterator_type : ITER_LE); struct vy_txw_iterator_stat *stat = &itr->index->stat.txw.iterator; - struct vy_read_src *sub_src = vy_read_iterator_add_src(itr, true); + struct vy_read_src *sub_src = vy_read_iterator_add_src(itr); vy_txw_iterator_open(&sub_src->txw_iterator, stat, itr->tx, itr->index, - iterator_type, key); + iterator_type, itr->key); } static void -vy_read_iterator_add_cache(struct vy_read_iterator *itr, - enum iterator_type iterator_type, struct tuple *key) +vy_read_iterator_add_cache(struct vy_read_iterator *itr) { - struct vy_read_src *sub_src = vy_read_iterator_add_src(itr, true); + enum iterator_type iterator_type = (itr->iterator_type != ITER_REQ ? + itr->iterator_type : ITER_LE); + struct vy_read_src *sub_src = vy_read_iterator_add_src(itr); vy_cache_iterator_open(&sub_src->cache_iterator, &itr->index->cache, iterator_type, - key, itr->read_view); + itr->key, itr->read_view); } static void -vy_read_iterator_add_mem(struct vy_read_iterator *itr, - enum iterator_type iterator_type, struct tuple *key) +vy_read_iterator_add_mem(struct vy_read_iterator *itr) { + enum iterator_type iterator_type = (itr->iterator_type != ITER_REQ ? + itr->iterator_type : ITER_LE); struct vy_index *index = itr->index; struct vy_read_src *sub_src; /* Add the active in-memory index. */ assert(index->mem != NULL); - sub_src = vy_read_iterator_add_src(itr, true); + sub_src = vy_read_iterator_add_src(itr); vy_mem_iterator_open(&sub_src->mem_iterator, &index->stat.memory.iterator, - index->mem, iterator_type, key, + index->mem, iterator_type, itr->key, itr->read_view); /* Add sealed in-memory indexes. 
*/ struct vy_mem *mem; rlist_foreach_entry(mem, &index->sealed, in_sealed) { - sub_src = vy_read_iterator_add_src(itr, false); + sub_src = vy_read_iterator_add_src(itr); vy_mem_iterator_open(&sub_src->mem_iterator, &index->stat.memory.iterator, - mem, iterator_type, key, + mem, iterator_type, itr->key, itr->read_view); } } static void -vy_read_iterator_add_disk(struct vy_read_iterator *itr, - enum iterator_type iterator_type, struct tuple *key) +vy_read_iterator_add_disk(struct vy_read_iterator *itr) { assert(itr->curr_range != NULL); + enum iterator_type iterator_type = (itr->iterator_type != ITER_REQ ? + itr->iterator_type : ITER_LE); struct vy_index *index = itr->index; struct vy_slice *slice; /* @@ -506,11 +753,11 @@ vy_read_iterator_add_disk(struct vy_read_iterator *itr, if (slice->run->info.min_lsn > index->dump_lsn) continue; assert(slice->run->info.max_lsn <= index->dump_lsn); - struct vy_read_src *sub_src = vy_read_iterator_add_src(itr, false); + struct vy_read_src *sub_src = vy_read_iterator_add_src(itr); vy_run_iterator_open(&sub_src->run_iterator, &index->stat.disk.iterator, itr->run_env, slice, - iterator_type, key, + iterator_type, itr->key, itr->read_view, index->cmp_def, index->key_def, index->disk_format, index->upsert_format, index->id == 0); @@ -518,64 +765,48 @@ vy_read_iterator_add_disk(struct vy_read_iterator *itr, } /** - * Set up the read iterator for the current range. + * Close all open sources and reset the merge state. 
*/ static void -vy_read_iterator_use_range(struct vy_read_iterator *itr) +vy_read_iterator_cleanup(struct vy_read_iterator *itr) { - struct tuple *key = itr->key; - enum iterator_type iterator_type = itr->iterator_type; + uint32_t i; + struct vy_read_src *src; + + if (itr->txw_src < itr->src_count) { + src = &itr->src[itr->txw_src]; + vy_txw_iterator_close(&src->txw_iterator); + } + if (itr->cache_src < itr->src_count) { + src = &itr->src[itr->cache_src]; + vy_cache_iterator_close(&src->cache_iterator); + } + for (i = itr->mem_src; i < itr->disk_src; i++) { + src = &itr->src[i]; + vy_mem_iterator_close(&src->mem_iterator); + } + for (i = itr->disk_src; i < itr->src_count; i++) { + src = &itr->src[i]; + vy_run_iterator_close(&src->run_iterator); + } - /* Close all open sources and reset merge state. */ if (itr->curr_stmt != NULL) tuple_unref(itr->curr_stmt); - for (uint32_t i = 0; i < itr->src_count; i++) - itr->src[i].iterator.iface->close(&itr->src[i].iterator); - itr->src_count = 0; - itr->mutable_start = 0; - itr->mutable_end = 0; - itr->skipped_start = 0; itr->curr_stmt = NULL; itr->curr_src = UINT32_MAX; - itr->front_id = 1; - - /* - * Open all sources starting from the last statement - * returned to the user. Newer sources must be added - * first. - */ - if (itr->last_stmt != NULL) { - if (iterator_type == ITER_EQ || iterator_type == ITER_REQ) - itr->need_check_eq = true; - iterator_type = iterator_direction(iterator_type) >= 0 ? - ITER_GT : ITER_LT; - key = itr->last_stmt; - } else if (iterator_type == ITER_REQ) { - /* - * Source iterators can't handle ITER_REQ. - * Use ITER_LE instead and enable EQ check. 
- */ - iterator_type = ITER_LE; - itr->need_check_eq = true; - } - - if (itr->tx != NULL) - vy_read_iterator_add_tx(itr, iterator_type, key); - - vy_read_iterator_add_cache(itr, iterator_type, key); - vy_read_iterator_add_mem(itr, iterator_type, key); - - if (itr->curr_range != NULL) { - itr->range_version = itr->curr_range->version; - vy_read_iterator_add_disk(itr, iterator_type, key); - } + itr->txw_src = UINT32_MAX; + itr->cache_src = UINT32_MAX; + itr->mem_src = UINT32_MAX; + itr->disk_src = UINT32_MAX; + itr->skipped_src = UINT32_MAX; + itr->src_count = 0; } void vy_read_iterator_open(struct vy_read_iterator *itr, struct vy_run_env *run_env, struct vy_index *index, struct vy_tx *tx, enum iterator_type iterator_type, struct tuple *key, - const struct vy_read_view **rv) + const struct vy_read_view **rv, double too_long_threshold) { memset(itr, 0, sizeof(*itr)); @@ -585,6 +816,7 @@ vy_read_iterator_open(struct vy_read_iterator *itr, struct vy_run_env *run_env, itr->iterator_type = iterator_type; itr->key = key; itr->read_view = rv; + itr->too_long_threshold = too_long_threshold; if (tuple_field_count(key) == 0) { /* @@ -600,81 +832,93 @@ vy_read_iterator_open(struct vy_read_iterator *itr, struct vy_run_env *run_env, if (iterator_type == ITER_ALL) itr->iterator_type = ITER_GE; -} - -/** - * Prepare the read iterator for the first iteration. - */ -static void -vy_read_iterator_start(struct vy_read_iterator *itr) -{ - assert(!itr->search_started); - assert(itr->last_stmt == NULL); - assert(itr->curr_range == NULL); - itr->search_started = true; - itr->mem_list_version = itr->index->mem_list_version; - itr->range_tree_version = itr->index->range_tree_version; - itr->curr_range = vy_range_tree_find_by_key(itr->index->tree, - itr->iterator_type, itr->key); - vy_read_iterator_use_range(itr); + if (iterator_type == ITER_REQ) { + /* + * Source iterators cannot handle ITER_REQ and + * use ITER_LE instead, so we need to enable EQ + * check in this case. 
+ * + * See vy_read_iterator_add_{tx,cache,mem,run}. + */ + itr->need_check_eq = true; + } - itr->index->stat.lookup++; } /** * Restart the read iterator from the position following * the last statement returned to the user. Called when * the current range or the whole range tree is changed. + * Also used for preparing the iterator for the first + * iteration. */ static void vy_read_iterator_restore(struct vy_read_iterator *itr) { + vy_read_iterator_cleanup(itr); + itr->mem_list_version = itr->index->mem_list_version; itr->range_tree_version = itr->index->range_tree_version; itr->curr_range = vy_range_tree_find_by_key(itr->index->tree, itr->iterator_type, itr->last_stmt ?: itr->key); - vy_read_iterator_use_range(itr); + itr->range_version = itr->curr_range->version; + + if (itr->tx != NULL) { + itr->txw_src = itr->src_count; + vy_read_iterator_add_tx(itr); + } + + itr->cache_src = itr->src_count; + vy_read_iterator_add_cache(itr); + + itr->mem_src = itr->src_count; + vy_read_iterator_add_mem(itr); + + itr->disk_src = itr->src_count; + vy_read_iterator_add_disk(itr); } -static bool +/** + * Iterate to the next range. + */ +static void vy_read_iterator_next_range(struct vy_read_iterator *itr) { - struct vy_index *index = itr->index; struct vy_range *range = itr->curr_range; + struct key_def *cmp_def = itr->index->cmp_def; + int dir = iterator_direction(itr->iterator_type); assert(range != NULL); + while (true) { + range = dir > 0 ? vy_range_tree_next(itr->index->tree, range) : + vy_range_tree_prev(itr->index->tree, range); + assert(range != NULL); - switch (itr->iterator_type) { - case ITER_LT: - case ITER_LE: - case ITER_REQ: - range = vy_range_tree_prev(index->tree, range); - break; - case ITER_GT: - case ITER_GE: - range = vy_range_tree_next(index->tree, range); - break; - case ITER_EQ: - /* A partial key can be found in more than one range. 
*/ - if (range->end != NULL && - vy_stmt_compare_with_key(itr->key, range->end, - range->cmp_def) >= 0) { - range = vy_range_tree_next(index->tree, range); - } else { - range = NULL; - } - break; - default: - unreachable(); + if (itr->last_stmt == NULL) + break; + /* + * We could skip an entire range due to the cache. + * Make sure the next statement falls in the range. + */ + if (dir > 0 && (range->end == NULL || + vy_tuple_compare_with_key(itr->last_stmt, + range->end, cmp_def) < 0)) + break; + if (dir < 0 && vy_tuple_compare_with_key(itr->last_stmt, + range->begin, cmp_def) > 0) + break; } - itr->curr_range = range; - if (range == NULL) - return false; + itr->range_version = range->version; + + for (uint32_t i = itr->disk_src; i < itr->src_count; i++) { + struct vy_read_src *src = &itr->src[i]; + vy_run_iterator_close(&src->run_iterator); + } + itr->src_count = itr->disk_src; - vy_read_iterator_use_range(itr); - return true; + vy_read_iterator_add_disk(itr); } /** @@ -704,66 +948,6 @@ vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt) return rc; } -/** - * Conventional wrapper around vy_read_iterator_next_key() to automatically - * re-create the merge iterator on vy_index/vy_range/vy_run changes. - */ -static NODISCARD int -vy_read_iterator_merge_next_key(struct vy_read_iterator *itr, - struct tuple **ret) -{ - struct key_def *cmp_def = itr->index->cmp_def; - int dir = iterator_direction(itr->iterator_type); - struct tuple *stmt; - - while (true) { - int rc = vy_read_iterator_next_key(itr, &stmt); - if (rc == -1) - return -1; - if (rc == -2) { - vy_read_iterator_restore(itr); - continue; - } - - /* - * Check if the statement is within the current range. - * If it is, return it right away, otherwise move to - * the next range and restart merge. - */ - struct vy_range *range = itr->curr_range; - if (range == NULL) { - /* All ranges have been merged. 
*/ - break; - } - - if (stmt != NULL) { - if (dir > 0 && (range->end == NULL || - vy_tuple_compare_with_key(stmt, - range->end, cmp_def) < 0)) - break; - if (dir < 0 && (range->begin == NULL || - vy_tuple_compare_with_key(stmt, - range->begin, cmp_def) >= 0)) - break; - } - - if (!vy_read_iterator_next_range(itr)) { - /* No more ranges to merge. */ - break; - } - } - - if (itr->need_check_eq && stmt != NULL && - vy_tuple_compare_with_key(stmt, itr->key, cmp_def) != 0) - stmt = NULL; - - if (vy_read_iterator_track_read(itr, stmt) != 0) - return -1; - - *ret = stmt; - return 0; -} - NODISCARD int vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result) { @@ -794,8 +978,11 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result) *result = NULL; - if (!itr->search_started) - vy_read_iterator_start(itr); + if (!itr->search_started) { + itr->search_started = true; + itr->index->stat.lookup++; + vy_read_iterator_restore(itr); + } struct tuple *prev_key = itr->last_stmt; if (prev_key != NULL) @@ -806,10 +993,9 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result) struct vy_index *index = itr->index; int rc = 0; while (true) { - if (vy_read_iterator_merge_next_key(itr, &t)) { - rc = -1; + rc = vy_read_iterator_next_key(itr, &t); + if (rc != 0) goto clear; - } if (t == NULL) { if (itr->last_stmt != NULL) tuple_unref(itr->last_stmt); @@ -818,23 +1004,16 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result) break; } rc = vy_read_iterator_squash_upsert(itr, &t); - if (rc == -1) + if (rc != 0) goto clear; - if (rc == -2) { - vy_read_iterator_restore(itr); - continue; - } - if (vy_stmt_type(t) == IPROTO_REPLACE) { - if (itr->last_stmt != NULL) - tuple_unref(itr->last_stmt); - itr->last_stmt = t; + if (itr->last_stmt != NULL) + tuple_unref(itr->last_stmt); + itr->last_stmt = t; + if (vy_stmt_type(t) == IPROTO_REPLACE) break; - } else { - assert(vy_stmt_type(t) == IPROTO_DELETE); - if 
(vy_stmt_lsn(t) == INT64_MAX) /* t is from write set */ - skipped_txw_delete = true; - tuple_unref(t); - } + assert(vy_stmt_type(t) == IPROTO_DELETE); + if (vy_stmt_lsn(t) == INT64_MAX) /* t is from write set */ + skipped_txw_delete = true; } *result = itr->last_stmt; @@ -888,8 +1067,15 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result) if (prev_key != NULL) tuple_unref(prev_key); - latency_collect(&index->stat.latency, - ev_monotonic_now(loop()) - start_time); + ev_tstamp latency = ev_monotonic_now(loop()) - start_time; + latency_collect(&index->stat.latency, latency); + + if (latency > itr->too_long_threshold) { + say_warn("%s: select(%s, %s) => %s took too long: %.3f sec", + vy_index_name(index), tuple_str(itr->key), + iterator_type_strs[itr->iterator_type], + vy_stmt_str(itr->last_stmt), latency); + } return rc; } @@ -901,10 +1087,7 @@ vy_read_iterator_close(struct vy_read_iterator *itr) { if (itr->last_stmt != NULL) tuple_unref(itr->last_stmt); - if (itr->curr_stmt != NULL) - tuple_unref(itr->curr_stmt); - for (uint32_t i = 0; i < itr->src_count; i++) - itr->src[i].iterator.iface->close(&itr->src[i].iterator); + vy_read_iterator_cleanup(itr); free(itr->src); TRASH(itr); } diff --git a/src/box/vy_read_iterator.h b/src/box/vy_read_iterator.h index 51ebc33db0b47aecf8e11a0074db112f4ca5f704..d04610e89991967ee60fda0a724555d5fca75571 100644 --- a/src/box/vy_read_iterator.h +++ b/src/box/vy_read_iterator.h @@ -59,6 +59,11 @@ struct vy_read_iterator { struct tuple *key; /** Read view the iterator lives in. */ const struct vy_read_view **read_view; + /** + * If a read iteration takes longer than the given value, + * warn about it in the log. + */ + double too_long_threshold; /** * Set if the resulting statement needs to be * checked to match the search key. @@ -100,17 +105,25 @@ struct vy_read_iterator { uint32_t curr_src; /** Statement returned by the current merge source. */ struct tuple *curr_stmt; - /** Offset of the first mutable source. 
*/ - uint32_t mutable_start; - /** Offset of the source following the last mutable source. */ - uint32_t mutable_end; + /** Offset of the transaction write set source. */ + uint32_t txw_src; + /** Offset of the cache source. */ + uint32_t cache_src; + /** Offset of the first memory source. */ + uint32_t mem_src; + /** Offset of the first disk source. */ + uint32_t disk_src; /** Offset of the first skipped source. */ - uint32_t skipped_start; + uint32_t skipped_src; /** * front_id of the current source and all sources * that are on the same key. */ uint32_t front_id; + /** + * front_id from the previous iteration. + */ + uint32_t prev_front_id; }; /** @@ -128,7 +141,7 @@ void vy_read_iterator_open(struct vy_read_iterator *itr, struct vy_run_env *run_env, struct vy_index *index, struct vy_tx *tx, enum iterator_type iterator_type, struct tuple *key, - const struct vy_read_view **rv); + const struct vy_read_view **rv, double too_long_threshold); /** * Get the next statement with another key, or start the iterator, diff --git a/src/box/vy_read_view.h b/src/box/vy_read_view.h new file mode 100644 index 0000000000000000000000000000000000000000..c3b14699e631f2b75bc9c1e01dc711d6e45369b3 --- /dev/null +++ b/src/box/vy_read_view.h @@ -0,0 +1,78 @@ +#ifndef INCLUDES_TARANTOOL_BOX_VY_READ_VIEW_H +#define INCLUDES_TARANTOOL_BOX_VY_READ_VIEW_H +/* + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdbool.h> +#include <stdint.h> + +#include <small/rlist.h> + +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + +/** The state of the database the cursor should be looking at. */ +struct vy_read_view { + /** + * Consistent read view LSN. Originally read-only transactions + * receive a read view lsn upon creation and do not see further + * changes. + * Other transactions are expected to be read-write and + * have vlsn == INT64_MAX to read newest data. Once a value read + * by such a transaction (T) is overwritten by another + * committing transaction, T permanently goes to read view that does + * not see this change. + * If T does not have any write statements by the commit time it will + * be committed successfully, or aborted as conflicted otherwise. + */ + int64_t vlsn; + /** The link in read_views of the TX manager */ + struct rlist in_read_views; + /** + * The number of references to this read view. The global + * read view has zero refs, we don't reference-count + * it as it is missing from read_views list. 
+ */ + int refs; + /** + * Is set to true when the read view which includes + * a prepared but not committed transaction, is + * compromised by a cascading rollback. + */ + bool is_aborted; +}; + +#if defined(__cplusplus) +} /* extern "C" */ +#endif /* defined(__cplusplus) */ + +#endif /* INCLUDES_TARANTOOL_BOX_VY_READ_VIEW_H */ diff --git a/src/box/vy_run.c b/src/box/vy_run.c index 548e6a2e028159a966c4477200eb7289f4a1f1ba..8e7a25e47a7ed951538af30a85c99966679a66b7 100644 --- a/src/box/vy_run.c +++ b/src/box/vy_run.c @@ -949,7 +949,6 @@ vy_page_read_cb_free(struct cbus_call_msg *base) { struct vy_page_read_task *task = (struct vy_page_read_task *)base; vy_page_delete(task->page); - vy_slice_unpin(task->slice); mempool_free(&task->run_env->read_task_pool, task); return 0; } @@ -996,13 +995,6 @@ vy_run_iterator_load_page(struct vy_run_iterator *itr, uint32_t page_no, reader = &env->reader_pool[env->next_reader++]; env->next_reader %= env->reader_pool_size; - /* - * Make sure the run file descriptor won't be closed - * (even worse, reopened) while a reader thread is - * reading it. - */ - vy_slice_pin(slice); - task->slice = slice; task->page_info = *page_info; task->run_env = env; @@ -1016,7 +1008,6 @@ vy_run_iterator_load_page(struct vy_run_iterator *itr, uint32_t page_no, return -1; /* timed out or cancelled */ mempool_free(&env->read_task_pool, task); - vy_slice_unpin(slice); if (rc != 0) { /* posted, but failed */ @@ -1313,29 +1304,13 @@ vy_run_iterator_find_lsn(struct vy_run_iterator *itr, return 0; } -/* - * FIXME: vy_run_iterator_next_key() calls vy_run_iterator_start() which - * recursivly calls vy_run_iterator_next_key(). - */ -static NODISCARD int -vy_run_iterator_next_key(struct vy_stmt_iterator *vitr, struct tuple **ret, - bool *stop); -/** - * Start iteration for a given key and direction. - * Note, this function doesn't check slice boundaries. 
- * @retval 0 success or EOF (*ret == NULL) - * @retval -1 read or memory error - * Affects: curr_loaded_page, curr_pos, search_ended - */ static NODISCARD int -vy_run_iterator_start_from(struct vy_run_iterator *itr, - enum iterator_type iterator_type, - const struct tuple *key, struct tuple **ret) +vy_run_iterator_do_seek(struct vy_run_iterator *itr, + enum iterator_type iterator_type, + const struct tuple *key, struct tuple **ret) { struct vy_run *run = itr->slice->run; - assert(!itr->search_started); - itr->search_started = true; *ret = NULL; const struct key_def *key_def = itr->key_def; @@ -1413,7 +1388,7 @@ vy_run_iterator_start_from(struct vy_run_iterator *itr, * given (special branch of code in vy_run_iterator_search), * so we need to make a step on previous key */ - return vy_run_iterator_next_key(&itr->base, ret, NULL); + return vy_run_iterator_next_key(itr, ret); } else { assert(iterator_type == ITER_GE || iterator_type == ITER_GT || iterator_type == ITER_EQ); @@ -1429,16 +1404,14 @@ vy_run_iterator_start_from(struct vy_run_iterator *itr, } /** - * Start iteration in a run taking into account slice boundaries. - * This function is a wrapper around vy_run_iterator_start_from() - * which passes a contrived search key and the iterator - * direction to make sure the result falls in the given slice. + * Position the iterator to the first statement satisfying + * the search criteria for a given key and direction. 
*/ static NODISCARD int -vy_run_iterator_start(struct vy_run_iterator *itr, struct tuple **ret) +vy_run_iterator_seek(struct vy_run_iterator *itr, + enum iterator_type iterator_type, + const struct tuple *key, struct tuple **ret) { - enum iterator_type iterator_type = itr->iterator_type; - const struct tuple *key = itr->key; const struct key_def *cmp_def = itr->cmp_def; struct vy_slice *slice = itr->slice; int cmp; @@ -1491,31 +1464,24 @@ vy_run_iterator_start(struct vy_run_iterator *itr, struct tuple **ret) } } - return vy_run_iterator_start_from(itr, iterator_type, key, ret); + return vy_run_iterator_do_seek(itr, iterator_type, key, ret); } /* }}} vy_run_iterator vy_run_iterator support functions */ /* {{{ vy_run_iterator API implementation */ -/** Vtable for vy_stmt_iterator - declared below */ -static struct vy_stmt_iterator_iface vy_run_iterator_iface; - -/** - * Open the iterator. - */ void vy_run_iterator_open(struct vy_run_iterator *itr, struct vy_run_iterator_stat *stat, struct vy_run_env *run_env, struct vy_slice *slice, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv, + const struct tuple *key, const struct vy_read_view **rv, const struct key_def *cmp_def, const struct key_def *key_def, struct tuple_format *format, struct tuple_format *upsert_format, bool is_primary) { - itr->base.iface = &vy_run_iterator_iface; itr->stat = stat; itr->cmp_def = cmp_def; itr->key_def = key_def; @@ -1527,7 +1493,6 @@ vy_run_iterator_open(struct vy_run_iterator *itr, itr->iterator_type = iterator_type; itr->key = key; - tuple_ref(key); itr->read_view = rv; itr->curr_stmt = NULL; @@ -1573,28 +1538,19 @@ vy_run_iterator_get(struct vy_run_iterator *itr, struct tuple **result) return rc; } -/** - * Find the next stmt in a page, i.e. a stmt with a different key - * and fresh enough LSN (i.e. skipping the keys - * too old for the current transaction). 
- * - * @retval 0 success or EOF (*ret == NULL) - * @retval -1 memory or read error - */ -static NODISCARD int -vy_run_iterator_next_key(struct vy_stmt_iterator *vitr, struct tuple **ret, - bool *stop) +NODISCARD int +vy_run_iterator_next_key(struct vy_run_iterator *itr, struct tuple **ret) { - (void)stop; - assert(vitr->iface->next_key == vy_run_iterator_next_key); - struct vy_run_iterator *itr = (struct vy_run_iterator *) vitr; *ret = NULL; int rc; if (itr->search_ended) return 0; - if (!itr->search_started) - return vy_run_iterator_start(itr, ret); + if (!itr->search_started) { + itr->search_started = true; + return vy_run_iterator_seek(itr, itr->iterator_type, + itr->key, ret); + } uint32_t end_page = itr->slice->run->info.page_count; assert(itr->curr_pos.page_no <= end_page); const struct key_def *cmp_def = itr->cmp_def; @@ -1678,16 +1634,9 @@ vy_run_iterator_next_key(struct vy_stmt_iterator *vitr, struct tuple **ret, return vy_run_iterator_find_lsn(itr, itr->iterator_type, itr->key, ret); } -/** - * Find next (lower, older) record with the same key as current - * @retval 0 success or EOF (*ret == NULL) - * @retval -1 memory or read error - */ -static NODISCARD int -vy_run_iterator_next_lsn(struct vy_stmt_iterator *vitr, struct tuple **ret) +NODISCARD int +vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret) { - assert(vitr->iface->next_lsn == vy_run_iterator_next_lsn); - struct vy_run_iterator *itr = (struct vy_run_iterator *) vitr; *ret = NULL; int rc; @@ -1736,44 +1685,56 @@ vy_run_iterator_next_lsn(struct vy_stmt_iterator *vitr, struct tuple **ret) return vy_run_iterator_get(itr, ret); } -/** Disk runs are immutable so the ->restore() callback is a no-op. 
*/ -static NODISCARD int -vy_run_iterator_restore(struct vy_stmt_iterator *vitr, - const struct tuple *last_stmt, - struct tuple **ret, bool *stop) +NODISCARD int +vy_run_iterator_skip(struct vy_run_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret) { - (void)ret; - (void)stop; - (void)last_stmt; + *ret = NULL; + if (itr->search_ended) + return 0; - assert(vitr->iface->restore == vy_run_iterator_restore); - struct vy_run_iterator *itr = (struct vy_run_iterator *) vitr; + /* + * Check if the iterator is already positioned + * at the statement following last_stmt. + */ + if (itr->search_started && + (itr->curr_stmt == NULL || last_stmt == NULL || + iterator_direction(itr->iterator_type) * + vy_stmt_compare(itr->curr_stmt, last_stmt, + itr->cmp_def) > 0)) { + *ret = itr->curr_stmt; + return 0; + } - assert(itr->search_started); - (void)itr; + const struct tuple *key = itr->key; + enum iterator_type iterator_type = itr->iterator_type; + if (last_stmt != NULL) { + key = last_stmt; + iterator_type = iterator_direction(iterator_type) > 0 ? + ITER_GT : ITER_LT; + } + + itr->search_started = true; + if (vy_run_iterator_seek(itr, iterator_type, key, ret) != 0) + return -1; + + if (itr->iterator_type == ITER_EQ && last_stmt != NULL && + *ret != NULL && vy_stmt_compare(itr->key, *ret, + itr->cmp_def) != 0) { + vy_run_iterator_cache_clean(itr); + itr->search_ended = true; + *ret = NULL; + } return 0; } -/** - * Close the iterator and free resources. 
- */ -static void -vy_run_iterator_close(struct vy_stmt_iterator *vitr) +void +vy_run_iterator_close(struct vy_run_iterator *itr) { - assert(vitr->iface->close == vy_run_iterator_close); - struct vy_run_iterator *itr = (struct vy_run_iterator *) vitr; vy_run_iterator_cache_clean(itr); - tuple_unref(itr->key); TRASH(itr); } -static struct vy_stmt_iterator_iface vy_run_iterator_iface = { - .next_key = vy_run_iterator_next_key, - .next_lsn = vy_run_iterator_next_lsn, - .restore = vy_run_iterator_restore, - .close = vy_run_iterator_close, -}; - /* }}} vy_run_iterator API implementation */ /** Account a page to run statistics. */ diff --git a/src/box/vy_run.h b/src/box/vy_run.h index 5effe2c7c3e065a90d05e6a1fbbd8d4805626a54..bbd6098fedaba21f1f2e9ed63a25a1bbc3de8396 100644 --- a/src/box/vy_run.h +++ b/src/box/vy_run.h @@ -37,7 +37,8 @@ #include "fiber_cond.h" #include "iterator_type.h" #include "vy_stmt.h" /* for comparators */ -#include "vy_stmt_iterator.h" /* struct vy_stmt_iterator */ +#include "vy_stmt_stream.h" +#include "vy_read_view.h" #include "vy_stat.h" #include "index_def.h" @@ -204,8 +205,6 @@ struct vy_run_iterator_pos { * key. */ struct vy_run_iterator { - /** Parent class, must be the first member */ - struct vy_stmt_iterator base; /** Usage statistics */ struct vy_run_iterator_stat *stat; /** Vinyl run environment. */ @@ -236,7 +235,7 @@ struct vy_run_iterator { */ enum iterator_type iterator_type; /** Key to search. */ - struct tuple *key; + const struct tuple *key; /* LSN visibility, iterator shows values with lsn <= vlsn */ const struct vy_read_view **read_view; @@ -464,17 +463,54 @@ vy_slice_cut(struct vy_slice *slice, int64_t id, const struct key_def *cmp_def, struct vy_slice **result); +/** + * Open an iterator over on-disk run. + * + * Note, it is the caller's responsibility to make sure the slice + * is not compacted while the iterator is reading it. 
+ */ void vy_run_iterator_open(struct vy_run_iterator *itr, struct vy_run_iterator_stat *stat, struct vy_run_env *run_env, struct vy_slice *slice, enum iterator_type iterator_type, - struct tuple *key, const struct vy_read_view **rv, + const struct tuple *key, const struct vy_read_view **rv, const struct key_def *cmp_def, const struct key_def *key_def, struct tuple_format *format, struct tuple_format *upsert_format, bool is_primary); +/** + * Advance a run iterator to the newest statement for the next key. + * The statement is returned in @ret (NULL if EOF). + * Returns 0 on success, -1 on memory allocation or IO error. + */ +NODISCARD int +vy_run_iterator_next_key(struct vy_run_iterator *itr, struct tuple **ret); + +/** + * Advance a run iterator to the older statement for the same key. + * The statement is returned in @ret (NULL if EOF). + * Returns 0 on success, -1 on memory allocation or IO error. + */ +NODISCARD int +vy_run_iterator_next_lsn(struct vy_run_iterator *itr, struct tuple **ret); + +/** + * Advance a run iterator to the newest statement for the first key + * following @last_stmt. The statement is returned in @ret (NULL if EOF). + * Returns 0 on success, -1 on memory allocation or IO error. + */ +NODISCARD int +vy_run_iterator_skip(struct vy_run_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret); + +/** + * Close a run iterator. + */ +void +vy_run_iterator_close(struct vy_run_iterator *itr); + /** * Simple stream over a slice. @see vy_stmt_stream. 
*/ diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index df545ac3be01bd49199de996128104006686732b..cbbaee2a9bb5f840144363d6204ced5096486028 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -599,25 +599,6 @@ vy_stmt_decode(struct xrow_header *xrow, const struct key_def *key_def, return stmt; } -int -vy_key_snprint(char *buf, int size, const char *key) -{ - if (key == NULL) - return snprintf(buf, size, "[]"); - - int total = 0; - SNPRINT(total, snprintf, buf, size, "["); - uint32_t count = mp_decode_array(&key); - for (uint32_t i = 0; i < count; i++) { - if (i > 0) - SNPRINT(total, snprintf, buf, size, ", "); - SNPRINT(total, mp_snprint, buf, size, key); - mp_next(&key); - } - SNPRINT(total, snprintf, buf, size, "]"); - return total; -} - int vy_stmt_snprint(char *buf, int size, const struct tuple *stmt) { @@ -640,15 +621,6 @@ vy_stmt_snprint(char *buf, int size, const struct tuple *stmt) return total; } -const char * -vy_key_str(const char *key) -{ - char *buf = tt_static_buf(); - if (vy_key_snprint(buf, TT_STATIC_BUF_LEN, key) < 0) - return "<failed to format key>"; - return buf; -} - const char * vy_stmt_str(const struct tuple *stmt) { diff --git a/src/box/vy_stmt.h b/src/box/vy_stmt.h index b5bd9ec210ebc4a04c59e75dd9abad1f0bf97157..98dfcae816dc3ad219cd3e5c262914b9ae336b5b 100644 --- a/src/box/vy_stmt.h +++ b/src/box/vy_stmt.h @@ -630,14 +630,6 @@ vy_stmt_decode(struct xrow_header *xrow, const struct key_def *key_def, struct tuple_format *upsert_format, bool is_primary); -/** - * Format a key into string. - * Example: [1, 2, "string"] - * \sa mp_snprint() - */ -int -vy_key_snprint(char *buf, int size, const char *key); - /** * Format a statement into string. * Example: REPLACE([1, 2, "string"], lsn=48) @@ -646,18 +638,10 @@ int vy_stmt_snprint(char *buf, int size, const struct tuple *stmt); /* -* Format a key into string using a static buffer. -* Useful for gdb and say_debug(). 
-* \sa vy_key_snprint() -*/ -const char * -vy_key_str(const char *key); - -/* -* Format a statement into string using a static buffer. -* Useful for gdb and say_debug(). -* \sa vy_stmt_snprint() -*/ + * Format a statement into string using a static buffer. + * Useful for gdb and say_debug(). + * \sa vy_stmt_snprint() + */ const char * vy_stmt_str(const struct tuple *stmt); diff --git a/src/box/vy_stmt_iterator.h b/src/box/vy_stmt_iterator.h deleted file mode 100644 index 4b09d2f152bb31987a9ca90171eb2b7bc067020c..0000000000000000000000000000000000000000 --- a/src/box/vy_stmt_iterator.h +++ /dev/null @@ -1,194 +0,0 @@ -#ifndef INCLUDES_TARANTOOL_BOX_VY_STMT_ITERATOR_H -#define INCLUDES_TARANTOOL_BOX_VY_STMT_ITERATOR_H -/* - * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL - * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <trivia/util.h> -#include <stdbool.h> -#include "small/rlist.h" - -#if defined(__cplusplus) -extern "C" { -#endif /* defined(__cplusplus) */ - -struct vy_stmt_iterator; -struct tuple; - -typedef NODISCARD int -(*vy_iterator_next_key_f)(struct vy_stmt_iterator *virt_iterator, - struct tuple **ret, bool *stop); -typedef NODISCARD int -(*vy_iterator_next_lsn_f)(struct vy_stmt_iterator *virt_iterator, - struct tuple **ret); -/** - * The restore() function moves an iterator to the specified - * statement (@arg last_stmt) and returns the new statement via @arg ret. - * In addition two cases are possible either the position of the iterator - * has been changed after the restoration or it hasn't. - * - * 1) The position wasn't changed. This case appears if the iterator is moved - * to the statement that equals to the old statement by key and less - * or equal by LSN. - * - * Example of the unchanged position: - * ┃ ... ┃ ┃ ... ┃ - * ┃ k2, lsn = 10 ┣▶ read_iterator ┃ k3, lsn = 20 ┃ - * ┃ k2, lsn = 9 ┃ position ┃ ┃ - * ┃ k2, lsn = 8 ┃ ┃ k2, lsn = 8 ┣▶ read_iterator - * ┃ ┃ restoration ▶▶ ┃ ┃ position - the - * ┃ k1, lsn = 10 ┃ ┃ k1, lsn = 10 ┃ same key and the - * ┃ k1, lsn = 9 ┃ ┃ k1, lsn = 9 ┃ older LSN - * ┃ ... ┃ ┃ ... ┃ - * - * 2) Otherwise the position was changed and points on a statement with another - * key or with the same key but the bigger LSN. - * - * Example of the changed position: - * ┃ ... ┃ ┃ ... 
┃ - * ┃ k2, lsn = 10 ┣▶ read_iterator ┃ k2, lsn = 11 ┣▶ read_iterator - * ┃ k2, lsn = 9 ┃ position ┃ k2, lsn = 10 ┃ position - found - * ┃ k2, lsn = 8 ┃ ┃ k2, lsn = 9 ┃ the newer LSN - * ┃ ┃ restoration ▶▶ ┃ k2, lsn = 8 ┃ - * ┃ k1, lsn = 10 ┃ ┃ ┃ - * ┃ k1, lsn = 9 ┃ ┃ k1, lsn = 10 ┃ - * ┃ ... ┃ ┃ ... ┃ - * - * Another example: - * ┃ ... ┃ ┃ ┃ - * ┃ k3, lsn = 20 ┃ ┃ ... ┃ - * ┃ ┃ ┃ k3, lsn = 10 ┃ - * ┃ k2, lsn = 8 ┣▶ read_iterator ┃ k3, lsn = 9 ┃ - * ┃ ┃ position ┃ k3, lsn = 8 ┣▶ read_iterator - * ┃ k1, lsn = 10 ┃ ┃ ┃ position - k2 was - * ┃ k1, lsn = 9 ┃ restoration ▶▶ ┃ k1, lsn = 10 ┃ not found, so go - * ┃ ... ┃ ┃ ... ┃ to the next key - */ -typedef NODISCARD int -(*vy_iterator_restore_f)(struct vy_stmt_iterator *virt_iterator, - const struct tuple *last_stmt, struct tuple **ret, - bool *stop); - -typedef void -(*vy_iterator_close_f)(struct vy_stmt_iterator *virt_iterator); - -struct vy_stmt_iterator_iface { - vy_iterator_next_key_f next_key; - vy_iterator_next_lsn_f next_lsn; - vy_iterator_restore_f restore; - vy_iterator_close_f close; -}; - -/** - * Common interface for iterator over run, mem, etc. - */ -struct vy_stmt_iterator { - const struct vy_stmt_iterator_iface *iface; -}; - -/** - * The stream is a very simple iterator (generally over a mem or a run) - * that output all the tuples on increasing order. - */ -struct vy_stmt_stream; - -/** - * Start streaming - */ -typedef NODISCARD int -(*vy_stream_start_f)(struct vy_stmt_stream *virt_stream); - -/** - * Get next tuple from a stream. - */ -typedef NODISCARD int -(*vy_stream_next_f)(struct vy_stmt_stream *virt_stream, struct tuple **ret); - -/** - * Close the stream. - */ -typedef void -(*vy_stream_close_f)(struct vy_stmt_stream *virt_stream); - -/** - * The interface description for streams over run and mem. 
- */ -struct vy_stmt_stream_iface { - vy_stream_start_f start; - vy_stream_next_f next; - vy_stream_close_f stop; - vy_stream_close_f close; -}; - -/** - * Common interface for streams over run and mem. - */ -struct vy_stmt_stream { - const struct vy_stmt_stream_iface *iface; -}; - - -/** The state of the database the cursor should be looking at. */ -struct vy_read_view { - /** - * Consistent read view LSN. Originally read-only transactions - * receive a read view lsn upon creation and do not see further - * changes. - * Other transactions are expected to be read-write and - * have vlsn == INT64_MAX to read newest data. Once a value read - * by such a transaction (T) is overwritten by another - * commiting transaction, T permanently goes to read view that does - * not see this change. - * If T does not have any write statements by the commit time it will - * be committed successfully, or aborted as conflicted otherwise. - */ - int64_t vlsn; - /** The link in read_views of the TX manager */ - struct rlist in_read_views; - /** - * The number of references to this read view. The global - * read view has zero refs, we don't do reference - * count it as it is missing from read_views list. - */ - int refs; - /** - * Is set to true when the read view which includes - * a prepared but not committed transaction, is - * compromised by a cascading rollback. 
- */ - bool is_aborted; -}; - - -#if defined(__cplusplus) -} /* extern "C" */ -#endif /* defined(__cplusplus) */ - -#endif /* INCLUDES_TARANTOOL_BOX_VY_STMT_ITERATOR_H */ diff --git a/src/box/vinyl_index.h b/src/box/vy_stmt_stream.h similarity index 53% rename from src/box/vinyl_index.h rename to src/box/vy_stmt_stream.h index c6955657ef8efeae36613cd539f639f851f4bdab..098cc8ebf6963a1a8dcd9008cd542b911728ce66 100644 --- a/src/box/vinyl_index.h +++ b/src/box/vy_stmt_stream.h @@ -1,7 +1,7 @@ -#ifndef TARANTOOL_BOX_VINYL_INDEX_H_INCLUDED -#define TARANTOOL_BOX_VINYL_INDEX_H_INCLUDED +#ifndef INCLUDES_TARANTOOL_BOX_VY_STMT_STREAM_H +#define INCLUDES_TARANTOOL_BOX_VY_STMT_STREAM_H /* - * Copyright 2010-2016, Tarantool AUTHORS, please see AUTHORS file. + * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file. * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following @@ -16,11 +16,11 @@ * disclaimer in the documentation and/or other materials * provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR @@ -30,30 +30,58 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ -#include "index.h" + +#include <trivia/util.h> #if defined(__cplusplus) extern "C" { #endif /* defined(__cplusplus) */ -struct vy_index; -struct tuple_format; -struct vinyl_engine; +struct tuple; -struct vinyl_index { - struct index base; - struct vy_index *db; -}; +/** + * The stream is a very simple iterator (generally over a mem or a run) + * that outputs all the tuples in increasing order. + */ +struct vy_stmt_stream; -struct vinyl_index * -vinyl_index_new(struct vinyl_engine *vinyl, struct index_def *def, - struct tuple_format *format, struct vy_index *pk); +/** + * Start streaming + */ +typedef NODISCARD int +(*vy_stream_start_f)(struct vy_stmt_stream *virt_stream); -int -vinyl_index_open(struct vinyl_index *index); +/** + * Get next tuple from a stream. + */ +typedef NODISCARD int +(*vy_stream_next_f)(struct vy_stmt_stream *virt_stream, struct tuple **ret); + +/** + * Close the stream. + */ +typedef void +(*vy_stream_close_f)(struct vy_stmt_stream *virt_stream); + +/** + * The interface description for streams over run and mem. + */ +struct vy_stmt_stream_iface { + vy_stream_start_f start; + vy_stream_next_f next; + vy_stream_close_f stop; + vy_stream_close_f close; +}; + +/** + * Common interface for streams over run and mem. 
+ */ +struct vy_stmt_stream { + const struct vy_stmt_stream_iface *iface; +}; #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ -#endif /* TARANTOOL_BOX_VINYL_INDEX_H_INCLUDED */ +#endif /* INCLUDES_TARANTOOL_BOX_VY_STMT_STREAM_H */ diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c index d71e8896b6b1450835e0f07d2c6d181b72ff901f..da3660146c870e32eadb47fa02357e757f5f42e6 100644 --- a/src/box/vy_tx.c +++ b/src/box/vy_tx.c @@ -54,9 +54,9 @@ #include "vy_mem.h" #include "vy_stat.h" #include "vy_stmt.h" -#include "vy_stmt_iterator.h" #include "vy_upsert.h" #include "vy_read_set.h" +#include "vy_read_view.h" int write_set_cmp(struct txv *a, struct txv *b) @@ -840,33 +840,23 @@ vy_tx_set(struct vy_tx *tx, struct vy_index *index, struct tuple *stmt) return 0; } -static struct vy_stmt_iterator_iface vy_txw_iterator_iface; - void vy_txw_iterator_open(struct vy_txw_iterator *itr, struct vy_txw_iterator_stat *stat, struct vy_tx *tx, struct vy_index *index, - enum iterator_type iterator_type, struct tuple *key) + enum iterator_type iterator_type, + const struct tuple *key) { - itr->base.iface = &vy_txw_iterator_iface; itr->stat = stat; itr->tx = tx; itr->index = index; itr->iterator_type = iterator_type; itr->key = key; - tuple_ref(key); itr->version = UINT32_MAX; itr->curr_txv = NULL; itr->search_started = false; } -static void -vy_txw_iterator_get(struct vy_txw_iterator *itr, struct tuple **ret) -{ - *ret = itr->curr_txv->stmt; - vy_stmt_counter_acct_tuple(&itr->stat->get, *ret); -} - /** * Position the iterator to the first entry in the transaction * write set satisfying the search criteria for a given key and @@ -923,19 +913,10 @@ vy_txw_iterator_seek(struct vy_txw_iterator *itr, itr->curr_txv = txv; } -/** - * Advance an iterator to the next statement. - * Always returns 0. On EOF, *ret is set to NULL. 
- */ -static NODISCARD int -vy_txw_iterator_next_key(struct vy_stmt_iterator *vitr, struct tuple **ret, - bool *stop) +void +vy_txw_iterator_next(struct vy_txw_iterator *itr, struct tuple **ret) { - (void)stop; - assert(vitr->iface->next_key == vy_txw_iterator_next_key); - struct vy_txw_iterator *itr = (struct vy_txw_iterator *) vitr; *ret = NULL; - if (!itr->search_started) { itr->search_started = true; vy_txw_iterator_seek(itr, itr->iterator_type, itr->key); @@ -943,7 +924,7 @@ vy_txw_iterator_next_key(struct vy_stmt_iterator *vitr, struct tuple **ret, } assert(itr->version == itr->tx->write_set_version); if (itr->curr_txv == NULL) - return 0; + return; if (itr->iterator_type == ITER_LE || itr->iterator_type == ITER_LT) itr->curr_txv = write_set_prev(&itr->tx->write_set, itr->curr_txv); else @@ -955,46 +936,61 @@ vy_txw_iterator_next_key(struct vy_stmt_iterator *vitr, struct tuple **ret, itr->index->cmp_def) != 0) itr->curr_txv = NULL; out: - if (itr->curr_txv != NULL) - vy_txw_iterator_get(itr, ret); - return 0; + if (itr->curr_txv != NULL) { + *ret = itr->curr_txv->stmt; + vy_stmt_counter_acct_tuple(&itr->stat->get, *ret); + } } -/** - * This function does nothing. It is only needed to conform - * to the common iterator interface. - */ -static NODISCARD int -vy_txw_iterator_next_lsn(struct vy_stmt_iterator *vitr, struct tuple **ret) +void +vy_txw_iterator_skip(struct vy_txw_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret) { - assert(vitr->iface->next_lsn == vy_txw_iterator_next_lsn); - struct vy_txw_iterator *itr = (struct vy_txw_iterator *) vitr; + *ret = NULL; + assert(!itr->search_started || + itr->version == itr->tx->write_set_version); - assert(itr->search_started); - assert(itr->version == itr->tx->write_set_version); - (void)itr; + /* + * Check if the iterator is already positioned + * at the statement following last_stmt. 
+ */ + if (itr->search_started && + (itr->curr_txv == NULL || last_stmt == NULL || + iterator_direction(itr->iterator_type) * + vy_stmt_compare(itr->curr_txv->stmt, last_stmt, + itr->index->cmp_def) > 0)) { + if (itr->curr_txv != NULL) + *ret = itr->curr_txv->stmt; + return; + } - *ret = NULL; - return 0; -} + const struct tuple *key = itr->key; + enum iterator_type iterator_type = itr->iterator_type; + if (last_stmt != NULL) { + key = last_stmt; + iterator_type = iterator_direction(iterator_type) > 0 ? + ITER_GT : ITER_LT; + } -/** - * Restore the iterator position after a change in the write set. - * Iterator is positioned to the statement following @last_stmt. - * Returns 1 if the iterator position changed, 0 otherwise. - */ -static NODISCARD int -vy_txw_iterator_restore(struct vy_stmt_iterator *vitr, - const struct tuple *last_stmt, - struct tuple **ret, bool *stop) -{ - (void)stop; + itr->search_started = true; + vy_txw_iterator_seek(itr, iterator_type, key); + + if (itr->iterator_type == ITER_EQ && last_stmt != NULL && + itr->curr_txv != NULL && vy_stmt_compare(itr->key, + itr->curr_txv->stmt, itr->index->cmp_def) != 0) + itr->curr_txv = NULL; - assert(vitr->iface->restore == vy_txw_iterator_restore); - struct vy_txw_iterator *itr = (struct vy_txw_iterator *) vitr; + if (itr->curr_txv != NULL) { + *ret = itr->curr_txv->stmt; + vy_stmt_counter_acct_tuple(&itr->stat->get, *ret); + } +} - assert(itr->search_started); - if (itr->version == itr->tx->write_set_version) +int +vy_txw_iterator_restore(struct vy_txw_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret) +{ + if (!itr->search_started || itr->version == itr->tx->write_set_version) return 0; const struct tuple *key = itr->key; @@ -1017,26 +1013,19 @@ vy_txw_iterator_restore(struct vy_stmt_iterator *vitr, return 0; *ret = NULL; - if (itr->curr_txv != NULL) - vy_txw_iterator_get(itr, ret); + if (itr->curr_txv != NULL) { + *ret = itr->curr_txv->stmt; + vy_stmt_counter_acct_tuple(&itr->stat->get, 
*ret); + } return 1; } /** * Close a txw iterator. */ -static void -vy_txw_iterator_close(struct vy_stmt_iterator *vitr) +void +vy_txw_iterator_close(struct vy_txw_iterator *itr) { - assert(vitr->iface->close == vy_txw_iterator_close); - struct vy_txw_iterator *itr = (struct vy_txw_iterator *) vitr; - tuple_unref(itr->key); + (void)itr; /* suppress warn if NDEBUG */ TRASH(itr); } - -static struct vy_stmt_iterator_iface vy_txw_iterator_iface = { - .next_key = vy_txw_iterator_next_key, - .next_lsn = vy_txw_iterator_next_lsn, - .restore = vy_txw_iterator_restore, - .close = vy_txw_iterator_close -}; diff --git a/src/box/vy_tx.h b/src/box/vy_tx.h index bfbeffa438a33613961c8f701efca9a77549082e..f2a70bb9270b155039d8df9677d44ef640024f1c 100644 --- a/src/box/vy_tx.h +++ b/src/box/vy_tx.h @@ -44,8 +44,8 @@ #include "trivia/util.h" #include "vy_index.h" #include "vy_stat.h" -#include "vy_stmt_iterator.h" #include "vy_read_set.h" +#include "vy_read_view.h" #if defined(__cplusplus) extern "C" { @@ -353,8 +353,6 @@ vy_tx_set(struct vy_tx *tx, struct vy_index *index, struct tuple *stmt); * Iterator over the write set of a transaction. */ struct vy_txw_iterator { - /** Parent class, must be the first member. */ - struct vy_stmt_iterator base; /** Iterator statistics. */ struct vy_txw_iterator_stat *stat; /** Transaction whose write set is iterated. */ @@ -369,7 +367,7 @@ struct vy_txw_iterator { */ enum iterator_type iterator_type; /** Search key. */ - struct tuple *key; + const struct tuple *key; /* Last seen value of the write set version. */ uint32_t version; /* Current position in the write set. */ @@ -385,7 +383,38 @@ void vy_txw_iterator_open(struct vy_txw_iterator *itr, struct vy_txw_iterator_stat *stat, struct vy_tx *tx, struct vy_index *index, - enum iterator_type iterator_type, struct tuple *key); + enum iterator_type iterator_type, + const struct tuple *key); + +/** + * Advance a txw iterator to the next statement. 
+ * The next statement is returned in @ret (NULL if EOF). + */ +void +vy_txw_iterator_next(struct vy_txw_iterator *itr, struct tuple **ret); + +/** + * Advance a txw iterator to the statement following @last_stmt. + * The statement is returned in @ret (NULL if EOF). + */ +void +vy_txw_iterator_skip(struct vy_txw_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret); + +/** + * Check if a txw iterator was invalidated and needs to be restored. + * If it does, set the iterator position to the statement following + * @last_stmt and return 1, otherwise return 0. + */ +int +vy_txw_iterator_restore(struct vy_txw_iterator *itr, + const struct tuple *last_stmt, struct tuple **ret); + +/** + * Close a txw iterator. + */ +void +vy_txw_iterator_close(struct vy_txw_iterator *itr); #if defined(__cplusplus) } /* extern "C" */ diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c index 87f5efaf4db893bb96bb607be208cf8d344cc956..fbc0c786b92e4b034e081ddf65aae455cec7da58 100644 --- a/src/box/vy_write_iterator.c +++ b/src/box/vy_write_iterator.c @@ -720,6 +720,17 @@ vy_read_view_merge(struct vy_write_iterator *stream, struct tuple *hint, assert(rv->tuple == NULL); assert(rv->history != NULL); struct vy_write_history *h = rv->history; + /* + * Optimization 5: discard a DELETE statement referenced + * by a read view if it is preceded by another DELETE for + * the same key. + */ + if (hint != NULL && vy_stmt_type(hint) == IPROTO_DELETE && + vy_stmt_type(h->tuple) == IPROTO_DELETE) { + vy_write_history_destroy(h); + rv->history = NULL; + return 0; + } /* * Two possible hints to remove the current UPSERT. * 1. 
If the stream is working on the last level, we @@ -815,10 +826,12 @@ vy_write_iterator_build_read_views(struct vy_write_iterator *stream, int *count) continue; if (vy_read_view_merge(stream, hint, rv) != 0) goto error; + assert(rv->history == NULL); + if (rv->tuple == NULL) + continue; stream->rv_used_count++; ++*count; hint = rv->tuple; - assert(rv->history == NULL); } region_truncate(region, used); return 0; diff --git a/src/box/vy_write_iterator.h b/src/box/vy_write_iterator.h index f516540001326c30b0a2cc231049b1fa2952c380..a7d130380a8a16f6d4d513f83ee0a76b18359d8b 100644 --- a/src/box/vy_write_iterator.h +++ b/src/box/vy_write_iterator.h @@ -31,7 +31,8 @@ * SUCH DAMAGE. */ #include "trivia/util.h" -#include "vy_stmt_iterator.h" +#include "vy_stmt_stream.h" +#include "vy_read_view.h" #include <stdbool.h> #include <pthread.h> @@ -163,6 +164,22 @@ * * See implementation details in * vy_write_iterator_build_read_views. + * + * --------------------------------------------------------------- + * Optimization #5: discard a tautological DELETE statement, i.e. + * a statement that was not removed from the history because it + * is referenced by a read view, but that is preceded by another + * DELETE and hence not needed. + * + * -------- + * SAME KEY + * -------- + * + * VLSN(i) VLSN(i+1) VLSN(i+2) + * | | | + * | LSN1 LSN2 ... DELETE | LSNi LSNi+1 ... 
DELETE | + * \________________/\_______/ \_________________/\______/ + * skip keep skip discard */ struct vy_write_iterator; diff --git a/src/box/xrow.c b/src/box/xrow.c index 72b56ab2bda4a57f661a56071829b28d91e3eb38..33f3825542c65b452a217ad34ab48e0d20b5c16e 100644 --- a/src/box/xrow.c +++ b/src/box/xrow.c @@ -719,7 +719,6 @@ xrow_decode_auth(const struct xrow_header *row, struct auth_request *request) } memset(request, 0, sizeof(*request)); - request->header = row; uint32_t map_size = mp_decode_map(&data); for (uint32_t i = 0; i < map_size; ++i) { diff --git a/src/box/xrow.h b/src/box/xrow.h index d4ca4c8662d126b53131914f0159e4db47d00982..ca5becd8c6e4ebceea2d4e01123602393256a609 100644 --- a/src/box/xrow.h +++ b/src/box/xrow.h @@ -209,8 +209,6 @@ xrow_decode_call(const struct xrow_header *row, struct call_request *request); * AUTH request */ struct auth_request { - /** Request header */ - const struct xrow_header *header; /** MessagePack encoded name of the user to authenticate. */ const char *user_name; /** Auth scramble. 
@sa scramble.h */ diff --git a/src/coio_file.c b/src/coio_file.c index 8b5bbde919fb7882615548d8a2a10261e9e0904d..c5b2db781e69eb1fa66f55d778266b42db872843 100644 --- a/src/coio_file.c +++ b/src/coio_file.c @@ -32,9 +32,10 @@ #include "coio_task.h" #include "fiber.h" #include "say.h" +#include "fio.h" #include <stdio.h> #include <stdlib.h> - +#include <dirent.h> /** * A context of libeio request for any @@ -91,6 +92,16 @@ struct coio_file_task { struct { char *tpl; } tempdir; + + struct { + char **bufp; + const char *pathname; + } readdir; + + struct { + const char *source; + const char *dest; + } copyfile; }; }; @@ -490,3 +501,130 @@ coio_fdatasync(int fd) eio_req *req = eio_fdatasync(fd, 0, coio_complete, &eio); return coio_wait_done(req, &eio); } + +static void +coio_do_readdir(eio_req *req) +{ + struct coio_file_task *eio = (struct coio_file_task *)req->data; + DIR *dirp = opendir(eio->readdir.pathname); + if (dirp == NULL) + goto error; + size_t capacity = 128; + size_t len = 0; + struct dirent *entry; + char *buf = (char *) malloc(capacity); + if (buf == NULL) + goto mem_error; + req->result = 0; + do { + entry = readdir(dirp); + if (entry == NULL || + strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) + continue; + size_t namlen = strlen(entry->d_name); + size_t needed = len + namlen + 1; + if (needed > capacity) { + if (needed <= capacity * 2) + capacity *= 2; + else + capacity = needed * 2; + char *new_buf = (char *) realloc(buf, capacity); + if (new_buf == NULL) + goto mem_error; + buf = new_buf; + } + memcpy(&buf[len], entry->d_name, namlen); + len += namlen; + buf[len++] = '\n'; + req->result++; + } while(entry != NULL); + + if (len > 0) + buf[len - 1] = 0; + else + buf[0] = 0; + + *eio->readdir.bufp = buf; + closedir(dirp); + return; + +mem_error: + free(buf); + closedir(dirp); +error: + req->result = -1; + req->errorno = errno; +} + +int +coio_readdir(const char *dir_path, char **buf) +{ + INIT_COEIO_FILE(eio) + eio.readdir.bufp = 
buf; + eio.readdir.pathname = dir_path; + eio_req *req = eio_custom(coio_do_readdir, 0, coio_complete, &eio); + return coio_wait_done(req, &eio); +} + +static void +coio_do_copyfile(eio_req *req) +{ + struct coio_file_task *eio = (struct coio_file_task *)req->data; + + struct stat st; + if (stat(eio->copyfile.source, &st) < 0) { + goto error; + } + + int source_fd = open(eio->copyfile.source, O_RDONLY); + if (source_fd < 0) { + goto error; + } + + int dest_fd = open(eio->copyfile.dest, O_WRONLY | O_CREAT, + st.st_mode & 0777); + if (dest_fd < 0) { + goto error_dest; + } + + enum { COPY_FILE_BUF_SIZE = 4096 }; + + char buf[COPY_FILE_BUF_SIZE]; + + while (true) { + ssize_t nread = fio_read(source_fd, buf, sizeof(buf)); + if (nread < 0) + goto error_copy; + + if (nread == 0) + break; /* eof */ + + ssize_t nwritten = fio_writen(dest_fd, buf, nread); + if (nwritten < 0) + goto error_copy; + } + req->result = 0; + close(source_fd); + close(dest_fd); + return; + +error_copy: + close(dest_fd); +error_dest: + close(source_fd); +error: + req->errorno = errno; + req->result = -1; + return; +} + +int +coio_copyfile(const char *source, const char *dest) +{ + INIT_COEIO_FILE(eio) + eio.copyfile.source = source; + eio.copyfile.dest = dest; + eio_req *req = eio_custom(coio_do_copyfile, 0, coio_complete, &eio); + return coio_wait_done(req, &eio); +} diff --git a/src/coio_file.h b/src/coio_file.h index 4bb295dad2e7686d4800d07a928215c55d9f7170..7db5611d4eb6bd3322ee3763da61346b983287a6 100644 --- a/src/coio_file.h +++ b/src/coio_file.h @@ -82,6 +82,8 @@ int coio_fdatasync(int fd); int coio_tempdir(char *path, size_t path_len); +int coio_readdir(const char *path, char **buf); +int coio_copyfile(const char *source, const char *dest); #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/iobuf.h b/src/iobuf.h index 35da31aec64f9851bbee01359c94f9f0f9d67edc..b41522f0fad1cefd638132cc1e5d5d8f3617585e 100644 --- a/src/iobuf.h +++ b/src/iobuf.h @@ 
-83,16 +83,6 @@ iobuf_new_mt(struct slab_cache *slabc_out); void iobuf_reset(struct iobuf *iobuf); -/** Return true if there is no input and no output and - * no one has pinned the buffer - i.e. it's safe to - * destroy it. - */ -static inline bool -iobuf_is_idle(struct iobuf *iobuf) -{ - return ibuf_used(&iobuf->in) == 0 && obuf_used(&iobuf->out) == 0; -} - /** * Got to be called in each thread iobuf subsystem is * used in. diff --git a/src/lua/digest.c b/src/lua/digest.c index b8523704c6de0b7c8c8dd621bd902a2dba4673d3..bf738b3f5a51f11db64e70d1444527eb4736ae2e 100644 --- a/src/lua/digest.c +++ b/src/lua/digest.c @@ -32,6 +32,13 @@ #include <string.h> #include <lua/digest.h> #include <third_party/sha1.h> +#include <openssl/evp.h> +#include <coio_task.h> +#include <lua.h> +#include <lauxlib.h> +#include "utils.h" + +#define PBKDF2_MAX_DIGEST_SIZE 128 unsigned char * SHA1internal(const unsigned char *d, size_t n, unsigned char *md) @@ -46,3 +53,50 @@ SHA1internal(const unsigned char *d, size_t n, unsigned char *md) memcpy(md, result, 20); return result; } + +static ssize_t +digest_pbkdf2_f(va_list ap) +{ + char *password = va_arg(ap, char *); + size_t password_size = va_arg(ap, size_t); + const unsigned char *salt = va_arg(ap, unsigned char *); + size_t salt_size = va_arg(ap, size_t); + unsigned char *digest = va_arg(ap, unsigned char *); + int num_iterations = va_arg(ap, int); + int digest_len = va_arg(ap, int); + if (PKCS5_PBKDF2_HMAC(password, password_size, salt, salt_size, + num_iterations, EVP_sha256(), + digest_len, digest) == 0) { + return -1; + } + return 0; +} + +int +lua_pbkdf2(lua_State *L) +{ + const char *password = lua_tostring(L, 1); + const char *salt = lua_tostring(L, 2); + int num_iterations = lua_tointeger(L, 3); + int digest_len = lua_tointeger(L, 4); + static __thread unsigned char digest[PBKDF2_MAX_DIGEST_SIZE]; + + if (coio_call(digest_pbkdf2_f, password, strlen(password), salt, + strlen(salt), digest, num_iterations, digest_len) < 0) { + 
lua_pushnil(L); + return 1; + } + lua_pushlstring(L, (char *) digest, digest_len); + return 1; +} + +void +tarantool_lua_digest_init(struct lua_State *L) +{ + static const struct luaL_Reg lua_digest_methods [] = { + {"pbkdf2", lua_pbkdf2}, + {NULL, NULL} + }; + luaL_register_module(L, "digest", lua_digest_methods); + lua_pop(L, 1); +}; \ No newline at end of file diff --git a/src/lua/digest.h b/src/lua/digest.h index 07994b7b4b4afa9c4ee858ae36e97fb9ca97681f..c10b087dab5d1a5d81cf740dcf6779f7c041dfb9 100644 --- a/src/lua/digest.h +++ b/src/lua/digest.h @@ -40,6 +40,10 @@ extern "C" { unsigned char * SHA1internal(const unsigned char *d, size_t n, unsigned char *md); +struct lua_State; + +void +tarantool_lua_digest_init(struct lua_State *L); #if defined(__cplusplus) } diff --git a/src/lua/digest.lua b/src/lua/digest.lua index a3ab3e9d9a05b4cd73c92af6f6ebc4074cc9084a..314ede1edaa21a5ed5cf779f4822baf579c3a8d4 100644 --- a/src/lua/digest.lua +++ b/src/lua/digest.lua @@ -44,6 +44,7 @@ local digest_shortcuts = { md5 = 'MD5', md4 = 'MD4', } +local internal = require("digest") local PMurHash local PMurHash_methods = { @@ -141,6 +142,24 @@ setmetatable(CRC32, { end }) +local pbkdf2 = function(pass, salt, iters, digest_len) + if type(pass) ~= 'string' or type(salt) ~= 'string' then + error("Usage: digest.pbkdf2(pass, salt[,iters][,digest_len])") + end + if iters and type(iters) ~= 'number' then + error("iters must be a number") + end + if digest_len and type(digest_len) ~= 'number' then + error("digest_len must be a number") + end + iters = iters or 100000 + digest_len = digest_len or 128 + if digest_len > 128 then + error("too big digest size") + end + return internal.pbkdf2(pass, salt, iters, digest_len) +end + local m = { base64_encode = function(bin, options) if type(bin) ~= 'string' or @@ -215,7 +234,16 @@ local m = { return ffi.string(buf, n) end, - murmur = PMurHash + murmur = PMurHash, + + pbkdf2 = pbkdf2, + + pbkdf2_hex = function(pass, salt, iters, digest_len) + if 
type(pass) ~= 'string' or type(salt) ~= 'string' then + error("Usage: digest.pbkdf2_hex(pass, salt)") + end + return string.hex(pbkdf2(pass, salt, iters, digest_len)) + end } for digest, name in pairs(digest_shortcuts) do diff --git a/src/lua/fio.c b/src/lua/fio.c index 7113b157482b7095240b65ccd9811c96fd007fe1..806f4256b0b20cde82952789b2d41ebdca2184a8 100644 --- a/src/lua/fio.c +++ b/src/lua/fio.c @@ -47,6 +47,13 @@ #include "lua/utils.h" #include "coio_file.h" +static inline void +lbox_fio_pushsyserror(struct lua_State *L) +{ + diag_set(SystemError, "fio: %s", strerror(errno)); + luaT_pusherror(L, diag_get()->last); +} + static int lbox_fio_open(struct lua_State *L) { @@ -62,6 +69,11 @@ lbox_fio_open(struct lua_State *L) int mode = lua_tointeger(L, 3); int fh = coio_file_open(pathname, flags, mode); + if (fh < 0) { + lua_pushnil(L); + lbox_fio_pushsyserror(L); + return 2; + } lua_pushinteger(L, fh); return 1; } @@ -71,12 +83,18 @@ lbox_fio_pwrite(struct lua_State *L) { int fh = lua_tointeger(L, 1); const char *buf = lua_tostring(L, 2); + uint32_t ctypeid = 0; if (buf == NULL) - luaL_error(L, "fio.pwrite(): buffer is not a string"); + buf = *(const char **)luaL_checkcdata(L, 2, &ctypeid); size_t len = lua_tonumber(L, 3); size_t offset = lua_tonumber(L, 4); int res = coio_pwrite(fh, buf, len, offset); + if (res < 0) { + lua_pushnil(L); + lbox_fio_pushsyserror(L); + return 2; + } lua_pushinteger(L, res); return 1; } @@ -85,33 +103,35 @@ static int lbox_fio_pread(struct lua_State *L) { int fh = lua_tointeger(L, 1); - size_t len = lua_tonumber(L, 2); - size_t offset = lua_tonumber(L, 3); + uint32_t ctypeid; + char *buf = *(char **)luaL_checkcdata(L, 2, &ctypeid); + size_t len = lua_tonumber(L, 3); + size_t offset = lua_tonumber(L, 4); if (!len) { - lua_pushliteral(L, ""); + lua_pushinteger(L, 0); return 1; } - /* allocate buffer at lua stack */ - void *buf = lua_newuserdata(L, len); - if (!buf) { - errno = ENOMEM; - lua_pushnil(L); - return 1; - } - - int res = 
coio_pread(fh, buf, len, offset); - if (res < 0) { - lua_pop(L, 1); lua_pushnil(L); - return 1; + lbox_fio_pushsyserror(L); + return 2; + } + lua_pushinteger(L, res); + return 1; +} + +static inline int +lbox_fio_pushbool(struct lua_State *L, bool res) +{ + lua_pushboolean(L, res); + if (!res) { + lbox_fio_pushsyserror(L); + return 2; } - lua_pushlstring(L, (char *)buf, res); - lua_remove(L, -2); return 1; } @@ -131,8 +151,7 @@ lbox_fio_rename(struct lua_State *L) goto usage; int res = coio_rename(oldpath, newpath); - lua_pushboolean(L, res == 0); - return 1; + return lbox_fio_pushbool(L, res == 0); } static int @@ -147,8 +166,7 @@ lbox_fio_unlink(struct lua_State *L) if (pathname == NULL) goto usage; int res = coio_unlink(pathname); - lua_pushboolean(L, res == 0); - return 1; + return lbox_fio_pushbool(L, res == 0); } static int @@ -157,8 +175,7 @@ lbox_fio_ftruncate(struct lua_State *L) int fd = lua_tointeger(L, 1); off_t length = lua_tonumber(L, 2); int res = coio_ftruncate(fd, length); - lua_pushboolean(L, res == 0); - return 1; + return lbox_fio_pushbool(L, res == 0); } static int @@ -180,8 +197,7 @@ lbox_fio_truncate(struct lua_State *L) length = 0; int res = coio_truncate(pathname, length); - lua_pushboolean(L, res == 0); - return 1; + return lbox_fio_pushbool(L, res == 0); } static int @@ -189,11 +205,17 @@ lbox_fio_write(struct lua_State *L) { int fh = lua_tointeger(L, 1); const char *buf = lua_tostring(L, 2); + uint32_t ctypeid = 0; if (buf == NULL) - luaL_error(L, "fio.write(): buffer is not a string"); - + buf = *(const char **)luaL_checkcdata(L, 2, &ctypeid); size_t len = lua_tonumber(L, 3); + int res = coio_write(fh, buf, len); + if (res < 0) { + lua_pushnil(L); + lbox_fio_pushsyserror(L); + return 2; + } lua_pushinteger(L, res); return 1; } @@ -242,8 +264,7 @@ lbox_fio_chown(struct lua_State *L) group = entry->gr_gid; } int res = coio_chown(pathname, owner, group); - lua_pushboolean(L, res == 0); - return 1; + return lbox_fio_pushbool(L, res == 0); } 
static int @@ -259,39 +280,30 @@ lbox_fio_chmod(struct lua_State *L) goto usage; mode_t mode = lua_tointeger(L, 2); - lua_pushboolean(L, coio_chmod(pathname, mode) == 0); - return 1; + return lbox_fio_pushbool(L, coio_chmod(pathname, mode) == 0); } static int lbox_fio_read(struct lua_State *L) { int fh = lua_tointeger(L, 1); - size_t len = lua_tonumber(L, 2); + uint32_t ctypeid; + char *buf = *(char **)luaL_checkcdata(L, 2, &ctypeid); + size_t len = lua_tonumber(L, 3); if (!len) { - lua_pushliteral(L, ""); + lua_pushinteger(L, 0); return 1; } - /* allocate buffer at lua stack */ - void *buf = lua_newuserdata(L, len); - if (!buf) { - errno = ENOMEM; - lua_pushnil(L); - return 1; - } - - int res = coio_read(fh, buf, len); if (res < 0) { - lua_pop(L, 1); lua_pushnil(L); - return 1; + lbox_fio_pushsyserror(L); + return 2; } - lua_pushlstring(L, (char *)buf, res); - lua_remove(L, -2); + lua_pushinteger(L, res); return 1; } @@ -357,8 +369,13 @@ DEF_STAT_METHOD(is_sock, S_ISSOCK); #endif static int -lbox_fio_pushstat(struct lua_State *L, const struct stat *stat) +lbox_fio_pushstat(struct lua_State *L, int res, const struct stat *stat) { + if (res < 0) { + lua_pushnil(L); + lbox_fio_pushsyserror(L); + return 2; + } lua_newtable(L); PUSHTABLE("dev", lua_pushinteger, stat->st_dev); @@ -425,11 +442,7 @@ lbox_fio_lstat(struct lua_State *L) struct stat stat; int res = coio_lstat(pathname, &stat); - if (res < 0) { - lua_pushnil(L); - return 1; - } - return lbox_fio_pushstat(L, &stat); + return lbox_fio_pushstat(L, res, &stat); } static int @@ -446,11 +459,7 @@ lbox_fio_stat(struct lua_State *L) struct stat stat; int res = coio_stat(pathname, &stat); - if (res < 0) { - lua_pushnil(L); - return 1; - } - return lbox_fio_pushstat(L, &stat); + return lbox_fio_pushstat(L, res, &stat); } static int @@ -459,11 +468,7 @@ lbox_fio_fstat(struct lua_State *L) int fd = lua_tointeger(L, 1); struct stat stat; int res = coio_fstat(fd, &stat); - if (res < 0) { - lua_pushnil(L); - return 1; - } - 
return lbox_fio_pushstat(L, &stat); + return lbox_fio_pushstat(L, res, &stat); } @@ -483,12 +488,11 @@ lbox_fio_mkdir(struct lua_State *L) mode_t mode; - if (top >= 2) + if (top >= 2 && !lua_isnil(L, 2)) mode = lua_tointeger(L, 2); else mode = 0777; - lua_pushboolean(L, coio_mkdir(pathname, mode) == 0); - return 1; + return lbox_fio_pushbool(L, coio_mkdir(pathname, mode) == 0); } static int @@ -502,8 +506,25 @@ lbox_fio_rmdir(struct lua_State *L) pathname = lua_tostring(L, 1); if (pathname == NULL) goto usage; + return lbox_fio_pushbool(L, coio_rmdir(pathname) == 0); +} - lua_pushboolean(L, coio_rmdir(pathname) == 0); +static int +lbox_fio_listdir(struct lua_State *L) +{ + const char *pathname; + if (lua_gettop(L) < 1) { + luaL_error(L, "Usage: fio.listdir(pathname)"); + } + pathname = lua_tostring(L, 1); + char *buf; + if (coio_readdir(pathname, &buf) < 0) { + lua_pushnil(L); + lbox_fio_pushsyserror(L); + return 2; + } + lua_pushstring(L, buf); + free(buf); return 1; } @@ -560,8 +581,7 @@ lbox_fio_link(struct lua_State *L) linkpath = lua_tostring(L, 2); if (target == NULL || linkpath == NULL) goto usage; - lua_pushboolean(L, coio_link(target, linkpath) == 0); - return 1; + return lbox_fio_pushbool(L, coio_link(target, linkpath) == 0); } static int @@ -577,8 +597,7 @@ lbox_fio_symlink(struct lua_State *L) linkpath = lua_tostring(L, 2); if (target == NULL || linkpath == NULL) goto usage; - lua_pushboolean(L, coio_symlink(target, linkpath) == 0); - return 1; + return lbox_fio_pushbool(L, coio_symlink(target, linkpath) == 0); } static int @@ -596,7 +615,8 @@ lbox_fio_readlink(struct lua_State *L) int res = coio_readlink(pathname, path, PATH_MAX); if (res < 0) { lua_pushnil(L); - return 1; + lbox_fio_pushsyserror(L); + return 2; } lua_pushlstring(L, path, res); lua_remove(L, -2); @@ -610,16 +630,17 @@ lbox_fio_tempdir(struct lua_State *L) if (!buf) { errno = ENOMEM; lua_pushnil(L); - return 1; + lbox_fio_pushsyserror(L); + return 2; } - - if (coio_tempdir(buf, 
PATH_MAX) == 0) { - lua_pushstring(L, buf); - lua_remove(L, -2); - } else { + if (coio_tempdir(buf, PATH_MAX) != 0) { lua_pushnil(L); + lbox_fio_pushsyserror(L); + return 2; } + lua_pushstring(L, buf); + lua_remove(L, -2); return 1; } @@ -630,7 +651,8 @@ lbox_fio_cwd(struct lua_State *L) if (!buf) { errno = ENOMEM; lua_pushnil(L); - return 1; + lbox_fio_pushsyserror(L); + return 2; } @@ -638,7 +660,9 @@ lbox_fio_cwd(struct lua_State *L) lua_pushstring(L, buf); lua_remove(L, -2); } else { + lbox_fio_pushsyserror(L); lua_pushnil(L); + return 2; } return 1; } @@ -647,33 +671,37 @@ static int lbox_fio_fsync(struct lua_State *L) { int fd = lua_tointeger(L, 1); - lua_pushboolean(L, coio_fsync(fd) == 0); - return 1; + return lbox_fio_pushbool(L, coio_fsync(fd) == 0); } static int lbox_fio_fdatasync(struct lua_State *L) { int fd = lua_tointeger(L, 1); - lua_pushboolean(L, coio_fdatasync(fd) == 0); - return 1; + return lbox_fio_pushbool(L, coio_fdatasync(fd) == 0); } static int lbox_fio_sync(struct lua_State *L) { - lua_pushboolean(L, coio_sync() == 0); - return 1; + return lbox_fio_pushbool(L, coio_sync() == 0); } static int lbox_fio_close(struct lua_State *L) { int fd = lua_tointeger(L, 1); - lua_pushboolean(L, coio_file_close(fd) == 0); - return 1; + return lbox_fio_pushbool(L, coio_file_close(fd) == 0); } +static int +lbox_fio_copyfile(struct lua_State *L) +{ + const char *source = lua_tostring(L, -2); + const char *dest = lua_tostring(L, -1); + assert(source != NULL && dest != NULL); + return lbox_fio_pushbool(L, coio_copyfile(source, dest) == 0); +} @@ -716,7 +744,9 @@ tarantool_lua_fio_init(struct lua_State *L) { "ftruncate", lbox_fio_ftruncate }, { "fsync", lbox_fio_fsync }, { "fdatasync", lbox_fio_fdatasync }, + { "listdir", lbox_fio_listdir }, { "fstat", lbox_fio_fstat }, + { "copyfile", lbox_fio_copyfile, }, { NULL, NULL } }; luaL_register(L, NULL, internal_methods); diff --git a/src/lua/fio.lua b/src/lua/fio.lua index 
32b23ef83be40d0bfa218c46a7db3a3fc3e57532..8e7645e8e8f24dbae3a549a9f55c298dd663de3b 100644 --- a/src/lua/fio.lua +++ b/src/lua/fio.lua @@ -2,6 +2,7 @@ local fio = require('fio') local ffi = require('ffi') +local buffer = require('buffer') ffi.cdef[[ int umask(int mask); @@ -9,6 +10,8 @@ ffi.cdef[[ int chdir(const char *path); ]] +local const_char_ptr_t = ffi.typeof('const char *') + local internal = fio.internal fio.internal = nil @@ -21,49 +24,86 @@ end local fio_methods = {} -fio_methods.read = function(self, size) - if size == nil then - return '' +-- read(size) -> str +-- read(buf, size) -> len +fio_methods.read = function(self, buf, size) + local tmpbuf + if not ffi.istype(const_char_ptr_t, buf) then + size = buf + tmpbuf = buffer.IBUF_SHARED + tmpbuf:reset() + buf = tmpbuf:reserve(size) end - - return internal.read(self.fh, tonumber(size)) + local res, err = internal.read(self.fh, buf, size) + if res == nil then + if tmpbuf ~= nil then + tmpbuf:recycle() + end + return nil, err + end + if tmpbuf ~= nil then + tmpbuf:alloc(res) + res = ffi.string(tmpbuf.rpos, tmpbuf:size()) + tmpbuf:recycle() + end + return res end -fio_methods.write = function(self, data) - data = tostring(data) - local res = internal.write(self.fh, data, #data) +-- write(str) +-- write(buf, len) +fio_methods.write = function(self, data, len) + if not ffi.istype(const_char_ptr_t, data) then + data = tostring(data) + len = #data + end + local res, err = internal.write(self.fh, data, len) + if err ~= nil then + return false, err + end return res >= 0 end -fio_methods.pwrite = function(self, data, offset) - data = tostring(data) - local len = #data - if len == 0 then - return true +-- pwrite(str, offset) +-- pwrite(buf, len, offset) +fio_methods.pwrite = function(self, data, len, offset) + if not ffi.istype(const_char_ptr_t, data) then + data = tostring(data) + offset = len + len = #data end - - if offset == nil then - offset = 0 - else - offset = tonumber(offset) + local res, err = 
internal.pwrite(self.fh, data, len, offset) + if err ~= nil then + return false, err end - - local res = internal.pwrite(self.fh, data, len, offset) return res >= 0 end -fio_methods.pread = function(self, len, offset) - if len == nil then - return '' +-- pread(size, offset) -> str +-- pread(buf, size, offset) -> len +fio_methods.pread = function(self, buf, size, offset) + local tmpbuf + if not ffi.istype(const_char_ptr_t, buf) then + offset = size + size = buf + tmpbuf = buffer.IBUF_SHARED + tmpbuf:reset() + buf = tmpbuf:reserve(size) end - if offset == nil then - offset = 0 + local res, err = internal.pread(self.fh, buf, size, offset) + if res == nil then + if tmpbuf ~= nil then + tmpbuf:recycle() + end + return nil, err end - - return internal.pread(self.fh, tonumber(len), tonumber(offset)) + if tmpbuf ~= nil then + tmpbuf:alloc(res) + res = ffi.string(tmpbuf.rpos, tmpbuf:size()) + tmpbuf:recycle() + end + return res end - fio_methods.truncate = function(self, length) if length == nil then length = 0 @@ -85,16 +125,15 @@ fio_methods.seek = function(self, offset, whence) end local res = internal.lseek(self.fh, tonumber(offset), whence) - - if res < 0 then - return nil - end return tonumber(res) end fio_methods.close = function(self) - local res = internal.close(self.fh) + local res, err = internal.close(self.fh) self.fh = -1 + if err ~= nil then + return false, err + end return res end @@ -106,7 +145,6 @@ fio_methods.fdatasync = function(self) return internal.fdatasync(self.fh) end - fio_methods.stat = function(self) return internal.fstat(self.fh) end @@ -117,12 +155,14 @@ local fio_mt = { __index = fio_methods } fio.open = function(path, flags, mode) local iflag = 0 local imode = 0 - + if type(path) ~= 'string' then + error("Usage open(path[, flags[, mode]])") + end if type(flags) ~= 'table' then flags = { flags } end if type(mode) ~= 'table' then - mode = { mode or 0x1FF } -- 0777 + mode = { mode or (bit.band(0x1FF, fio.umask())) } end @@ -148,9 +188,9 @@ 
fio.open = function(path, flags, mode) end end - local fh = internal.open(tostring(path), iflag, imode) - if fh < 0 then - return nil + local fh, err = internal.open(tostring(path), iflag, imode) + if err ~= nil then + return nil, err end fh = { fh = fh } @@ -190,8 +230,8 @@ fio.pathjoin = function(path, ...) end fio.basename = function(path, suffix) - if path == nil then - return nil + if type(path) ~= 'string' then + error("Usage fio.basename(path[, suffix])") end path = tostring(path) @@ -209,10 +249,9 @@ fio.basename = function(path, suffix) end fio.dirname = function(path) - if path == nil then - return nil + if type(path) ~= 'string' then + error("Usage fio.dirname(path)") end - path = tostring(path) path = ffi.new('char[?]', #path + 1, path) return ffi.string(ffi.C.dirname(path)) end @@ -235,9 +274,9 @@ fio.abspath = function(path) -- following established conventions of fio module: -- letting nil through and converting path to string if path == nil then - return nil + error("Usage fio.abspath(path)") end - path = tostring(path) + path = path local joined_path = '' local path_tab = {} if string.sub(path, 1, 1) == '/' then @@ -256,10 +295,149 @@ fio.abspath = function(path) end fio.chdir = function(path) - if path == nil or type(path)~='string' then - return false + if type(path)~='string' then + error("Usage: fio.chdir(path)") end return ffi.C.chdir(path) == 0 end +fio.listdir = function(path) + if type(path) ~= 'string' then + error("Usage: fio.listdir(path)") + end + local str, err = internal.listdir(path) + if err ~= nil then + return nil, string.format("can't listdir %s: %s", path, err) + end + local t = {} + if str == "" then + return t + end + local names = string.split(str, "\n") + for i, name in ipairs(names) do + table.insert(t, name) + end + return t +end + +fio.mktree = function(path, mode) + if type(path) ~= "string" then + error("Usage: fio.mktree(path[, mode])") + end + path = fio.abspath(path) + + local path = string.gsub(path, '^/', '') + 
local dirs = string.split(path, "/") + + if #dirs == 1 then + return fio.mkdir(path, mode) + end + local st, err + local current_dir = "/" + for i, dir in ipairs(dirs) do + current_dir = fio.pathjoin(current_dir, dir) + if not fio.stat(current_dir) then + st, err = fio.mkdir(current_dir, mode) + if err ~= nil then + return false, "Error create dir " .. current_dir .. err + end + end + end + return true +end + +fio.rmtree = function(path) + if type(path) ~= 'string' then + error("Usage: fio.rmtree(path)") + end + local status, err + path = fio.abspath(path) + local ls, err = fio.listdir(path) + if err ~= nil then + return nil, err + end + for i, f in ipairs(ls) do + local tmppath = fio.pathjoin(path, f) + local st = fio.stat(tmppath) + if st and st:is_dir() then + st, err = fio.rmtree(tmppath) + if err ~= nil then + return nil, err + end + end + end + status, err = fio.rmdir(path) + if err ~= nil then + return false, string.format("failed to remove %s: %s", path, err) + end + return true +end + +fio.copyfile = function(from, to) + if type(from) ~= 'string' or type(to) ~= 'string' then + error('Usage: fio.copyfile(from, to)') + end + local st = fio.stat(to) + if st and st:is_dir() then + to = fio.pathjoin(to, fio.basename(from)) + end + local _, err = internal.copyfile(from, to) + if err ~= nil then + return false, string.format("failed to copy %s to %s: %s", from, to, err) + end + return true +end + +fio.copytree = function(from, to) + if type(from) ~= 'string' or type(to) ~= 'string' then + error('Usage: fio.copytree(from, to)') + end + local status, reason + local st = fio.stat(from) + if not st then + return false, string.format("Directory %s does not exist", from) + end + if not st:is_dir() then + return false, errno.strerror(errno.ENOTDIR) + end + local ls, err = fio.listdir(from) + if err ~= nil then + return false, err + end + + -- create tree of destination + status, reason = fio.mktree(to) + if reason ~= nil then + return false, reason + end + for i, f in 
ipairs(ls) do + local ffrom = fio.pathjoin(from, f) + local fto = fio.pathjoin(to, f) + local st = fio.lstat(ffrom) + if st and st:is_dir() then + status, reason = fio.copytree(ffrom, fto) + if reason ~= nil then + return false, reason + end + end + if st:is_reg() then + status, reason = fio.copyfile(ffrom, fto) + if reason ~= nil then + return false, reason + end + end + if st:is_link() then + local link_to, reason = fio.readlink(ffrom) + if reason ~= nil then + return false, reason + end + status, reason = fio.symlink(link_to, fto) + if reason ~= nil then + return false, "can't create symlink in place of existing file "..fto + end + end + end + return true +end + return fio diff --git a/src/lua/init.c b/src/lua/init.c index d4e008d302ed9e591c19dfc505811e837db47daf..9bdf85ae297438ed42b702b1ab02e2b9bca16c69 100644 --- a/src/lua/init.c +++ b/src/lua/init.c @@ -56,6 +56,7 @@ #include "lua/pickle.h" #include "lua/fio.h" #include "lua/httpc.h" +#include "digest.h" #include <small/ibuf.h> #include <ctype.h> @@ -387,6 +388,7 @@ tarantool_lua_init(const char *tarantool_bin, int argc, char **argv) tarantool_lua_fio_init(L); tarantool_lua_socket_init(L); tarantool_lua_pickle_init(L); + tarantool_lua_digest_init(L); luaopen_http_client_driver(L); lua_pop(L, 1); luaopen_msgpack(L); diff --git a/src/lua/log.lua b/src/lua/log.lua index 6a3d32ccd73f08e897b3d3448f834c2af2f4bfa0..5de566119f93afbc8168493467a63ace60189813 100644 --- a/src/lua/log.lua +++ b/src/lua/log.lua @@ -44,7 +44,6 @@ local json = require("json") local special_fields = { "file", "level", - "message", "pid", "line", "cord_name", diff --git a/src/main.cc b/src/main.cc index d39cd918704ca3ad2ca4436831873ba6fcd7bbe5..b61068ef7387d4fbe1920d43f28eed5e1165c456 100644 --- a/src/main.cc +++ b/src/main.cc @@ -84,7 +84,7 @@ static char *pid_file = NULL; static char **main_argv; static int main_argc; /** Signals handled after start as part of the event loop. 
*/ -static ev_signal ev_sigs[4]; +static ev_signal ev_sigs[5]; static const int ev_sig_count = sizeof(ev_sigs)/sizeof(*ev_sigs); static double start_time; @@ -130,6 +130,16 @@ signal_cb(ev_loop *loop, struct ev_signal *w, int revents) ev_break(loop, EVBREAK_ALL); } +static void +signal_sigwinch_cb(ev_loop *loop, struct ev_signal *w, int revents) +{ + (void) loop; + (void) w; + (void) revents; + if (rl_instream) + rl_resize_terminal(); +} + /** Try to log as much as possible before dumping a core. * * Core files are not aways allowed and it takes an effort to @@ -210,6 +220,7 @@ signal_reset() sigaction(SIGINT, &sa, NULL) == -1 || sigaction(SIGTERM, &sa, NULL) == -1 || sigaction(SIGHUP, &sa, NULL) == -1 || + sigaction(SIGWINCH, &sa, NULL) == -1 || sigaction(SIGSEGV, &sa, NULL) == -1 || sigaction(SIGFPE, &sa, NULL) == -1) say_syserror("sigaction"); @@ -260,6 +271,7 @@ signal_init(void) ev_signal_init(&ev_sigs[1], signal_cb, SIGINT); ev_signal_init(&ev_sigs[2], signal_cb, SIGTERM); ev_signal_init(&ev_sigs[3], signal_cb, SIGHUP); + ev_signal_init(&ev_sigs[4], signal_sigwinch_cb, SIGWINCH); for (int i = 0; i < ev_sig_count; i++) ev_signal_start(loop(), &ev_sigs[i]); diff --git a/test/app-tap/logger.test.lua b/test/app-tap/logger.test.lua index e80b182585d791058232ae8e80b8271c3ff08f15..4c35c5738da24901b29e4f0db80617af71acce00 100755 --- a/test/app-tap/logger.test.lua +++ b/test/app-tap/logger.test.lua @@ -1,7 +1,7 @@ #!/usr/bin/env tarantool local test = require('tap').test('log') -test:plan(19) +test:plan(20) -- -- Check that Tarantool creates ADMIN session for #! 
script @@ -34,12 +34,17 @@ test:is(file:read():match('I>%s+(.*)'), "gh-2340: %s %D", "formatting without ar log.info({key="value"}) test:is(file:read():match('I>%s+(.*)'), '{"key":"value"}', "table is handled as json") +-- +--gh-2923 dropping message field +-- +log.info({message="value"}) +test:is(file:read():match('I>%s+(.*)'), '{"message":"value"}', "table is handled as json") function help() log.info("gh-2340: %s %s", 'help') end xpcall(help, function(err) test:ok(err:match("bad argument #3"), "found error string") - test:ok(err:match("logger.test.lua:38:"), "found error place") + test:ok(err:match("logger.test.lua:"), "found error place") end) file:close() diff --git a/test/app/digest.result b/test/app/digest.result index 43ea73937cbe057fc12cf235d2e09c7e83eb6c8a..1a86bcaffd28385cdad59e56aa0732e661b40d45 100644 --- a/test/app/digest.result +++ b/test/app/digest.result @@ -506,6 +506,51 @@ digest.base64_decode(b) --- - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ... +digest.pbkdf2("password", "salt", 4096, 32) +--- +- !!binary xeR41ZKIyEGqUw22hFxMjZYok6ABzk4RpJY4c6qYE0o= +... +digest.pbkdf2_hex("password", "salt", 4096, 32) +--- +- c5e478d59288c841aa530db6845c4c8d962893a001ce4e11a4963873aa98134a +... +digest.pbkdf2_hex("password", "salt") +--- +- 0394a2ede332c9a13eb82e9b24631604c31df978b4e2f0fbd2c549944f9d79a536ceea9b92c6170cbbf0153ef33a4ff57321e17b7a5fadc33f7023ddd325da4744753b6d61571dcba34ae96099068cea39a07a4303263cf3749b5fbc93222946a3987f75f2d6aeea024acc4f95a0d6e7141cdb0b1f12065030ac169507f91b32 +... +s, err = pcall(digest.pbkdf2, 12, "salt") +--- +... +s +--- +- false +... +err:match("Usage") +--- +- Usage +... +s, err = pcall(digest.pbkdf2_hex, 12, "salt") +--- +... +s +--- +- false +... +err:match("Usage") +--- +- Usage +... +s, err = pcall(digest.pbkdf2_hex, "password", "salt", "lol", "lol") +--- +... +s +--- +- false +... +err:match("number") +--- +- number +... digest = nil --- ... 
diff --git a/test/app/digest.test.lua b/test/app/digest.test.lua index f834eca17511fa5788f17c56d8ed30fc4e775591..76e71fe2da571aaaf3d61d785c68767ca3d5c220 100644 --- a/test/app/digest.test.lua +++ b/test/app/digest.test.lua @@ -161,6 +161,17 @@ b = digest.base64_encode(string.rep('a', 100), { nowrap = true }) b digest.base64_decode(b) - +digest.pbkdf2("password", "salt", 4096, 32) +digest.pbkdf2_hex("password", "salt", 4096, 32) +digest.pbkdf2_hex("password", "salt") +s, err = pcall(digest.pbkdf2, 12, "salt") +s +err:match("Usage") +s, err = pcall(digest.pbkdf2_hex, 12, "salt") +s +err:match("Usage") +s, err = pcall(digest.pbkdf2_hex, "password", "salt", "lol", "lol") +s +err:match("number") digest = nil test_run:cmd("clear filter") diff --git a/test/app/fio.result b/test/app/fio.result index 60d69cd161795555e0dc115cc27897991b7da8da..37b0b4076b64dbc68477474a815efc63b0d0b9b6 100644 --- a/test/app/fio.result +++ b/test/app/fio.result @@ -1,7 +1,10 @@ fio = require 'fio' --- ... -errno = require 'errno' +ffi = require 'ffi' +--- +... +buffer = require 'buffer' --- ... -- umask @@ -14,9 +17,16 @@ fio.umask() - 0 ... -- pathjoin -fio.basename(nil, nil) +st, err = pcall(fio.basename, nil, nil) --- -- null +... +st +--- +- false +... +err:match("basename") ~= nil +--- +- true ... fio.pathjoin('abc', 'cde') --- @@ -35,9 +45,16 @@ fio.pathjoin('/', '/cde') - /cde ... -- basename -fio.basename(nil) +st, err = pcall(fio.basename, nil) --- -- null +... +st +--- +- false +... +err:match("basename") ~= nil +--- +- true ... fio.basename('/') --- @@ -75,9 +92,16 @@ file3 = fio.pathjoin(tmpdir, 'file.3') file4 = fio.pathjoin(tmpdir, 'file.4') --- ... -fio.open(nil) +st, err = pcall(fio.open, nil) --- -- null +... +st +--- +- false +... +err:match("open") ~= nil +--- +- true ... fh1 = fio.open(file1, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777) --- @@ -341,12 +365,15 @@ string.format('%04o', bit.band(fio.stat(dir2).mode, 0x1FF)) - - true - true ... 
-{ fh1:close(), errno.strerror(), fh3:close(), errno.strerror() } +fh1:close() --- -- - false - - Bad file descriptor - - false - - Bad file descriptor +- false +- 'fio: Bad file descriptor' +... +fh3:close() +--- +- false +- 'fio: Bad file descriptor' ... fio.rmdir(nil) --- @@ -373,15 +400,16 @@ fio.rmdir(dir2) - false - false - false + - 'fio: No such file or directory' ... fio.rmdir(tmpdir) --- - true ... -{ fio.rmdir(tmpdir), errno.strerror() } +fio.rmdir(tmpdir) --- -- - false - - No such file or directory +- false +- 'fio: No such file or directory' ... fio.unlink() --- @@ -395,9 +423,9 @@ fio.unlink(nil) fh4 = fio.open('newfile', {'O_RDWR','O_CREAT','O_EXCL'}) --- ... -string.format('%o', bit.band(fh4:stat().mode, 0x1FF)) +bit.band(fh4:stat().mode, 0x1FF) == bit.band(fio.umask(), 0x1ff) --- -- '777' +- true ... fh4:close() --- @@ -408,9 +436,16 @@ fio.unlink('newfile') - true ... -- dirname -fio.dirname(nil) +st, err = pcall(fio.dirname, nil) --- -- null +... +st +--- +- false +... +err:match("dirname") ~= nil +--- +- true ... fio.dirname('abc') --- @@ -433,9 +468,16 @@ fio.dirname('/') - / ... -- abspath -fio.abspath(nil) +st, err = pcall(fio.abspath, nil) --- -- null +... +st +--- +- false +... +err:match("abspath") ~= nil +--- +- true ... fio.abspath("/") --- @@ -477,14 +519,28 @@ type(string.find(fio.abspath("tmp"), "tmp")) old_cwd = fio.cwd() --- ... -fio.chdir(nil) +st, err = pcall(fio.chdir, nil) +--- +... +st --- - false ... -fio.chdir(42) +err:match("chdir") ~= nil +--- +- true +... +st, err = pcall(fio.chdir, 42) +--- +... +st --- - false ... +err:match("chdir") ~= nil +--- +- true +... fio.chdir('/no/such/file/or/directory') --- - false @@ -505,3 +561,307 @@ fio.cwd() == old_cwd --- - true ... +-- listdir +tmpdir = fio.tempdir() +--- +... +dir3 = fio.pathjoin(tmpdir, "dir3") +--- +... +st, err = pcall(fio.mkdir, nil) +--- +... +st +--- +- false +... +err:match("mkdir") ~= nil +--- +- true +... +fio.mkdir(dir3) +--- +- true +... 
+fio.mkdir(fio.pathjoin(dir3, "1")) +--- +- true +... +fio.mkdir(fio.pathjoin(dir3, "2")) +--- +- true +... +fio.mkdir(fio.pathjoin(dir3, "3")) +--- +- true +... +fio.listdir("/no/such/directory/") +--- +- null +- 'can''t listdir /no/such/directory/: fio: No such file or directory' +... +ls = fio.listdir(dir3) +--- +... +table.sort(ls, function(a, b) return tonumber(a) < tonumber(b) end) +--- +... +ls +--- +- - '1' + - '2' + - '3' +... +-- rmtree +fio.stat(dir3) ~= nil +--- +- true +... +fio.rmtree(dir3) +--- +- true +... +fio.stat(dir3) == nil +--- +- true +... +st, err = fio.rmtree(dir3) +--- +... +st +--- +- null +... +err:match("No such") ~= nil +--- +- true +... +-- mktree +tmp1 = fio.pathjoin(tmpdir, "1") +--- +... +tmp2 = fio.pathjoin(tmp1, "2") +--- +... +tree = fio.pathjoin(tmp2, "3") +--- +... +tree2 = fio.pathjoin(tmpdir, "4") +--- +... +st, err = pcall(fio.mktree, nil) +--- +... +st +--- +- false +... +err:match("mktree") ~= nil +--- +- true +... +fio.mktree(tree) +--- +- true +... +fio.stat(tree) ~= nil +--- +- true +... +fio.stat(tmp2) ~= nil +--- +- true +... +fio.mktree(tree2, 1) +--- +- true +... +-- copy and copytree +file1 = fio.pathjoin(tmp1, 'file.1') +--- +... +file2 = fio.pathjoin(tmp2, 'file.2') +--- +... +file3 = fio.pathjoin(tree, 'file.3') +--- +... +fh1 = fio.open(file1, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777) +--- +... +fh1:write("gogo") +--- +- true +... +fh1:close() +--- +- true +... +fh1 = fio.open(file2, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777) +--- +... +fh1:write("lolo") +--- +- true +... +fh1:close() +--- +- true +... +fio.symlink(file1, file3) +--- +- true +... +fio.copyfile(file1, tmp2) +--- +- true +... +fio.stat(fio.pathjoin(tmp2, "file.1")) ~= nil +--- +- true +... +res, err = fio.copyfile(fio.pathjoin(tmp1, 'not_exists.txt'), tmp1) +--- +... +res +--- +- false +... +err:match("failed to copy") ~= nil +--- +- true +... +newdir = fio.pathjoin(tmpdir, "newdir") +--- +... 
+fio.copytree(fio.pathjoin(tmpdir, "1"), newdir) +--- +- true +... +fio.stat(fio.pathjoin(newdir, "file.1")) ~= nil +--- +- true +... +fio.stat(fio.pathjoin(newdir, "2", "file.2")) ~= nil +--- +- true +... +fio.stat(fio.pathjoin(newdir, "2", "3", "file.3")) ~= nil +--- +- true +... +fio.readlink(fio.pathjoin(newdir, "2", "3", "file.3")) == file1 +--- +- true +... +fio.copytree("/no/such/dir", "/some/where") +--- +- false +- Directory /no/such/dir does not exist +... +-- ibuf read/write +buf = buffer.ibuf() +--- +... +tmpdir = fio.tempdir() +--- +... +tmpfile = fio.pathjoin(tmpdir, "test") +--- +... +fh = fio.open(tmpfile, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777) +--- +... +fh:write('helloworld!') +--- +- true +... +fh:seek(0) +--- +- 0 +... +len = fh:read(buf:reserve(5), 5) +--- +... +ffi.string(buf:alloc(len), len) +--- +- hello +... +len = fh:read(buf:reserve(5), 5) +--- +... +ffi.string(buf:alloc(len), len) +--- +- world +... +len = fh:read(buf:reserve(5), 5) +--- +... +ffi.string(buf:alloc(len), len) +--- +- '!' +... +buf:reset() +--- +... +len = fh:pread(buf:reserve(5), 5, 5) +--- +... +ffi.string(buf:alloc(len), len) +--- +- world +... +len = fh:pread(buf:reserve(5), 5) +--- +... +ffi.string(buf:alloc(len), len) +--- +- hello +... +fh:seek(0) +--- +- 0 +... +fh:write(buf.rpos, buf:size()) +--- +- true +... +fh:seek(0) +--- +- 0 +... +fh:read(64) +--- +- worldhello! +... +fh:pwrite(buf:read(5), 5, 5) +--- +- true +... +fh:pwrite(buf:read(5), 5) +--- +- true +... +fh:seek(0) +--- +- 0 +... +fh:read(64) +--- +- helloworld! +... +buf:recycle() +--- +... +fh:close() +--- +- true +... +fio.unlink(tmpfile) +--- +- true +... +fio.rmdir(tmpdir) +--- +- true +... 
diff --git a/test/app/fio.test.lua b/test/app/fio.test.lua index e95f6397b26d4cfe0fcb5d375b3901c923da24d7..57bedd05f29c262bc9769cb99a0859110943ad80 100644 --- a/test/app/fio.test.lua +++ b/test/app/fio.test.lua @@ -1,5 +1,6 @@ fio = require 'fio' -errno = require 'errno' +ffi = require 'ffi' +buffer = require 'buffer' -- umask @@ -7,14 +8,18 @@ type(fio.umask(0)) fio.umask() -- pathjoin -fio.basename(nil, nil) +st, err = pcall(fio.basename, nil, nil) +st +err:match("basename") ~= nil fio.pathjoin('abc', 'cde') fio.pathjoin('/', 'abc') fio.pathjoin('abc/', '/cde') fio.pathjoin('/', '/cde') -- basename -fio.basename(nil) +st, err = pcall(fio.basename, nil) +st +err:match("basename") ~= nil fio.basename('/') fio.basename('abc') fio.basename('abc.cde', '.cde') @@ -32,7 +37,9 @@ file3 = fio.pathjoin(tmpdir, 'file.3') file4 = fio.pathjoin(tmpdir, 'file.4') -fio.open(nil) +st, err = pcall(fio.open, nil) +st +err:match("open") ~= nil fh1 = fio.open(file1, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777) fh1 ~= nil f1s = fh1:stat() @@ -113,7 +120,8 @@ string.format('%04o', bit.band(fio.stat(dir2).mode, 0x1FF)) -- cleanup directories { fh1:close(), fh3:close() } -{ fh1:close(), errno.strerror(), fh3:close(), errno.strerror() } +fh1:close() +fh3:close() fio.rmdir(nil) fio.rmdir(dir1) @@ -122,20 +130,22 @@ fio.rmdir(dir2) { fio.unlink(file1), fio.unlink(file2), fio.unlink(file3), fio.unlink(file4) } { fio.unlink(file1), fio.unlink(file2), fio.unlink(file3), fio.unlink(file4) } fio.rmdir(tmpdir) -{ fio.rmdir(tmpdir), errno.strerror() } +fio.rmdir(tmpdir) fio.unlink() fio.unlink(nil) -- gh-1211 use 0777 if mode omitted in open fh4 = fio.open('newfile', {'O_RDWR','O_CREAT','O_EXCL'}) -string.format('%o', bit.band(fh4:stat().mode, 0x1FF)) +bit.band(fh4:stat().mode, 0x1FF) == bit.band(fio.umask(), 0x1ff) fh4:close() fio.unlink('newfile') -- dirname -fio.dirname(nil) +st, err = pcall(fio.dirname, nil) +st +err:match("dirname") ~= nil fio.dirname('abc') fio.dirname('/abc') 
fio.dirname('/abc/cde') @@ -143,7 +153,9 @@ fio.dirname('/abc/cde/') fio.dirname('/') -- abspath -fio.abspath(nil) +st, err = pcall(fio.abspath, nil) +st +err:match("abspath") ~= nil fio.abspath("/") fio.abspath("/tmp") fio.abspath("/tmp/test/../") @@ -156,10 +168,118 @@ type(string.find(fio.abspath("tmp"), "tmp")) -- chdir old_cwd = fio.cwd() -fio.chdir(nil) -fio.chdir(42) +st, err = pcall(fio.chdir, nil) +st +err:match("chdir") ~= nil +st, err = pcall(fio.chdir, 42) +st +err:match("chdir") ~= nil fio.chdir('/no/such/file/or/directory') fio.chdir('/') fio.cwd() fio.chdir(old_cwd) fio.cwd() == old_cwd + +-- listdir +tmpdir = fio.tempdir() +dir3 = fio.pathjoin(tmpdir, "dir3") +st, err = pcall(fio.mkdir, nil) +st +err:match("mkdir") ~= nil +fio.mkdir(dir3) +fio.mkdir(fio.pathjoin(dir3, "1")) +fio.mkdir(fio.pathjoin(dir3, "2")) +fio.mkdir(fio.pathjoin(dir3, "3")) +fio.listdir("/no/such/directory/") +ls = fio.listdir(dir3) +table.sort(ls, function(a, b) return tonumber(a) < tonumber(b) end) +ls + +-- rmtree +fio.stat(dir3) ~= nil +fio.rmtree(dir3) +fio.stat(dir3) == nil +st, err = fio.rmtree(dir3) +st +err:match("No such") ~= nil + +-- mktree +tmp1 = fio.pathjoin(tmpdir, "1") +tmp2 = fio.pathjoin(tmp1, "2") +tree = fio.pathjoin(tmp2, "3") +tree2 = fio.pathjoin(tmpdir, "4") +st, err = pcall(fio.mktree, nil) +st +err:match("mktree") ~= nil +fio.mktree(tree) +fio.stat(tree) ~= nil +fio.stat(tmp2) ~= nil +fio.mktree(tree2, 1) + +-- copy and copytree +file1 = fio.pathjoin(tmp1, 'file.1') +file2 = fio.pathjoin(tmp2, 'file.2') +file3 = fio.pathjoin(tree, 'file.3') + +fh1 = fio.open(file1, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777) +fh1:write("gogo") +fh1:close() +fh1 = fio.open(file2, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777) +fh1:write("lolo") +fh1:close() +fio.symlink(file1, file3) +fio.copyfile(file1, tmp2) +fio.stat(fio.pathjoin(tmp2, "file.1")) ~= nil + +res, err = fio.copyfile(fio.pathjoin(tmp1, 'not_exists.txt'), tmp1) +res +err:match("failed to copy") ~= nil + +newdir 
= fio.pathjoin(tmpdir, "newdir") +fio.copytree(fio.pathjoin(tmpdir, "1"), newdir) +fio.stat(fio.pathjoin(newdir, "file.1")) ~= nil +fio.stat(fio.pathjoin(newdir, "2", "file.2")) ~= nil +fio.stat(fio.pathjoin(newdir, "2", "3", "file.3")) ~= nil +fio.readlink(fio.pathjoin(newdir, "2", "3", "file.3")) == file1 +fio.copytree("/no/such/dir", "/some/where") + +-- ibuf read/write +buf = buffer.ibuf() + +tmpdir = fio.tempdir() +tmpfile = fio.pathjoin(tmpdir, "test") +fh = fio.open(tmpfile, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777) + +fh:write('helloworld!') +fh:seek(0) + +len = fh:read(buf:reserve(5), 5) +ffi.string(buf:alloc(len), len) +len = fh:read(buf:reserve(5), 5) +ffi.string(buf:alloc(len), len) +len = fh:read(buf:reserve(5), 5) +ffi.string(buf:alloc(len), len) + +buf:reset() +len = fh:pread(buf:reserve(5), 5, 5) +ffi.string(buf:alloc(len), len) +len = fh:pread(buf:reserve(5), 5) +ffi.string(buf:alloc(len), len) + +fh:seek(0) +fh:write(buf.rpos, buf:size()) + +fh:seek(0) +fh:read(64) + +fh:pwrite(buf:read(5), 5, 5) +fh:pwrite(buf:read(5), 5) + +fh:seek(0) +fh:read(64) + +buf:recycle() + +fh:close() +fio.unlink(tmpfile) +fio.rmdir(tmpdir) diff --git a/test/replication/errinj.result b/test/replication/errinj.result index 3b7233c984906fb23f979f4c359761b352d0d860..0d0ab0c9836b4058e65b59a6a9695993adbf8ec7 100644 --- a/test/replication/errinj.result +++ b/test/replication/errinj.result @@ -235,6 +235,10 @@ box.info.replication[1].upstream.status --- - follow ... +box.info.replication[1].upstream.lag +--- +- 0 +... 
-- wait for ack timeout while box.info.replication[1].upstream.message ~= 'timed out' do fiber.sleep(0.0001) end --- diff --git a/test/replication/errinj.test.lua b/test/replication/errinj.test.lua index f4af30d171ae93cf6105a173823bc2b17e2b074c..5d4ef4e5716a1d97a96ad885fae23802e5ed5978 100644 --- a/test/replication/errinj.test.lua +++ b/test/replication/errinj.test.lua @@ -102,6 +102,7 @@ test_run:cmd("switch replica") -- wait for reconnect while box.info.replication[1].upstream.status ~= 'follow' do fiber.sleep(0.0001) end box.info.replication[1].upstream.status +box.info.replication[1].upstream.lag -- wait for ack timeout while box.info.replication[1].upstream.message ~= 'timed out' do fiber.sleep(0.0001) end diff --git a/test/unit/vy_cache.c b/test/unit/vy_cache.c index 981272231214f4b7d3b32e7578c4773bba5657d5..37d988280622f6dae86049585707b08fc3d2e0b3 100644 --- a/test/unit/vy_cache.c +++ b/test/unit/vy_cache.c @@ -86,7 +86,7 @@ test_basic() struct tuple *ret; bool unused; for (int i = 0; i < 4; ++i) - itr.base.iface->next_key(&itr.base, &ret, &unused); + vy_cache_iterator_next(&itr, &ret, &unused); ok(vy_stmt_are_same(ret, &chain1[3], format, NULL, NULL), "next_key * 4"); @@ -108,13 +108,13 @@ test_basic() */ struct tuple *last_stmt = vy_new_simple_stmt(format, NULL, NULL, &chain1[0]); - ok(itr.base.iface->restore(&itr.base, last_stmt, &ret, &unused) >= 0, + ok(vy_cache_iterator_restore(&itr, last_stmt, &ret, &unused) >= 0, "restore"); ok(vy_stmt_are_same(ret, &chain1[1], format, NULL, NULL), "restore on position after last"); tuple_unref(last_stmt); - itr.base.iface->close(&itr.base); + vy_cache_iterator_close(&itr); tuple_unref(select_all); destroy_test_cache(&cache, key_def, format); diff --git a/test/unit/vy_iterators_helper.h b/test/unit/vy_iterators_helper.h index a89fc4b904ad19a8c88592b7a29dfb002d5adc84..216cf6045a18e55d0466a190298acc1daebf6ab4 100644 --- a/test/unit/vy_iterators_helper.h +++ b/test/unit/vy_iterators_helper.h @@ -37,8 +37,8 @@ #include 
"small/rlist.h" #include "small/lsregion.h" #include "vy_mem.h" -#include "vy_stmt_iterator.h" #include "vy_cache.h" +#include "vy_read_view.h" #define vyend 99999999 #define MAX_FIELDS_COUNT 100 diff --git a/test/unit/vy_mem.c b/test/unit/vy_mem.c index debac5cfa8cade7f399fe4db66689dc008d00483..207f1346a278d43e1f8476fb61114a5794e9cfd4 100644 --- a/test/unit/vy_mem.c +++ b/test/unit/vy_mem.c @@ -194,8 +194,7 @@ test_iterator_restore_after_insertion() direct ? ITER_GE : ITER_LE, select_key, &prv); struct tuple *t; - bool stop = false; - int rc = itr.base.iface->next_key(&itr.base, &t, &stop); + int rc = vy_mem_iterator_next_key(&itr, &t); assert(rc == 0); size_t j = 0; while (t != NULL) { @@ -215,7 +214,7 @@ test_iterator_restore_after_insertion() break; else if(!direct && val <= middle_value) break; - int rc = itr.base.iface->next_key(&itr.base, &t, &stop); + int rc = vy_mem_iterator_next_key(&itr, &t); assert(rc == 0); } if (t == NULL && j != expected_count) @@ -267,9 +266,9 @@ test_iterator_restore_after_insertion() } if (direct) - rc = itr.base.iface->restore(&itr.base, restore_on_key, &t, &stop); + rc = vy_mem_iterator_restore(&itr, restore_on_key, &t); else - rc = itr.base.iface->restore(&itr.base, restore_on_key_reverse, &t, &stop); + rc = vy_mem_iterator_restore(&itr, restore_on_key_reverse, &t); j = 0; while (t != NULL) { @@ -285,7 +284,7 @@ test_iterator_restore_after_insertion() break; } j++; - int rc = itr.base.iface->next_key(&itr.base, &t, &stop); + int rc = vy_mem_iterator_next_key(&itr, &t); assert(rc == 0); } if (j != expected_count) diff --git a/test/unit/vy_write_iterator.c b/test/unit/vy_write_iterator.c index 37a730a34b045e8e2bf82e08cfa97bb6567fcd98..9c5c97d1b26b67e52620ebe3cf44138b174d35e9 100644 --- a/test/unit/vy_write_iterator.c +++ b/test/unit/vy_write_iterator.c @@ -75,7 +75,7 @@ void test_basic(void) { header(); - plan(36); + plan(38); /* Create key_def */ uint32_t fields[] = { 0 }; @@ -395,6 +395,36 @@ test_basic(void) 
compare_write_iterator_results(key_def, content, content_count, expected, expected_count, vlsns, vlsns_count, true, false); +} +{ +/* + * STATEMENT: REPL DEL REPL DEL REPL DEL + * LSN: 4 5 6 7 8 9 + * READ VIEW: * * * + * \_______/\_______________/ + * merge skip + * + * is_last_level = false + * + * Check that tautological DELETEs referenced by newer + * read views are skipped. + */ + const struct vy_stmt_template content[] = { + STMT_TEMPLATE(4, REPLACE, 1, 1), + STMT_TEMPLATE(5, DELETE, 1), + STMT_TEMPLATE(6, REPLACE, 1, 2), + STMT_TEMPLATE(7, DELETE, 1), + STMT_TEMPLATE(8, REPLACE, 1, 3), + STMT_TEMPLATE(9, DELETE, 1), + }; + const struct vy_stmt_template expected[] = { content[1] }; + const int vlsns[] = {5, 7, 9}; + int content_count = sizeof(content) / sizeof(content[0]); + int expected_count = sizeof(expected) / sizeof(expected[0]); + int vlsns_count = sizeof(vlsns) / sizeof(vlsns[0]); + compare_write_iterator_results(key_def, content, content_count, + expected, expected_count, + vlsns, vlsns_count, true, false); } box_key_def_delete(key_def); fiber_gc(); diff --git a/test/unit/vy_write_iterator.result b/test/unit/vy_write_iterator.result index 86029f9864fc6e4cc3fcb1f570c8c54a4bd4edfc..141a11f3f0e4bf3dcc61fc28c7a5443846a08e36 100644 --- a/test/unit/vy_write_iterator.result +++ b/test/unit/vy_write_iterator.result @@ -1,5 +1,5 @@ *** test_basic *** -1..36 +1..38 ok 1 - stmt 0 is correct ok 2 - stmt 1 is correct ok 3 - stmt 2 is correct @@ -36,4 +36,6 @@ ok 33 - stmt 0 is correct ok 34 - correct results count ok 35 - stmt 0 is correct ok 36 - correct results count +ok 37 - stmt 0 is correct +ok 38 - correct results count *** test_basic: done *** diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index 4a5bc14d07c71c81aeff4d8491e3b1900ad15796..9f4c82c178b96e8eee1dfe3d81ea1d39112fcd8d 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -282,6 +282,40 @@ s:select() - [9, 'test str9'] - [10, 'test str10'] ... 
+-- gh-2871: check that long reads are logged +too_long_threshold = box.cfg.too_long_threshold +--- +... +box.cfg{too_long_threshold = 0.01} +--- +... +errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", true) +--- +- ok +... +#s:select(5, {iterator = 'LE'}) == 5 +--- +- true +... +errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", false); +--- +- ok +... +test_run:cmd("push filter 'lsn=[0-9]+' to 'lsn=<lsn>'") +--- +- true +... +test_run:grep_log('default', 'select.* took too long') +--- +- select([5], LE) => REPLACE([5, "test str5"], lsn=<lsn>) took too long +... +test_run:cmd("clear filter") +--- +- true +... +box.cfg{too_long_threshold = too_long_threshold} +--- +... s:drop() --- ... diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index 5def06a0ec96ada67406a3dd835b27af7a937109..98d31182013e8e7c864576eec8a933294b6262b1 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -95,6 +95,18 @@ fiber.sleep(0.1) errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", false); errinj.set("ERRINJ_VY_READ_PAGE", false); s:select() + +-- gh-2871: check that long reads are logged +too_long_threshold = box.cfg.too_long_threshold +box.cfg{too_long_threshold = 0.01} +errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", true) +#s:select(5, {iterator = 'LE'}) == 5 +errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", false); +test_run:cmd("push filter 'lsn=[0-9]+' to 'lsn=<lsn>'") +test_run:grep_log('default', 'select.* took too long') +test_run:cmd("clear filter") +box.cfg{too_long_threshold = too_long_threshold} + s:drop() s = box.schema.space.create('test', {engine='vinyl'}); diff --git a/test/vinyl/info.lua b/test/vinyl/info.lua new file mode 100644 index 0000000000000000000000000000000000000000..af31516eebb33e183f002218255197c176f8cdf7 --- /dev/null +++ b/test/vinyl/info.lua @@ -0,0 +1,7 @@ +#!/usr/bin/env tarantool + +box.cfg{ + vinyl_cache = 15 * 1024, -- 15K to test cache eviction +} + +require('console').listen(os.getenv('ADMIN')) diff --git a/test/vinyl/info.result 
b/test/vinyl/info.result new file mode 100644 index 0000000000000000000000000000000000000000..c95f709ba14d3dac7dbfe4843509113ef95d3687 --- /dev/null +++ b/test/vinyl/info.result @@ -0,0 +1,921 @@ +test_run = require('test_run').new() +--- +... +-- Since we store LSNs in data files, the data size may differ +-- from run to run. Deploy a new server to make sure it will be +-- the same so that we can check it. +test_run:cmd('create server test with script = "vinyl/info.lua"') +--- +- true +... +test_run:cmd('start server test') +--- +- true +... +test_run:cmd('switch test') +--- +- true +... +fiber = require('fiber') +--- +... +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +_ = s:create_index('pk', {page_size = 4096, range_size = 16384, run_count_per_level = 1, run_size_ratio = 1000}) +--- +... +-- +-- Helper functions. +-- +test_run:cmd("setopt delimiter ';'") +--- +- true +... +-- Generate random 1K padding. +function pad() + local t = {} + for i = 1, 1024 do + t[i] = string.char(math.random(65, 90)) + end + return table.concat(t) +end; +--- +... +-- Insert a tuple into the test space. +function put(val) + box.space.test:replace{val, pad()} +end; +--- +... +-- Compute the difference between two tables containing stats. +-- If a field value is the same, it will be set to nil in the +-- resulting table. If 'path' is not 'nil', compare statistics +-- starting from 'path'. 
+function stat_diff(stat1, stat2, path) + while path ~= nil and path ~= '' do + local i = path:find('%.') or path:len() + 1 + local node = path:sub(1, i - 1) + path = path:sub(i + 1, path:len()) + stat1 = stat1[node] + stat2 = stat2[node] + end + if type(stat1) == 'string' then + return nil + end + if type(stat1) == 'number' then + return stat1 ~= stat2 and stat1 - stat2 or nil + end + assert(type(stat1) == 'table') + local diff + for k, v1 in pairs(stat1) do + local v2 = stat2[k] + local d = stat_diff(v1, v2) + if d ~= nil then + if diff == nil then + diff = {} + end + diff[k] = d + end + end + return diff +end; +--- +... +-- Return index statistics. +-- +-- Note, latency measurement is beyond the scope of this test +-- so we just filter it out. +function istat() + local st = box.space.test.index.pk:info() + st.latency = nil + return st +end; +--- +... +-- Return global statistics. +-- +-- Note, quota watermark checking is beyond the scope of this +-- test so we just filter out related statistics. +function gstat() + local st = box.info.vinyl() + st.quota.use_rate = nil + st.quota.dump_bandwidth = nil + st.quota.watermark = nil + return st +end; +--- +... +-- Wait until a stat counter changes. +function wait(stat_func, stat, path, diff) + while (stat_diff(stat_func(), stat, path) or 0) < diff do + fiber.sleep(0.01) + end +end; +--- +... +test_run:cmd("setopt delimiter ''"); +--- +- true +... 
+-- initially stats are empty +istat() +--- +- rows: 0 + run_avg: 0 + bytes: 0 + upsert: + squashed: 0 + applied: 0 + lookup: 0 + run_count: 0 + cache: + invalidate: + rows: 0 + bytes: 0 + rows: 0 + evict: + rows: 0 + bytes: 0 + put: + rows: 0 + bytes: 0 + lookup: 0 + bytes: 0 + get: + rows: 0 + bytes: 0 + range_count: 1 + put: + rows: 0 + bytes: 0 + disk: + dump: + in: + rows: 0 + bytes: 0 + count: 0 + out: + rows: 0 + bytes: 0 + compact: + in: + rows: 0 + bytes: 0 + count: 0 + out: + rows: 0 + bytes: 0 + rows: 0 + iterator: + read: + bytes_compressed: 0 + pages: 0 + rows: 0 + bytes: 0 + bloom: + hit: 0 + miss: 0 + lookup: 0 + get: + rows: 0 + bytes: 0 + pages: 0 + bytes_compressed: 0 + bytes: 0 + txw: + bytes: 0 + rows: 0 + iterator: + lookup: 0 + get: + rows: 0 + bytes: 0 + run_histogram: '[0]:1' + memory: + bytes: 0 + rows: 0 + iterator: + lookup: 0 + get: + rows: 0 + bytes: 0 + get: + rows: 0 + bytes: 0 +... +gstat() +--- +- cache: + limit: 15360 + tuples: 0 + used: 0 + tx: + conflict: 0 + commit: 0 + rollback: 0 + statements: 0 + transactions: 0 + gap_locks: 0 + read_views: 0 + quota: + limit: 134217728 + used: 0 +... +-- +-- Index statistics. +-- +-- Compressed data size may differ as padding is random. +-- Besides, it may depend on the zstd version so let's +-- filter it out. +test_run:cmd("push filter 'bytes_compressed: .*' to 'bytes_compressed: <bytes_compressed>'") +--- +- true +... +-- put + dump +st = istat() +--- +... +for i = 1, 100, 4 do put(i) end +--- +... +box.snapshot() +--- +- ok +... +wait(istat, st, 'disk.dump.count', 1) +--- +... +stat_diff(istat(), st) +--- +- rows: 25 + run_avg: 1 + run_count: 1 + disk: + dump: + in: + rows: 25 + bytes: 26525 + count: 1 + out: + rows: 25 + bytes: 26049 + rows: 25 + pages: 7 + bytes_compressed: <bytes_compressed> + bytes: 26049 + bytes: 26049 + put: + rows: 25 + bytes: 26525 +... +-- put + dump + compaction +st = istat() +--- +... +for i = 1, 100, 2 do put(i) end +--- +... +box.snapshot() +--- +- ok +... 
+wait(istat, st, 'disk.compact.count', 1) +--- +... +stat_diff(istat(), st) +--- +- disk: + dump: + in: + rows: 50 + bytes: 53050 + count: 1 + out: + rows: 50 + bytes: 52091 + compact: + in: + rows: 75 + bytes: 78140 + count: 1 + out: + rows: 50 + bytes: 52091 + rows: 25 + pages: 6 + bytes_compressed: <bytes_compressed> + bytes: 26042 + put: + rows: 50 + bytes: 53050 + rows: 25 + bytes: 26042 +... +-- point lookup from disk + cache put +st = istat() +--- +... +s:get(1) ~= nil +--- +- true +... +stat_diff(istat(), st) +--- +- cache: + bytes: 1061 + lookup: 1 + rows: 1 + put: + rows: 1 + bytes: 1061 + lookup: 1 + disk: + iterator: + read: + bytes: 4167 + pages: 1 + bytes_compressed: <bytes_compressed> + rows: 4 + lookup: 1 + get: + rows: 1 + bytes: 1061 + memory: + iterator: + lookup: 1 + get: + rows: 1 + bytes: 1061 +... +-- point lookup from cache +st = istat() +--- +... +s:get(1) ~= nil +--- +- true +... +stat_diff(istat(), st) +--- +- cache: + lookup: 1 + put: + rows: 1 + bytes: 1061 + get: + rows: 1 + bytes: 1061 + lookup: 1 + get: + rows: 1 + bytes: 1061 +... +-- put in memory + cache invalidate +st = istat() +--- +... +put(1) +--- +... +stat_diff(istat(), st) +--- +- cache: + invalidate: + rows: 1 + bytes: 1061 + rows: -1 + bytes: -1061 + rows: 1 + memory: + bytes: 1061 + rows: 1 + put: + rows: 1 + bytes: 1061 + bytes: 1061 +... +-- point lookup from memory +st = istat() +--- +... +s:get(1) ~= nil +--- +- true +... +stat_diff(istat(), st) +--- +- cache: + bytes: 1061 + lookup: 1 + rows: 1 + put: + rows: 1 + bytes: 1061 + memory: + iterator: + lookup: 1 + get: + rows: 1 + bytes: 1061 + lookup: 1 + get: + rows: 1 + bytes: 1061 +... +-- put in txw + point lookup from txw +st = istat() +--- +... +box.begin() +--- +... +put(1) +--- +... +s:get(1) ~= nil +--- +- true +... +stat_diff(istat(), st) +--- +- txw: + rows: 1 + bytes: 1061 + iterator: + lookup: 1 + get: + rows: 1 + bytes: 1061 + lookup: 1 + get: + rows: 1 + bytes: 1061 +... +box.rollback() +--- +... 
+-- apply upsert in txw +st = istat() +--- +... +box.begin() +--- +... +_ = s:replace{1} +--- +... +_ = s:upsert({1}, {{'=', 2, pad()}}) +--- +... +stat_diff(istat(), st, 'upsert') +--- +- squashed: 1 + applied: 1 +... +box.rollback() +--- +... +-- apply upsert on get +st = istat() +--- +... +_ = s:upsert({5}, {{'=', 2, pad()}}) +--- +... +s:get(5) ~= nil +--- +- true +... +stat_diff(istat(), st, 'upsert') +--- +- applied: 1 +... +-- cache eviction +assert(box.cfg.vinyl_cache < 100 * 1024) +--- +- true +... +for i = 1, 100 do put(i) end +--- +... +st = istat() +--- +... +for i = 1, 100 do s:get(i) end +--- +... +stat_diff(istat(), st, 'cache') +--- +- rows: 14 + bytes: 14854 + evict: + rows: 86 + bytes: 91246 + lookup: 100 + put: + rows: 100 + bytes: 106100 +... +-- range split +for i = 1, 100 do put(i) end +--- +... +st = istat() +--- +... +box.snapshot() +--- +- ok +... +wait(istat, st, 'disk.compact.count', 2) +--- +... +st = istat() +--- +... +st.range_count -- 2 +--- +- 2 +... +st.run_count -- 2 +--- +- 2 +... +st.run_avg -- 1 +--- +- 1 +... +st.run_histogram -- [1]:2 +--- +- '[1]:2' +... +-- range lookup +for i = 1, 100 do put(i) end +--- +... +box.begin() +--- +... +for i = 1, 100, 2 do put(i) end +--- +... +st = istat() +--- +... +#s:select() +--- +- 100 +... +stat_diff(istat(), st) +--- +- cache: + rows: 13 + bytes: 13793 + evict: + rows: 37 + bytes: 39257 + lookup: 1 + put: + rows: 51 + bytes: 54111 + disk: + iterator: + read: + bytes: 104300 + pages: 25 + bytes_compressed: <bytes_compressed> + rows: 100 + lookup: 2 + get: + rows: 100 + bytes: 106100 + txw: + iterator: + lookup: 1 + get: + rows: 50 + bytes: 53050 + memory: + iterator: + lookup: 1 + get: + rows: 100 + bytes: 106100 + lookup: 1 + get: + rows: 100 + bytes: 106100 +... +box.rollback() +--- +... +-- range lookup from cache +assert(box.cfg.vinyl_cache > 10 * 1024) +--- +- true +... +for i = 1, 100 do put(i) end +--- +... +box.begin() +--- +... +#s:select({}, {limit = 5}) +--- +- 5 +... 
+st = istat() +--- +... +#s:select({}, {limit = 5}) +--- +- 5 +... +stat_diff(istat(), st) +--- +- cache: + lookup: 1 + put: + rows: 5 + bytes: 5305 + get: + rows: 9 + bytes: 9549 + txw: + iterator: + lookup: 1 + lookup: 1 + get: + rows: 5 + bytes: 5305 +... +box.rollback() +--- +... +-- +-- Global statistics. +-- +-- use quota +st = gstat() +--- +... +put(1) +--- +... +stat_diff(gstat(), st, 'quota') +--- +- used: 1061 +... +-- use cache +st = gstat() +--- +... +_ = s:get(1) +--- +... +stat_diff(gstat(), st, 'cache') +--- +- used: 1101 + tuples: 1 +... +s:delete(1) +--- +... +-- rollback +st = gstat() +--- +... +box.begin() +--- +... +_ = s:insert{1} +--- +... +box.rollback() +--- +... +stat_diff(gstat(), st, 'tx') +--- +- rollback: 1 +... +-- conflict +st = gstat() +--- +... +ch1 = fiber.channel(1) +--- +... +ch2 = fiber.channel(1) +--- +... +test_run:cmd("setopt delimiter ';'") +--- +- true +... +_ = fiber.create(function() + box.begin() + s:insert{1} + ch1:put(true) + ch2:get() + pcall(box.commit) + ch1:put(true) +end); +--- +... +test_run:cmd("setopt delimiter ''"); +--- +- true +... +ch1:get() +--- +- true +... +_ = s:insert{1} +--- +... +ch2:put(true) +--- +- true +... +ch1:get() +--- +- true +... +stat_diff(gstat(), st, 'tx') +--- +- conflict: 1 + commit: 1 + rollback: 1 +... +s:delete(1) +--- +... +-- tx statements +st = gstat() +--- +... +box.begin() +--- +... +for i = 1, 10 do s:replace{i} end +--- +... +stat_diff(gstat(), st, 'tx') +--- +- statements: 10 + transactions: 1 +... +box.rollback() +--- +... +stat_diff(gstat(), st, 'tx') +--- +- rollback: 1 +... +-- transactions +st = gstat() +--- +... +ch1 = fiber.channel(5) +--- +... +ch2 = fiber.channel(5) +--- +... +test_run:cmd("setopt delimiter ';'") +--- +- true +... +for i = 1, 5 do + fiber.create(function() + box.begin() + s:replace{i} + ch1:put(true) + ch2:get() + box.rollback() + ch1:put(true) + end) +end; +--- +... +test_run:cmd("setopt delimiter ''"); +--- +- true +... 
+for i = 1, 5 do ch1:get() end +--- +... +stat_diff(gstat(), st, 'tx') +--- +- statements: 5 + transactions: 5 +... +for i = 1, 5 do ch2:put(true) end +--- +... +for i = 1, 5 do ch1:get() end +--- +... +stat_diff(gstat(), st, 'tx') +--- +- rollback: 5 +... +-- read view +st = gstat() +--- +... +ch1 = fiber.channel(1) +--- +... +ch2 = fiber.channel(1) +--- +... +test_run:cmd("setopt delimiter ';'") +--- +- true +... +_ = fiber.create(function() + box.begin() + s:select() + ch1:put(true) + ch2:get() + pcall(box.commit) + ch1:put(true) +end); +--- +... +test_run:cmd("setopt delimiter ''"); +--- +- true +... +ch1:get() +--- +- true +... +_ = s:insert{1} +--- +... +stat_diff(gstat(), st, 'tx') +--- +- transactions: 1 + gap_locks: 1 + commit: 1 + read_views: 1 +... +ch2:put(true) +--- +- true +... +ch1:get() +--- +- true +... +stat_diff(gstat(), st, 'tx') +--- +- commit: 2 +... +s:delete(1) +--- +... +-- gap locks +st = gstat() +--- +... +box.begin() +--- +... +_ = s:select({10}, {iterator = 'LT'}) +--- +... +_ = s:select({20}, {iterator = 'GT'}) +--- +... +stat_diff(gstat(), st, 'tx') +--- +- transactions: 1 + gap_locks: 2 +... +box.commit() +--- +... +stat_diff(gstat(), st, 'tx') +--- +- commit: 1 +... +test_run:cmd('switch default') +--- +- true +... +test_run:cmd('stop server test') +--- +- true +... +test_run:cmd('cleanup server test') +--- +- true +... +test_run:cmd("clear filter") +--- +- true +... diff --git a/test/vinyl/info.test.lua b/test/vinyl/info.test.lua new file mode 100644 index 0000000000000000000000000000000000000000..b1c9bd5f3c8d65d59af0e87a0b56fba18dfbf15a --- /dev/null +++ b/test/vinyl/info.test.lua @@ -0,0 +1,315 @@ +test_run = require('test_run').new() + +-- Since we store LSNs in data files, the data size may differ +-- from run to run. Deploy a new server to make sure it will be +-- the same so that we can check it. 
+test_run:cmd('create server test with script = "vinyl/info.lua"') +test_run:cmd('start server test') +test_run:cmd('switch test') + +fiber = require('fiber') + +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('pk', {page_size = 4096, range_size = 16384, run_count_per_level = 1, run_size_ratio = 1000}) + +-- +-- Helper functions. +-- + +test_run:cmd("setopt delimiter ';'") + +-- Generate random 1K padding. +function pad() + local t = {} + for i = 1, 1024 do + t[i] = string.char(math.random(65, 90)) + end + return table.concat(t) +end; + +-- Insert a tuple into the test space. +function put(val) + box.space.test:replace{val, pad()} +end; + +-- Compute the difference between two tables containing stats. +-- If a field value is the same, it will be set to nil in the +-- resulting table. If 'path' is not 'nil', compare statistics +-- starting from 'path'. +function stat_diff(stat1, stat2, path) + while path ~= nil and path ~= '' do + local i = path:find('%.') or path:len() + 1 + local node = path:sub(1, i - 1) + path = path:sub(i + 1, path:len()) + stat1 = stat1[node] + stat2 = stat2[node] + end + if type(stat1) == 'string' then + return nil + end + if type(stat1) == 'number' then + return stat1 ~= stat2 and stat1 - stat2 or nil + end + assert(type(stat1) == 'table') + local diff + for k, v1 in pairs(stat1) do + local v2 = stat2[k] + local d = stat_diff(v1, v2) + if d ~= nil then + if diff == nil then + diff = {} + end + diff[k] = d + end + end + return diff +end; + +-- Return index statistics. +-- +-- Note, latency measurement is beyond the scope of this test +-- so we just filter it out. +function istat() + local st = box.space.test.index.pk:info() + st.latency = nil + return st +end; + +-- Return global statistics. +-- +-- Note, quota watermark checking is beyond the scope of this +-- test so we just filter out related statistics. 
+function gstat() + local st = box.info.vinyl() + st.quota.use_rate = nil + st.quota.dump_bandwidth = nil + st.quota.watermark = nil + return st +end; + +-- Wait until a stat counter changes. +function wait(stat_func, stat, path, diff) + while (stat_diff(stat_func(), stat, path) or 0) < diff do + fiber.sleep(0.01) + end +end; + +test_run:cmd("setopt delimiter ''"); + +-- initially stats are empty +istat() +gstat() + +-- +-- Index statistics. +-- + +-- Compressed data size may differ as padding is random. +-- Besides, it may depend on the zstd version so let's +-- filter it out. +test_run:cmd("push filter 'bytes_compressed: .*' to 'bytes_compressed: <bytes_compressed>'") + +-- put + dump +st = istat() +for i = 1, 100, 4 do put(i) end +box.snapshot() +wait(istat, st, 'disk.dump.count', 1) +stat_diff(istat(), st) + +-- put + dump + compaction +st = istat() +for i = 1, 100, 2 do put(i) end +box.snapshot() +wait(istat, st, 'disk.compact.count', 1) +stat_diff(istat(), st) + +-- point lookup from disk + cache put +st = istat() +s:get(1) ~= nil +stat_diff(istat(), st) + +-- point lookup from cache +st = istat() +s:get(1) ~= nil +stat_diff(istat(), st) + +-- put in memory + cache invalidate +st = istat() +put(1) +stat_diff(istat(), st) + +-- point lookup from memory +st = istat() +s:get(1) ~= nil +stat_diff(istat(), st) + +-- put in txw + point lookup from txw +st = istat() +box.begin() +put(1) +s:get(1) ~= nil +stat_diff(istat(), st) +box.rollback() + +-- apply upsert in txw +st = istat() +box.begin() +_ = s:replace{1} +_ = s:upsert({1}, {{'=', 2, pad()}}) +stat_diff(istat(), st, 'upsert') +box.rollback() + +-- apply upsert on get +st = istat() +_ = s:upsert({5}, {{'=', 2, pad()}}) +s:get(5) ~= nil +stat_diff(istat(), st, 'upsert') + +-- cache eviction +assert(box.cfg.vinyl_cache < 100 * 1024) +for i = 1, 100 do put(i) end +st = istat() +for i = 1, 100 do s:get(i) end +stat_diff(istat(), st, 'cache') + +-- range split +for i = 1, 100 do put(i) end +st = istat() 
+box.snapshot() +wait(istat, st, 'disk.compact.count', 2) +st = istat() +st.range_count -- 2 +st.run_count -- 2 +st.run_avg -- 1 +st.run_histogram -- [1]:2 + +-- range lookup +for i = 1, 100 do put(i) end +box.begin() +for i = 1, 100, 2 do put(i) end +st = istat() +#s:select() +stat_diff(istat(), st) +box.rollback() + +-- range lookup from cache +assert(box.cfg.vinyl_cache > 10 * 1024) +for i = 1, 100 do put(i) end +box.begin() +#s:select({}, {limit = 5}) +st = istat() +#s:select({}, {limit = 5}) +stat_diff(istat(), st) +box.rollback() + +-- +-- Global statistics. +-- + +-- use quota +st = gstat() +put(1) +stat_diff(gstat(), st, 'quota') + +-- use cache +st = gstat() +_ = s:get(1) +stat_diff(gstat(), st, 'cache') + +s:delete(1) + +-- rollback +st = gstat() +box.begin() +_ = s:insert{1} +box.rollback() +stat_diff(gstat(), st, 'tx') + +-- conflict +st = gstat() +ch1 = fiber.channel(1) +ch2 = fiber.channel(1) +test_run:cmd("setopt delimiter ';'") +_ = fiber.create(function() + box.begin() + s:insert{1} + ch1:put(true) + ch2:get() + pcall(box.commit) + ch1:put(true) +end); +test_run:cmd("setopt delimiter ''"); +ch1:get() +_ = s:insert{1} +ch2:put(true) +ch1:get() +stat_diff(gstat(), st, 'tx') + +s:delete(1) + +-- tx statements +st = gstat() +box.begin() +for i = 1, 10 do s:replace{i} end +stat_diff(gstat(), st, 'tx') +box.rollback() +stat_diff(gstat(), st, 'tx') + +-- transactions +st = gstat() +ch1 = fiber.channel(5) +ch2 = fiber.channel(5) +test_run:cmd("setopt delimiter ';'") +for i = 1, 5 do + fiber.create(function() + box.begin() + s:replace{i} + ch1:put(true) + ch2:get() + box.rollback() + ch1:put(true) + end) +end; +test_run:cmd("setopt delimiter ''"); +for i = 1, 5 do ch1:get() end +stat_diff(gstat(), st, 'tx') +for i = 1, 5 do ch2:put(true) end +for i = 1, 5 do ch1:get() end +stat_diff(gstat(), st, 'tx') + +-- read view +st = gstat() +ch1 = fiber.channel(1) +ch2 = fiber.channel(1) +test_run:cmd("setopt delimiter ';'") +_ = fiber.create(function() + box.begin() 
+ s:select() + ch1:put(true) + ch2:get() + pcall(box.commit) + ch1:put(true) +end); +test_run:cmd("setopt delimiter ''"); +ch1:get() +_ = s:insert{1} +stat_diff(gstat(), st, 'tx') +ch2:put(true) +ch1:get() +stat_diff(gstat(), st, 'tx') + +s:delete(1) + +-- gap locks +st = gstat() +box.begin() +_ = s:select({10}, {iterator = 'LT'}) +_ = s:select({20}, {iterator = 'GT'}) +stat_diff(gstat(), st, 'tx') +box.commit() +stat_diff(gstat(), st, 'tx') + +test_run:cmd('switch default') +test_run:cmd('stop server test') +test_run:cmd('cleanup server test') +test_run:cmd("clear filter") diff --git a/test/xlog/checkpoint_daemon.result b/test/xlog/checkpoint_daemon.result index dd0b7de6fd5f558948be634dfc6196f785e853bb..b30c856b339565b81efc1f6af3b20dc2c331661f 100644 --- a/test/xlog/checkpoint_daemon.result +++ b/test/xlog/checkpoint_daemon.result @@ -241,3 +241,25 @@ daemon.control == nil --- - true ... +-- gh-2780 check that scheduled snapshots are performed +PERIOD = 0.03 +--- +... +if jit.os ~= 'Linux' then PERIOD = 1.5 end +--- +... +box.cfg{ checkpoint_interval = PERIOD} +--- +... +fiber.sleep(3 * PERIOD) +--- +... +-- check that it's not first snapshot +test_run:grep_log("default", "saving snapshot", 400) +--- +- null +... +test_run:grep_log("default", "making snapshot", 400) +--- +- making snapshot +... 
diff --git a/test/xlog/checkpoint_daemon.test.lua b/test/xlog/checkpoint_daemon.test.lua index 55d7defd94ef5ca64cd8a788b54b0f0dc7441242..e42c0d6e0830556b29a48c1d371d95d7cb702916 100644 --- a/test/xlog/checkpoint_daemon.test.lua +++ b/test/xlog/checkpoint_daemon.test.lua @@ -112,3 +112,12 @@ box.cfg{ checkpoint_count = 2, checkpoint_interval = 0} daemon.next_snapshot_time daemon.fiber == nil daemon.control == nil + +-- gh-2780 check that scheduled snapshots are performed +PERIOD = 0.03 +if jit.os ~= 'Linux' then PERIOD = 1.5 end +box.cfg{ checkpoint_interval = PERIOD} +fiber.sleep(3 * PERIOD) +-- check that it's not first snapshot +test_run:grep_log("default", "saving snapshot", 400) +test_run:grep_log("default", "making snapshot", 400) \ No newline at end of file