diff --git a/src/box/key_def.c b/src/box/key_def.c index d4c979aa14115331f8a4cb45d60ceae840a722da..55dcf1eb55c718aaea52825332c354347501fbb2 100644 --- a/src/box/key_def.c +++ b/src/box/key_def.c @@ -709,6 +709,43 @@ key_def_merge(const struct key_def *first, const struct key_def *second) return new_def; } +struct key_def * +key_def_find_pk_in_cmp_def(const struct key_def *cmp_def, + const struct key_def *pk_def, + struct region *region) +{ + struct key_def *extracted_def = NULL; + size_t region_svp = region_used(region); + + /* First, dump primary key parts as is. */ + struct key_part_def *parts = region_alloc(region, + pk_def->part_count * sizeof(*parts)); + if (parts == NULL) { + diag_set(OutOfMemory, pk_def->part_count * sizeof(*parts), + "region", "key def parts"); + goto out; + } + if (key_def_dump_parts(pk_def, parts, region) != 0) + goto out; + /* + * Second, update field numbers to match the primary key + * parts in a secondary key. + */ + for (uint32_t i = 0; i < pk_def->part_count; i++) { + const struct key_part *part = key_def_find(cmp_def, + &pk_def->parts[i]); + assert(part != NULL); + parts[i].fieldno = part - cmp_def->parts; + parts[i].path = NULL; + } + + /* Finally, allocate the new key definition. */ + extracted_def = key_def_new(parts, pk_def->part_count); +out: + region_truncate(region, region_svp); + return extracted_def; +} + int key_validate_parts(const struct key_def *key_def, const char *key, uint32_t part_count, bool allow_nullable) diff --git a/src/box/key_def.h b/src/box/key_def.h index e3a34ce1ce5dcdd72871949b665129cc9ff3e28e..7c9f8c6b562315f98a828a35aa789bf654274d54 100644 --- a/src/box/key_def.h +++ b/src/box/key_def.h @@ -375,6 +375,21 @@ key_def_contains(const struct key_def *first, const struct key_def *second); struct key_def * key_def_merge(const struct key_def *first, const struct key_def *second); +/** + * Create a key definition suitable for extracting primary key + * parts from an extended secondary key. + * @param cmp_def Extended secondary key definition + * (must include primary key parts). + * @param pk_def Primary key definition. + * @param region Region used for temporary allocations. + * @retval not NULL Pointer to the extracted key definition. + * @retval NULL Memory allocation error. + */ +struct key_def * +key_def_find_pk_in_cmp_def(const struct key_def *cmp_def, + const struct key_def *pk_def, + struct region *region); + /* * Check that parts of the key match with the key definition. * @param key_def Key definition. diff --git a/src/box/vinyl.c b/src/box/vinyl.c index e5411aafad9d2639a54510a988e791c107e1a614..1cbffcfb9a15da68234e09acfea3570f9584a1a3 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -719,10 +719,9 @@ vinyl_space_create_index(struct space *space, struct index_def *index_def) pk = vy_lsm(space_index(space, 0)); assert(pk != NULL); } - struct vy_lsm *lsm = vy_lsm_new(&env->lsm_env, &env->stmt_env, - &env->cache_env, &env->mem_env, - index_def, space->format, pk, - space_group_id(space)); + struct vy_lsm *lsm = vy_lsm_new(&env->lsm_env, &env->cache_env, + &env->mem_env, index_def, space->format, + pk, space_group_id(space)); if (lsm == NULL) { free(index); return NULL; @@ -1301,13 +1300,36 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx, const struct vy_read_view **rv, struct tuple *tuple, struct tuple **result) { + int rc = 0; assert(lsm->index_id > 0); - if (vy_point_lookup(lsm->pk, tx, rv, tuple, result) != 0) - return -1; + /* + * Lookup the full tuple by a secondary statement. + * There are two cases: the secondary statement may be + * a key, if we got this tuple from disk, in which case + * we need to extract the primary key parts from it; or + * it may be a full tuple, if we got this tuple from + * the tuple cache or level 0, in which case we may pass + * it immediately to the iterator. + */ + struct tuple *key; + if (vy_stmt_is_key(tuple)) { + key = vy_stmt_extract_key(tuple, lsm->pk_in_cmp_def, + lsm->env->key_format); + if (key == NULL) + return -1; + } else { + key = tuple; + tuple_ref(key); + } + + if (vy_point_lookup(lsm->pk, tx, rv, key, result) != 0) { + rc = -1; + goto out; + } if (*result == NULL || - vy_stmt_compare(*result, tuple, lsm->key_def) != 0) { + vy_stmt_compare(*result, tuple, lsm->cmp_def) != 0) { /* * If a tuple read from a secondary index doesn't * match the tuple corresponding to it in the @@ -1327,7 +1349,7 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx, * the tuple cache implementation. */ vy_cache_on_write(&lsm->cache, tuple, NULL); - return 0; + goto out; } /* @@ -1340,13 +1362,15 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx, */ if (tx != NULL && vy_tx_track_point(tx, lsm->pk, *result) != 0) { tuple_unref(*result); - return -1; + rc = -1; + goto out; } if ((*rv)->vlsn == INT64_MAX) - vy_cache_add(&lsm->pk->cache, *result, NULL, tuple, ITER_EQ); - - return 0; + vy_cache_add(&lsm->pk->cache, *result, NULL, key, ITER_EQ); +out: + tuple_unref(key); + return rc; } /** diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index ea801f8259829575571cdc2c938cf36d6cd7bcc3..0461def5e0f6d5dd0e4b080f9210311cef8fb111 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -118,10 +118,9 @@ vy_lsm_mem_tree_size(struct vy_lsm *lsm) } struct vy_lsm * -vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_stmt_env *stmt_env, - struct vy_cache_env *cache_env, struct vy_mem_env *mem_env, - struct index_def *index_def, struct tuple_format *format, - struct vy_lsm *pk, uint32_t group_id) +vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env, + struct vy_mem_env *mem_env, struct index_def *index_def, + struct tuple_format *format, struct vy_lsm *pk, uint32_t group_id) { static int64_t run_buckets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 50, 100, @@ -156,10 +155,20 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_stmt_env *stmt_env, */ lsm->disk_format = format; } else { - lsm->disk_format = vy_stmt_format_new(stmt_env, &cmp_def, 1, - NULL, 0, 0, NULL); - if (lsm->disk_format == NULL) - goto fail_format; + /* + * To save disk space, we do not store full tuples + * in secondary index runs. Instead we only store + * extended keys (i.e. keys consisting of secondary + * and primary index parts). This is enough to look + * up a full tuple in the primary index. + */ + lsm->disk_format = lsm_env->key_format; + + lsm->pk_in_cmp_def = key_def_find_pk_in_cmp_def(lsm->cmp_def, + pk->key_def, + &fiber()->gc); + if (lsm->pk_in_cmp_def == NULL) + goto fail_pk_in_cmp_def; } tuple_format_ref(lsm->disk_format); @@ -208,7 +217,9 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_stmt_env *stmt_env, vy_lsm_stat_destroy(&lsm->stat); fail_stat: tuple_format_unref(lsm->disk_format); -fail_format: + if (lsm->pk_in_cmp_def != NULL) + key_def_delete(lsm->pk_in_cmp_def); +fail_pk_in_cmp_def: key_def_delete(cmp_def); fail_cmp_def: key_def_delete(key_def); @@ -262,6 +273,8 @@ vy_lsm_delete(struct vy_lsm *lsm) tuple_format_unref(lsm->disk_format); key_def_delete(lsm->cmp_def); key_def_delete(lsm->key_def); + if (lsm->pk_in_cmp_def != NULL) + key_def_delete(lsm->pk_in_cmp_def); histogram_delete(lsm->run_hist); vy_lsm_stat_destroy(&lsm->stat); vy_cache_destroy(&lsm->cache); diff --git a/src/box/vy_lsm.h b/src/box/vy_lsm.h index b94e7a4308427877c5767b6fd155e7e2ffb5c6de..e969e45c2e4d20ae9beb425450ed0000686cbfe2 100644 --- a/src/box/vy_lsm.h +++ b/src/box/vy_lsm.h @@ -199,6 +199,12 @@ struct vy_lsm { struct key_def *cmp_def; /** Key definition passed by the user. */ struct key_def *key_def; + /** + * Key definition to extract primary key parts from + * a secondary key. NULL if this LSM tree corresponds + * to a primary index. + */ + struct key_def *pk_in_cmp_def; /** * If the following flag is set, the index this LSM tree * is created for is unique and it must be checked for @@ -333,10 +339,9 @@ vy_lsm_mem_tree_size(struct vy_lsm *lsm); /** Allocate a new LSM tree object. */ struct vy_lsm * -vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_stmt_env *stmt_env, - struct vy_cache_env *cache_env, struct vy_mem_env *mem_env, - struct index_def *index_def, struct tuple_format *format, - struct vy_lsm *pk, uint32_t group_id); +vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env, + struct vy_mem_env *mem_env, struct index_def *index_def, + struct tuple_format *format, struct vy_lsm *pk, uint32_t group_id); /** Free an LSM tree object. */ void diff --git a/src/box/vy_run.c b/src/box/vy_run.c index 76b2f94acde98d1df92e3828958383330b6ffb0d..928ca57f9518bd88d8f4ffa6997e0bf1443eea2a 100644 --- a/src/box/vy_run.c +++ b/src/box/vy_run.c @@ -2139,7 +2139,8 @@ vy_run_writer_start_page(struct vy_run_writer *writer, if (run->info.page_count >= writer->page_info_capacity && vy_run_alloc_page_info(run, &writer->page_info_capacity) != 0) return -1; - const char *key = tuple_extract_key(first_stmt, writer->cmp_def, NULL); + const char *key = vy_stmt_is_key(first_stmt) ? tuple_data(first_stmt) : + tuple_extract_key(first_stmt, writer->cmp_def, NULL); if (key == NULL) return -1; if (run->info.page_count == 0) { @@ -2289,7 +2290,9 @@ vy_run_writer_commit(struct vy_run_writer *writer) } assert(writer->last_stmt != NULL); - const char *key = tuple_extract_key(writer->last_stmt, + const char *key = vy_stmt_is_key(writer->last_stmt) ? + tuple_data(writer->last_stmt) : + tuple_extract_key(writer->last_stmt, writer->cmp_def, NULL); if (key == NULL) goto out; @@ -2360,6 +2363,7 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, int64_t max_lsn = 0; int64_t min_lsn = INT64_MAX; struct tuple *prev_tuple = NULL; + char *page_min_key = NULL; struct tuple_bloom_builder *bloom_builder = NULL; if (opts->bloom_fpr < 1) { @@ -2377,7 +2381,6 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, if (run->info.page_count == page_info_capacity && vy_run_alloc_page_info(run, &page_info_capacity) != 0) goto close_err; - const char *page_min_key = NULL; uint32_t page_row_count = 0; uint64_t page_row_index_offset = 0; uint64_t row_offset = xlog_cursor_tx_pos(&cursor); @@ -2400,7 +2403,8 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, tuple_unref(tuple); goto close_err; } - key = tuple_extract_key(tuple, cmp_def, NULL); + key = vy_stmt_is_key(tuple) ? tuple_data(tuple) : + tuple_extract_key(tuple, cmp_def, NULL); if (prev_tuple != NULL) tuple_unref(prev_tuple); prev_tuple = tuple; @@ -2411,8 +2415,11 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, if (run->info.min_key == NULL) goto close_err; } - if (page_min_key == NULL) - page_min_key = key; + if (page_min_key == NULL) { + page_min_key = vy_key_dup(key); + if (page_min_key == NULL) + goto close_err; + } if (xrow.lsn > max_lsn) max_lsn = xrow.lsn; if (xrow.lsn < min_lsn) @@ -2429,11 +2436,9 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, info->row_index_offset = page_row_index_offset; ++run->info.page_count; vy_run_acct_page(run, info); - } - if (prev_tuple != NULL) { - tuple_unref(prev_tuple); - prev_tuple = NULL; + free(page_min_key); + page_min_key = NULL; } if (key != NULL) { @@ -2444,6 +2449,10 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, run->info.max_lsn = max_lsn; run->info.min_lsn = min_lsn; + if (prev_tuple != NULL) { + tuple_unref(prev_tuple); + prev_tuple = NULL; + } region_truncate(region, mem_used); run->fd = cursor.fd; xlog_cursor_close(&cursor, true); @@ -2473,6 +2482,8 @@ vy_run_rebuild_index(struct vy_run *run, const char *dir, region_truncate(region, mem_used); if (prev_tuple != NULL) tuple_unref(prev_tuple); + if (page_min_key != NULL) + free(page_min_key); if (bloom_builder != NULL) tuple_bloom_builder_delete(bloom_builder); if (xlog_cursor_is_open(&cursor)) diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c index 726fbfc7fbe72bd6e3cbbebfad892a24d99bd005..d593c5207be0e3e912b9fe7676ac6a80b9d519de 100644 --- a/src/box/vy_stmt.c +++ b/src/box/vy_stmt.c @@ -747,6 +747,8 @@ int vy_stmt_encode_primary(const struct tuple *value, struct key_def *key_def, uint32_t space_id, struct xrow_header *xrow) { + assert(!vy_stmt_is_key(value)); + memset(xrow, 0, sizeof(*xrow)); enum iproto_type type = vy_stmt_type(value); xrow->type = type; @@ -803,7 +805,9 @@ vy_stmt_encode_secondary(const struct tuple *value, struct key_def *cmp_def, memset(&request, 0, sizeof(request)); request.type = type; uint32_t size; - const char *extracted = tuple_extract_key(value, cmp_def, &size); + const char *extracted = vy_stmt_is_key(value) ? + tuple_data_range(value, &size) : + tuple_extract_key(value, cmp_def, &size); if (extracted == NULL) return -1; if (type == IPROTO_REPLACE || type == IPROTO_INSERT) { @@ -833,27 +837,23 @@ vy_stmt_decode(struct xrow_header *xrow, const struct key_def *key_def, if (xrow_decode_dml(xrow, &request, key_map) != 0) return NULL; struct tuple *stmt = NULL; - const char *key; - (void) key; struct iovec ops; switch (request.type) { case IPROTO_DELETE: - /* extract key */ - stmt = vy_stmt_new_surrogate_from_key(request.key, - IPROTO_DELETE, - key_def, format); + if (is_primary) + stmt = vy_stmt_new_surrogate_from_key(request.key, + IPROTO_DELETE, + key_def, format); + else + stmt = vy_stmt_new_with_ops(format, request.key, + request.key_end, + NULL, 0, IPROTO_DELETE); break; case IPROTO_INSERT: case IPROTO_REPLACE: - if (is_primary) { - stmt = vy_stmt_new_with_ops(format, request.tuple, - request.tuple_end, - NULL, 0, request.type); - } else { - stmt = vy_stmt_new_surrogate_from_key(request.tuple, - request.type, - key_def, format); - } + stmt = vy_stmt_new_with_ops(format, request.tuple, + request.tuple_end, + NULL, 0, request.type); break; case IPROTO_UPSERT: ops.iov_base = (char *)request.ops; diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c index 38e9f403e069c28e12de0485c8e30e8e0c025f2b..f3cd84d4efb427f91d59e681eb843919c5f80023 100644 --- a/test/unit/vy_point_lookup.c +++ b/test/unit/vy_point_lookup.c @@ -94,7 +94,7 @@ test_basic() index_def_new(512, 0, "primary", sizeof("primary") - 1, TREE, &index_opts, key_def, NULL); - struct vy_lsm *pk = vy_lsm_new(&lsm_env, &stmt_env, &cache_env, &mem_env, + struct vy_lsm *pk = vy_lsm_new(&lsm_env, &cache_env, &mem_env, index_def, format, NULL, 0); isnt(pk, NULL, "lsm is not NULL")