From e92db8b7ade1711b88d1403807e730b270841c1a Mon Sep 17 00:00:00 2001
From: Ilya Verbin <iverbin@tarantool.org>
Date: Wed, 7 Jun 2023 17:05:31 +0300
Subject: [PATCH] box: introduce tuple_builder class

It encapsulates the logic that helps to build a new MsgPack array by
concatenating tuple fields from various locations. The idea is to postpone
memory allocation and copying until the finalization.

Needed for #8157

NO_DOC=internal
NO_CHANGELOG=internal
---
 src/box/CMakeLists.txt    |   1 +
 src/box/tuple_builder.c   | 109 ++++++++++++++++++++++++
 src/box/tuple_builder.h   |  77 +++++++++++++++++
 test/unit/CMakeLists.txt  |   5 ++
 test/unit/tuple_builder.c | 178 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 370 insertions(+)
 create mode 100644 src/box/tuple_builder.c
 create mode 100644 src/box/tuple_builder.h
 create mode 100644 test/unit/tuple_builder.c

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 535d498d7b..53564068d3 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -105,6 +105,7 @@ set(tuple_sources
     tuple_format.c
     tuple_constraint_def.c
     tuple_constraint.c
+    tuple_builder.c
     xrow_update.c
     xrow_update_field.c
     xrow_update_array.c
diff --git a/src/box/tuple_builder.c b/src/box/tuple_builder.c
new file mode 100644
index 0000000000..e8bfb7c0b5
--- /dev/null
+++ b/src/box/tuple_builder.c
@@ -0,0 +1,109 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2023, Tarantool AUTHORS, please see AUTHORS file.
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include "tuple.h"
+#include "msgpuck.h"
+#include "small/region.h"
+#include "salad/stailq.h"
+#include "tuple_builder.h"
+
+/**
+ * A chunk of data with tuple fields.
+ */
+struct tuple_chunk {
+	/** Start of the data. */
+	const char *data;
+	/** End of the data. */
+	const char *data_end;
+	/** Number of NULL fields. If > 0 then data/data_end are not used. */
+	uint32_t null_count;
+	/** Link in `tuple_builder::chunks`. */
+	struct stailq_entry in_builder;
+};
+
+void
+tuple_builder_new(struct tuple_builder *builder, struct region *region)
+{
+	stailq_create(&builder->chunks);
+	builder->field_count = 0;
+	builder->size = 0;
+	builder->region = region;
+}
+
+void
+tuple_builder_add_nil(struct tuple_builder *builder)
+{
+	builder->field_count++;
+	builder->size += mp_sizeof_nil();
+
+	struct tuple_chunk *chunk;
+	if (!stailq_empty(&builder->chunks)) {
+		chunk = stailq_last_entry(&builder->chunks, struct tuple_chunk,
+					  in_builder);
+		/* Extend the trailing run of nils instead of allocating. */
+		if (chunk->null_count > 0) {
+			chunk->null_count++;
+			return;
+		}
+	}
+	chunk = xregion_alloc_object(builder->region, typeof(*chunk));
+	chunk->data = NULL;
+	chunk->data_end = NULL;
+	chunk->null_count = 1;
+	stailq_add_tail_entry(&builder->chunks, chunk, in_builder);
+}
+
+void
+tuple_builder_add(struct tuple_builder *builder, const char *data,
+		  size_t data_size, uint32_t field_count)
+{
+	const char *data_end = data + data_size;
+	builder->field_count += field_count;
+	builder->size += data_size;
+
+	struct tuple_chunk *chunk;
+	if (!stailq_empty(&builder->chunks)) {
+		chunk = stailq_last_entry(&builder->chunks, struct tuple_chunk,
+					  in_builder);
+		/* Adjacent to the last chunk: extend it, don't allocate. */
+		if (chunk->data_end == data) {
+			chunk->data_end = data_end;
+			return;
+		}
+	}
+	chunk = xregion_alloc_object(builder->region, typeof(*chunk));
+	chunk->data = data;
+	chunk->data_end = data_end;
+	chunk->null_count = 0;
+	stailq_add_tail_entry(&builder->chunks, chunk, in_builder);
+}
+
+void
+tuple_builder_finalize(struct tuple_builder *builder, const char **data,
+		       const char **data_end)
+{
+	size_t data_size = builder->size +
+			   mp_sizeof_array(builder->field_count);
+	char *buf = xregion_alloc(builder->region, data_size);
+	*data = buf;
+	*data_end = buf + data_size;
+	buf = mp_encode_array(buf, builder->field_count);
+
+	struct tuple_chunk *chunk;
+	stailq_foreach_entry(chunk, &builder->chunks, in_builder) {
+		if (chunk->null_count == 0) {
+			size_t size = chunk->data_end - chunk->data;
+			memcpy(buf, chunk->data, size);
+			buf += size;
+		} else {
+			for (uint32_t i = 0; i < chunk->null_count; i++)
+				buf = mp_encode_nil(buf);
+		}
+	}
+	assert(buf == *data_end);
+	mp_tuple_assert(*data, *data_end);
+}
diff --git a/src/box/tuple_builder.h b/src/box/tuple_builder.h
new file mode 100644
index 0000000000..6d7730a104
--- /dev/null
+++ b/src/box/tuple_builder.h
@@ -0,0 +1,77 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2023, Tarantool AUTHORS, please see AUTHORS file.
+ */
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include "salad/stailq.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+struct region;
+
+/**
+ * A builder that helps to construct a tuple by concatenating chunks of data.
+ * A chunk represents one or more tuple fields (MsgPack objects).
+ *
+ * First, chunks are added to a builder object. The builder doesn't allocate
+ * any memory for the MsgPack, and doesn't copy it, only pointers to the start
+ * and to the end of the data are preserved.
+ *
+ * Once all chunks have been added, the builder can be used to encode them into
+ * the final MsgPack array.
+ */
+struct tuple_builder {
+	/** List of chunks, linked by `tuple_chunk::in_builder`. */
+	struct stailq chunks;
+	/**
+	 * Number of tuple fields. It can be greater than the number of
+	 * elements in the list of chunks.
+	 */
+	uint32_t field_count;
+	/** Total size of memory required to encode chunks from the list. */
+	size_t size;
+	/** The region used to perform memory allocation. */
+	struct region *region;
+};
+
+/**
+ * Initialize the builder. The region argument is saved to perform memory
+ * allocation for internal structures and for the resulting MsgPack array.
+ */
+void
+tuple_builder_new(struct tuple_builder *builder, struct region *region);
+
+/**
+ * Add a NULL tuple field to the builder.
+ */
+void
+tuple_builder_add_nil(struct tuple_builder *builder);
+
+/**
+ * Add a chunk of data with `field_count` tuple fields to the builder. The data
+ * is not copied and must stay valid until tuple_builder_finalize(). If the
+ * chunk starts where the previous one ends, the previous chunk is extended,
+ * otherwise a new chunk is allocated on builder->region.
+ */
+void
+tuple_builder_add(struct tuple_builder *builder, const char *data,
+		  size_t data_size, uint32_t field_count);
+
+/**
+ * Encode tuple fields added to the builder into the new MsgPack array.
+ * The buffer is allocated on builder->region, and the address is returned
+ * in data and data_end.
+ */
+void
+tuple_builder_finalize(struct tuple_builder *builder, const char **data,
+		       const char **data_end);
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 2bf5f47693..911014c6be 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -567,6 +567,11 @@ create_unit_test(PREFIX key_def
                  LIBRARIES unit box core
 )
 
+create_unit_test(PREFIX tuple_builder
+                 SOURCES tuple_builder.c box_test_utils.c
+                 LIBRARIES unit box core
+)
+
 create_unit_test(PREFIX getenv_safe
                  SOURCES getenv_safe.c core_test_utils.c
                  LIBRARIES unit core
diff --git a/test/unit/tuple_builder.c b/test/unit/tuple_builder.c
new file mode 100644
index 0000000000..a3f1b25a9b
--- /dev/null
+++ b/test/unit/tuple_builder.c
@@ -0,0 +1,178 @@
+#include "fiber.h"
+#include "memory.h"
+#include "msgpuck.h"
+#include "tuple.h"
+#include "tuple_builder.h"
+
+#define UNIT_TAP_COMPATIBLE 1
+#include "unit.h"
+
+/** Finalizing a builder with no chunks yields an empty MsgPack array. */
+static void
+test_tuple_builder_empty(void)
+{
+	plan(2);
+	header();
+
+	const char *data, *data_end;
+	struct region *region = &fiber()->gc;
+	size_t region_svp = region_used(region);
+
+	struct tuple_builder builder;
+	tuple_builder_new(&builder, region);
+	tuple_builder_finalize(&builder, &data, &data_end);
+
+	is(mp_typeof(*data), MP_ARRAY, "type is MP_ARRAY");
+	is(mp_decode_array(&data), 0, "array is empty");
+	region_truncate(region, region_svp);
+
+	footer();
+	check_plan();
+}
+
+/** Three consecutive nils are encoded as an array of three MP_NIL. */
+static void
+test_tuple_builder_nulls(void)
+{
+	plan(4);
+	header();
+
+	const char *data, *data_end;
+	struct region *region = &fiber()->gc;
+	size_t region_svp = region_used(region);
+
+	struct tuple_builder builder;
+	tuple_builder_new(&builder, region);
+	tuple_builder_add_nil(&builder);
+	tuple_builder_add_nil(&builder);
+	tuple_builder_add_nil(&builder);
+	tuple_builder_finalize(&builder, &data, &data_end);
+
+	is(mp_decode_array(&data), 3, "array contains 3 elements");
+	is(mp_typeof(*data), MP_NIL, "[0] MP_NIL");
+	mp_decode_nil(&data);
+	is(mp_typeof(*data), MP_NIL, "[1] MP_NIL");
+	mp_decode_nil(&data);
+	is(mp_typeof(*data), MP_NIL, "[2] MP_NIL");
+	region_truncate(region, region_svp);
+
+	footer();
+	check_plan();
+}
+
+/** Create a referenced runtime tuple [0, 111, 222, 333, 444]. */
+static struct tuple *
+create_tuple1(void)
+{
+	char data[16];
+	char *end = data;
+	end = mp_encode_array(end, 5);
+	end = mp_encode_uint(end, 0);
+	end = mp_encode_uint(end, 111);
+	end = mp_encode_uint(end, 222);
+	end = mp_encode_uint(end, 333);
+	end = mp_encode_uint(end, 444);
+
+	struct tuple *tuple = tuple_new(tuple_format_runtime, data, end);
+	tuple_ref(tuple);
+	return tuple;
+}
+
+/** Create a referenced runtime tuple ["xxx", "yyy", "zzz"]. */
+static struct tuple *
+create_tuple2(void)
+{
+	char data[16];
+	char *end = data;
+	end = mp_encode_array(end, 3);
+	end = mp_encode_str0(end, "xxx");
+	end = mp_encode_str0(end, "yyy");
+	end = mp_encode_str0(end, "zzz");
+
+	struct tuple *tuple = tuple_new(tuple_format_runtime, data, end);
+	tuple_ref(tuple);
+	return tuple;
+}
+
+/** Mix field ranges of two tuples with nils and check the encoded result. */
+static void
+test_tuple_builder_merge(void)
+{
+	plan(9);
+	header();
+
+	uint32_t len;
+	const char *str, *data, *data_end;
+	struct region *region = &fiber()->gc;
+	size_t region_svp = region_used(region);
+
+	struct tuple *tuple1 = create_tuple1();
+	struct tuple *tuple2 = create_tuple2();
+	const char *t1f2 = tuple_field(tuple1, 2);
+	const char *t1f3 = tuple_field(tuple1, 3);
+	const char *t1f4 = tuple_field(tuple1, 4);
+	const char *t2f0 = tuple_field(tuple2, 0);
+	const char *t2f1 = tuple_field(tuple2, 1);
+	const char *t2f2 = tuple_field(tuple2, 2);
+
+	struct tuple_builder builder;
+	tuple_builder_new(&builder, region);
+	tuple_builder_add(&builder, t1f2, t1f4 - t1f2, 2);
+	tuple_builder_add(&builder, t2f0, t2f2 - t2f0, 2);
+	tuple_builder_add_nil(&builder);
+	tuple_builder_add(&builder, t2f1, t2f2 - t2f1, 1);
+	tuple_builder_add(&builder, t1f2, t1f3 - t1f2, 1);
+	tuple_builder_add_nil(&builder);
+	tuple_builder_finalize(&builder, &data, &data_end);
+
+	tuple_unref(tuple1);
+	tuple_unref(tuple2);
+
+	is(mp_decode_array(&data), 8, "array contains 8 elements");
+	is(mp_decode_uint(&data), 222, "[0] MP_UINT is 222");
+	is(mp_decode_uint(&data), 333, "[1] MP_UINT is 333");
+	str = mp_decode_str(&data, &len);
+	is(strncmp(str, "xxx", 3), 0, "[2] MP_STR is xxx");
+	str = mp_decode_str(&data, &len);
+	is(strncmp(str, "yyy", 3), 0, "[3] MP_STR is yyy");
+	is(mp_typeof(*data), MP_NIL, "[4] MP_NIL");
+	mp_decode_nil(&data);
+	str = mp_decode_str(&data, &len);
+	is(strncmp(str, "yyy", 3), 0, "[5] MP_STR is yyy");
+	is(mp_decode_uint(&data), 222, "[6] MP_UINT is 222");
+	is(mp_typeof(*data), MP_NIL, "[7] MP_NIL");
+	region_truncate(region, region_svp);
+
+	footer();
+	check_plan();
+}
+
+/** Run all tuple_builder test cases; returns the result of check_plan(). */
+static int
+test_tuple_builder(void)
+{
+	plan(3);
+	header();
+
+	test_tuple_builder_empty();
+	test_tuple_builder_nulls();
+	test_tuple_builder_merge();
+
+	footer();
+	return check_plan();
+}
+
+int
+main(void)
+{
+	memory_init();
+	fiber_init(fiber_c_invoke);
+	tuple_init(NULL);
+
+	int rc = test_tuple_builder();
+
+	tuple_free();
+	fiber_free();
+	memory_free();
+	return rc;
+}
-- 
GitLab