From 94345e4d856d647b1afc108097e94ac3f48df66d Mon Sep 17 00:00:00 2001 From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Date: Sun, 17 May 2020 18:19:40 +0200 Subject: [PATCH] tuple: use unaligned store-load for field map A tuple can have a field map. It is an array of uint32_t values, stored right after 'struct tuple' object in the same memory block. 'struct tuple' is 10 byte size, and is aligned by 4 bytes (even though it is 'PACKED', so does not need an alignment). It means, that field map is stored by an address like this: 4*N + 10. This address is never aligned by 4, needed for uint32_t field map array. So the array members should be accessed using operations aware of unaligned nature of the addresses. Unfortunately, it is impossible to make the field map aligned, because that requires + 2 bytes for each tuple. It is unaffordable luxury, since tuple count can be millions and even billions. Every new byte may result in gigabytes of memory in a cluster. The patch makes field map accessed using unaligned store-load operations. Part of #4609 --- src/box/field_map.c | 13 +++++++++---- src/box/field_map.h | 21 ++++++++++++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/box/field_map.c b/src/box/field_map.c index 5f46619413..dc903115e0 100644 --- a/src/box/field_map.c +++ b/src/box/field_map.c @@ -101,16 +101,21 @@ field_map_build(struct field_map_builder *builder, char *buffer) (uint32_t *)(buffer + field_map_build_size(builder)); char *extent_wptr = buffer; for (int32_t i = -1; i >= -(int32_t)builder->slot_count; i--) { + /* + * Can not access field_map as a normal uint32 + * array because its alignment may be < 4 bytes. + * Need to use unaligned store-load operations + * explicitly. + */ if (!builder->slots[i].has_extent) { - field_map[i] = builder->slots[i].offset; + store_u32(&field_map[i], builder->slots[i].offset); continue; } struct field_map_builder_slot_extent *extent = builder->slots[i].extent; /** Retrive memory for the extent. */ - field_map[i] = - (uint32_t)((char *)extent_wptr - (char *)field_map); - *(uint32_t *)extent_wptr = extent->size; + store_u32(&field_map[i], extent_wptr - (char *)field_map); + store_u32(extent_wptr, extent->size); uint32_t extent_offset_sz = extent->size * sizeof(uint32_t); memcpy(&((uint32_t *) extent_wptr)[1], extent->offset, extent_offset_sz); diff --git a/src/box/field_map.h b/src/box/field_map.h index a1a5a9dba8..d8ef726a1e 100644 --- a/src/box/field_map.h +++ b/src/box/field_map.h @@ -33,6 +33,7 @@ #include <assert.h> #include <stdint.h> #include <stddef.h> +#include "bit/bit.h" struct region; struct field_map_builder_slot; @@ -151,20 +152,22 @@ static inline uint32_t field_map_get_offset(const uint32_t *field_map, int32_t offset_slot, int multikey_idx) { - uint32_t offset; - if (multikey_idx != MULTIKEY_NONE && - (int32_t) field_map[offset_slot] < 0) { + /* + * Can not access field_map as a normal uint32 array + * because its alignment may be < 4 bytes. Need to use + * unaligned store-load operations explicitly. + */ + uint32_t offset = load_u32(&field_map[offset_slot]); + if (multikey_idx != MULTIKEY_NONE && (int32_t)offset < 0) { /** * The field_map extent has the following * structure: [size=N|slot1|slot2|..|slotN] */ - uint32_t *extent = (uint32_t *)((char *)field_map + - (int32_t)field_map[offset_slot]); - if ((uint32_t)multikey_idx >= extent[0]) + const uint32_t *extent = (const uint32_t *) + ((const char *)field_map + (int32_t)offset); + if ((uint32_t)multikey_idx >= load_u32(&extent[0])) return 0; - offset = extent[multikey_idx + 1]; - } else { - offset = field_map[offset_slot]; + offset = load_u32(&extent[multikey_idx + 1]); } return offset; } -- GitLab