From a2c19aa812754451f6c828ce074bf7443847496e Mon Sep 17 00:00:00 2001 From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Date: Sun, 19 May 2019 01:00:38 +0300 Subject: [PATCH] buffer: replace all ffi.new(type[1]) with cached union Lua, which suffers from lack of ability to pass values by pointers into FFI functions, nor has an address operator '&' to take an address of integer or char or anything. Because of that a user need to either use ffi.new(type[1]) or use static buffer, but for such small allocations they are both too expensive and aggravate GC problem. Now buffer module provides preallocated basic types to use in FFI functions. The commit is motivated by one another place where ffi.new('int[1]') appeared, in SWIM module, to obtain payload size as an out parameter of a C function. --- src/lua/buffer.lua | 42 +++++++++++++++++++++++++++++++++ src/lua/crypto.lua | 13 +++++------ src/lua/msgpackffi.lua | 50 ++++++++++++++++------------------------ src/lua/socket.lua | 25 ++++++++++++-------- src/lua/string.lua | 4 ++-- test/app/buffer.result | 27 ++++++++++++++++++++++ test/app/buffer.test.lua | 11 +++++++++ 7 files changed, 123 insertions(+), 49 deletions(-) diff --git a/src/lua/buffer.lua b/src/lua/buffer.lua index 503f753c26..ef0118c7c4 100644 --- a/src/lua/buffer.lua +++ b/src/lua/buffer.lua @@ -36,6 +36,34 @@ ibuf_reserve_slow(struct ibuf *ibuf, size_t size); void * lua_static_aligned_alloc(size_t size, size_t alignment); + +/** + * Register is a buffer to use with FFI functions, which usually + * operate with pointers to scalar values like int, char, size_t, + * void *. To avoid doing 'ffi.new(<type>[1])' on each such FFI + * function invocation, a module can use one of attributes of the + * register union. + * + * Naming policy of the attributes is easy to remember: + * 'a' for array type + type name first letters + 'p' for pointer. + * + * For example: + * - int[1] - <a>rray of <i>nt - ai; + * - const unsigned char *[1] - + * <a>rray of <c>onst <u>nsigned <c>har <p> pointer - acucp. + */ +union c_register { + size_t as[1]; + void *ap[1]; + int ai[1]; + char ac[1]; + const unsigned char *acucp[1]; + unsigned long aul[1]; + uint16_t u16; + uint32_t u32; + uint64_t u64; + int64_t i64; +}; ]] local builtin = ffi.C @@ -205,9 +233,23 @@ local function static_alloc(type, size) return ffi.new(type..'[?]', size) end +-- +-- Sometimes it is wanted to use several temporary registers at +-- once. For example, when a C function takes 2 C pointers. Then +-- one register is not enough - its attributes share memory. Note, +-- registers are not allocated with separate ffi.new calls +-- deliberately. With a single allocation they fit into 1 cache +-- line, and reduce the heap fragmentation. +-- +local reg_array = ffi.new('union c_register[?]', 2) + return { ibuf = ibuf_new; IBUF_SHARED = ffi.C.tarantool_lua_ibuf; READAHEAD = READAHEAD; static_alloc = static_alloc, + -- Keep reference. + reg_array = reg_array, + reg1 = reg_array[0], + reg2 = reg_array[1] } diff --git a/src/lua/crypto.lua b/src/lua/crypto.lua index cb37a73fbe..d23ba8e3a7 100644 --- a/src/lua/crypto.lua +++ b/src/lua/crypto.lua @@ -2,6 +2,7 @@ local ffi = require('ffi') local buffer = require('buffer') +local reg = buffer.reg1 ffi.cdef[[ /* from openssl/err.h */ @@ -102,7 +103,6 @@ local function digest_new(digest) digest = digest, buf = buffer.ibuf(64), initialized = false, - outl = ffi.new('int[1]') }, digest_mt) self:init() return self @@ -132,10 +132,10 @@ local function digest_final(self) return error('Digest not initialized') end self.initialized = false - if ffi.C.EVP_DigestFinal_ex(self.ctx, self.buf.wpos, self.outl) ~= 1 then + if ffi.C.EVP_DigestFinal_ex(self.ctx, self.buf.wpos, reg.ai) ~= 1 then return error('Can\'t finalize digest: ' .. openssl_err_str()) end - return ffi.string(self.buf.wpos, self.outl[0]) + return ffi.string(self.buf.wpos, reg.ai[0]) end local function digest_free(self) @@ -174,9 +174,7 @@ local function hmac_new(digest, key) local self = setmetatable({ ctx = ctx, digest = digest, - buf = buffer.ibuf(64), initialized = false, - outl = ffi.new('int[1]') }, hmac_mt) self:init(key) return self @@ -206,10 +204,11 @@ local function hmac_final(self) return error('HMAC not initialized') end self.initialized = false - if ffi.C.HMAC_Final(self.ctx, self.buf.wpos, self.outl) ~= 1 then + local buf = buffer.static_alloc('char', 64) + if ffi.C.HMAC_Final(self.ctx, buf, reg.ai) ~= 1 then return error('Can\'t finalize HMAC: ' .. openssl_err_str()) end - return ffi.string(self.buf.wpos, self.outl[0]) + return ffi.string(buf, reg.ai[0]) end local function hmac_free(self) diff --git a/src/lua/msgpackffi.lua b/src/lua/msgpackffi.lua index 609334cffe..bfeedbc4b8 100644 --- a/src/lua/msgpackffi.lua +++ b/src/lua/msgpackffi.lua @@ -21,19 +21,10 @@ float mp_decode_float(const char **data); double mp_decode_double(const char **data); -union tmpint { - uint16_t u16; - uint32_t u32; - uint64_t u64; -}; ]]) local strict_alignment = (jit.arch == 'arm') - -local tmpint -if strict_alignment then - tmpint = ffi.new('union tmpint[1]') -end +local reg = buffer.reg1 local function bswap_u16(num) return bit.rshift(bit.bswap(tonumber(num)), 16) @@ -70,10 +61,10 @@ end local encode_u16 if strict_alignment then encode_u16 = function(buf, code, num) - tmpint[0].u16 = bswap_u16(num) + reg.u16 = bswap_u16(num) local p = buf:alloc(3) p[0] = code - ffi.copy(p + 1, tmpint, 2) + ffi.copy(p + 1, reg, 2) end else encode_u16 = function(buf, code, num) @@ -86,11 +77,10 @@ end local encode_u32 if strict_alignment then encode_u32 = function(buf, code, num) - tmpint[0].u32 = - ffi.cast('uint32_t', bit.bswap(tonumber(num))) + reg.u32 = ffi.cast('uint32_t', bit.bswap(tonumber(num))) local p = buf:alloc(5) p[0] = code - ffi.copy(p + 1, tmpint, 4) + ffi.copy(p + 1, reg, 4) end else encode_u32 = function(buf, code, num) @@ -104,10 +94,10 @@ end local encode_u64 if strict_alignment then encode_u64 = function(buf, code, num) - tmpint[0].u64 = bit.bswap(ffi.cast('uint64_t', num)) + reg.u64 = bit.bswap(ffi.cast('uint64_t', num)) local p = buf:alloc(9) p[0] = code - ffi.copy(p + 1, tmpint, 8) + ffi.copy(p + 1, reg, 8) end else encode_u64 = function(buf, code, num) @@ -324,9 +314,9 @@ end local decode_u16 if strict_alignment then decode_u16 = function(data) - ffi.copy(tmpint, data[0], 2) + ffi.copy(reg, data[0], 2) data[0] = data[0] + 2 - return tonumber(bswap_u16(tmpint[0].u16)) + return tonumber(bswap_u16(reg.u16)) end else decode_u16 = function(data) @@ -339,10 +329,10 @@ end local decode_u32 if strict_alignment then decode_u32 = function(data) - ffi.copy(tmpint, data[0], 4) + ffi.copy(reg, data[0], 4) data[0] = data[0] + 4 return tonumber( - ffi.cast('uint32_t', bit.bswap(tonumber(tmpint[0].u32)))) + ffi.cast('uint32_t', bit.bswap(tonumber(reg.u32)))) end else decode_u32 = function(data) @@ -356,9 +346,9 @@ end local decode_u64 if strict_alignment then decode_u64 = function(data) - ffi.copy(tmpint, data[0], 8); + ffi.copy(reg, data[0], 8); data[0] = data[0] + 8 - local num = bit.bswap(tmpint[0].u64) + local num = bit.bswap(reg.u64) if num <= DBL_INT_MAX then return tonumber(num) -- return as 'number' end @@ -385,8 +375,8 @@ end local decode_i16 if strict_alignment then decode_i16 = function(data) - ffi.copy(tmpint, data[0], 2) - local num = bswap_u16(tmpint[0].u16) + ffi.copy(reg, data[0], 2) + local num = bswap_u16(reg.u16) data[0] = data[0] + 2 -- note: this double cast is actually necessary return tonumber(ffi.cast('int16_t', ffi.cast('uint16_t', num))) @@ -403,8 +393,8 @@ end local decode_i32 if strict_alignment then decode_i32 = function(data) - ffi.copy(tmpint, data[0], 4) - local num = bit.bswap(tonumber(tmpint[0].u32)) + ffi.copy(reg, data[0], 4) + local num = bit.bswap(tonumber(reg.u32)) data[0] = data[0] + 4 return num end @@ -419,9 +409,9 @@ end local decode_i64 if strict_alignment then decode_i64 = function(data) - ffi.copy(tmpint, data[0], 8) + ffi.copy(reg, data[0], 8) data[0] = data[0] + 8 - local num = bit.bswap(ffi.cast('int64_t', tmpint[0].u64)) + local num = bit.bswap(reg.i64) if num >= -DBL_INT_MAX and num <= DBL_INT_MAX then return tonumber(num) -- return as 'number' end @@ -552,7 +542,7 @@ end -- element. It is significally faster on LuaJIT to use double pointer than -- return result, newpos. -- -local bufp = ffi.new('const unsigned char *[1]'); +local bufp = reg.acucp; local function check_offset(offset, len) if offset == nil then diff --git a/src/lua/socket.lua b/src/lua/socket.lua index 2bb5712ebf..2dba0a8d2a 100644 --- a/src/lua/socket.lua +++ b/src/lua/socket.lua @@ -10,6 +10,8 @@ local fiber = require('fiber') local fio = require('fio') local log = require('log') local buffer = require('buffer') +local reg1 = buffer.reg1 +local reg2 = buffer.reg2 local static_alloc = buffer.static_alloc local format = string.format @@ -473,9 +475,9 @@ local function socket_setsockopt(self, level, name, value) end if info.type == 1 then - local value = ffi.new("int[1]", value) + reg1.ai[0] = value local res = ffi.C.setsockopt(fd, - level, info.iname, value, ffi.sizeof('int')) + level, info.iname, reg1.ai, ffi.sizeof('int')) if res < 0 then self._errno = boxerrno() @@ -517,8 +519,10 @@ local function socket_getsockopt(self, level, name) self._errno = nil if info.type == 1 then - local value = ffi.new("int[1]", 0) - local len = ffi.new("size_t[1]", ffi.sizeof('int')) + local value = reg1.ai + value[0] = 0 + local len = reg2.as + len[0] = ffi.sizeof('int') local res = ffi.C.getsockopt(fd, level, info.iname, value, len) if res < 0 then @@ -533,8 +537,9 @@ local function socket_getsockopt(self, level, name) end if info.type == 2 then - local value = ffi.new("char[256]", { 0 }) - local len = ffi.new("size_t[1]", 256) + local value = static_alloc('char', 256) + local len = reg1.as + len[0] = 256 local res = ffi.C.getsockopt(fd, level, info.iname, value, len) if res < 0 then self._errno = boxerrno() @@ -556,8 +561,9 @@ local function socket_linger(self, active, timeout) local info = internal.SO_OPT[level].SO_LINGER self._errno = nil if active == nil then - local value = ffi.new("linger_t[1]") - local len = ffi.new("size_t[1]", 2 * ffi.sizeof('int')) + local value = static_alloc('linger_t') + local len = reg1.as + len[0] = ffi.sizeof('linger_t') local res = ffi.C.getsockopt(fd, level, info.iname, value, len) if res < 0 then self._errno = boxerrno() @@ -799,8 +805,7 @@ local function get_recv_size(self, size) -- them using message peek. local iflags = get_iflags(internal.SEND_FLAGS, {'MSG_PEEK'}) assert(iflags ~= nil) - local buf = ffi.new('char[?]', 1) - size = tonumber(ffi.C.recv(fd, buf, 1, iflags)) + size = tonumber(ffi.C.recv(fd, reg1.ac, 1, iflags)) -- Prevent race condition: proceed with the case when -- a datagram of length > 0 has been arrived after the -- getsockopt call above. diff --git a/src/lua/string.lua b/src/lua/string.lua index bb4adfc784..6e12c59ae7 100644 --- a/src/lua/string.lua +++ b/src/lua/string.lua @@ -15,8 +15,8 @@ ffi.cdef[[ ]] local c_char_ptr = ffi.typeof('const char *') -local strip_newstart = ffi.new("unsigned long[1]") -local strip_newlen = ffi.new("unsigned long[1]") +local strip_newstart = buffer.reg1.aul +local strip_newlen = buffer.reg2.aul local memcmp = ffi.C.memcmp local memmem = ffi.C.memmem diff --git a/test/app/buffer.result b/test/app/buffer.result index e0aad9bc7a..beccc5a877 100644 --- a/test/app/buffer.result +++ b/test/app/buffer.result @@ -7,6 +7,33 @@ buffer = require('buffer') ffi = require('ffi') --- ... +-- Registers. +reg1 = buffer.reg1 +--- +... +reg1.u16 = 100 +--- +... +u16 = ffi.new('uint16_t[1]') +--- +... +ffi.copy(u16, reg1, 2) +--- +... +u16[0] +--- +- 100 +... +u16[0] = 200 +--- +... +ffi.copy(reg1, u16, 2) +--- +... +reg1.u16 +--- +- 200 +... -- Alignment. _ = buffer.static_alloc('char') -- This makes buffer pos unaligned. --- diff --git a/test/app/buffer.test.lua b/test/app/buffer.test.lua index ba7299f33f..a1c3806807 100644 --- a/test/app/buffer.test.lua +++ b/test/app/buffer.test.lua @@ -2,6 +2,17 @@ test_run = require('test_run').new() buffer = require('buffer') ffi = require('ffi') +-- Registers. +reg1 = buffer.reg1 +reg1.u16 = 100 +u16 = ffi.new('uint16_t[1]') +ffi.copy(u16, reg1, 2) +u16[0] + +u16[0] = 200 +ffi.copy(reg1, u16, 2) +reg1.u16 + -- Alignment. _ = buffer.static_alloc('char') -- This makes buffer pos unaligned. p = buffer.static_alloc('int') -- GitLab