From f998ea39e96d93113823d92727a1faf9860c8ea6 Mon Sep 17 00:00:00 2001
From: Oleg Babin <babinoleg@mail.ru>
Date: Fri, 4 Dec 2020 00:06:38 +0300
Subject: [PATCH] digest: introduce FFI bindings for xxHash32/64

This patch introduces new hash types for the digest module - xxHash32
and xxHash64.

Closes #2003

@TarantoolBot document
Title: digest module supports xxHash32/64

```lua
-- Examples below demonstrate xxHash32.
-- xxHash64 has exactly the same interface.

-- Calculate the 32-bit hash (default seed is 0).
digest.xxhash32(string[, seed])

-- Streaming
-- Start a new hash by initializing state with a seed.
-- If no value is provided, 0 is used as the default.
xxhash = digest.xxhash32.new([seed])

-- Reset the hash state. It is also possible to specify the seed
-- manually. If no value is provided, the value initially passed
-- to "new" is used.
-- Here and below "seed" is expected to be an unsigned
-- number. Function returns nothing.
xxhash:clear([seed])

-- Feed the hash state by calling "update" as many times as
-- necessary. Function returns nothing.
xxhash:update('string')

-- Produce a hash value.
xxhash:result()

-- Create a new hash object holding a copy of the current state.
xxhash:copy()
```
---
 .../unreleased/add-xxhash-to-digest-module.md |   4 +
 src/exports.h                                 |  10 +
 src/lua/digest.lua                            | 113 ++++++++++
 test/app/digest.result                        | 195 +++++++++++++++++-
 test/app/digest.test.lua                      |  65 +++++-
 5 files changed, 378 insertions(+), 9 deletions(-)
 create mode 100644 changelogs/unreleased/add-xxhash-to-digest-module.md

diff --git a/changelogs/unreleased/add-xxhash-to-digest-module.md b/changelogs/unreleased/add-xxhash-to-digest-module.md
new file mode 100644
index 0000000000..f417ad84b3
--- /dev/null
+++ b/changelogs/unreleased/add-xxhash-to-digest-module.md
@@ -0,0 +1,4 @@
+## feature/lua/digest
+
+ * Introduce new hash types in digest module - `xxhash32` and `xxhash64`
+   (gh-2003).
diff --git a/src/exports.h b/src/exports.h
index 41357636af..a4f3833cc6 100644
--- a/src/exports.h
+++ b/src/exports.h
@@ -521,3 +521,13 @@ EXPORT(uri_format)
 EXPORT(uri_parse)
 EXPORT(uuid_nil)
 EXPORT(uuid_unpack)
+EXPORT(XXH32)
+EXPORT(XXH32_copyState)
+EXPORT(XXH32_digest)
+EXPORT(XXH32_reset)
+EXPORT(XXH32_update)
+EXPORT(XXH64)
+EXPORT(XXH64_copyState)
+EXPORT(XXH64_digest)
+EXPORT(XXH64_reset)
+EXPORT(XXH64_update)
diff --git a/src/lua/digest.lua b/src/lua/digest.lua
index 54a09c2b1a..12d0ee2ced 100644
--- a/src/lua/digest.lua
+++ b/src/lua/digest.lua
@@ -31,6 +31,50 @@ ffi.cdef[[
     void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len);
     uint32_t PMurHash32_Result(uint32_t h1, uint32_t carry, uint32_t total_length);
     uint32_t PMurHash32(uint32_t seed, const void *key, int len);
+
+    /* from third_party/zstd/lib/common/xxhash.c */
+    typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+    struct XXH32_state_s {
+        unsigned total_len_32;
+        unsigned large_len;
+        unsigned v1;
+        unsigned v2;
+        unsigned v3;
+        unsigned v4;
+        unsigned mem32[4]; /* buffer defined as U32 for alignment */
+        unsigned memsize;
+        unsigned reserved; /* never read nor write, will be removed in a future version */
+    };
+
+    struct XXH64_state_s {
+        unsigned long long total_len;
+        unsigned long long v1;
+        unsigned long long v2;
+        unsigned long long v3;
+        unsigned long long v4;
+        unsigned long long mem64[4]; /* buffer defined as U64 for alignment */
+        unsigned memsize;
+        unsigned reserved[2]; /* never read nor write, will be removed in a future version */
+    };
+
+    typedef unsigned int XXH32_hash_t;
+    typedef unsigned long long XXH64_hash_t;
+    XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+    XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+    typedef struct XXH32_state_s XXH32_state_t;
+    typedef struct XXH64_state_s XXH64_state_t;
+
+    XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
+    XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+    XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+
+    XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
+    XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+    XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
+
+    void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);
+    void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);
 ]]
 
 local builtin = ffi.C
@@ -280,4 +324,85 @@ m['aes256cbc'] = {
     end
 }
 
+-- Register digest.xxhash32 and digest.xxhash64. Both variants share one
+-- implementation; only the names of the XXH32_*/XXH64_* symbols differ.
+for _, var in ipairs({'32', '64'}) do
+    local xxHash
+
+    local xxh_template = 'XXH%s_%s'
+    local update_fn_name = string.format(xxh_template, var, 'update')
+    local digest_fn_name = string.format(xxh_template, var, 'digest')
+    local reset_fn_name = string.format(xxh_template, var, 'reset')
+    local copy_fn_name = string.format(xxh_template, var, 'copyState')
+
+    -- Feed a string into the hash state. Raises a usage error when the
+    -- argument is not a string. Returns nothing.
+    local function update(self, str)
+        if type(str) ~= 'string' then
+            local message = string.format("Usage xxhash%s:update(string)", var)
+            error(message, 2)
+        end
+        builtin[update_fn_name](self.value, str, #str)
+    end
+
+    -- Return the hash value computed from the current state.
+    local function result(self)
+        return builtin[digest_fn_name](self.value)
+    end
+
+    -- Reset the hash state with the given seed. When the seed is omitted,
+    -- the seed passed to new() is used. Returns nothing.
+    local function clear(self, seed)
+        if seed == nil then
+            seed = self.default_seed
+        end
+        builtin[reset_fn_name](self.value, seed)
+    end
+
+    -- Return a new object holding a copy of the hash state. The copy
+    -- inherits the default seed, so clear() without arguments resets it
+    -- to the same seed as the original.
+    local function copy(self)
+        local copy = xxHash.new(self.default_seed)
+        builtin[copy_fn_name](copy.value, self.value)
+        return copy
+    end
+
+    local state_type_name = string.format(xxh_template, var, 'state_t')
+    local XXH_state_t = ffi.typeof(state_type_name)
+
+    xxHash = {
+        -- Create a streaming hash object; the seed defaults to 0.
+        new = function(seed)
+            local self = {
+                update = update,
+                result = result,
+                clear = clear,
+                copy = copy,
+                value = ffi.new(XXH_state_t),
+                default_seed = seed or 0,
+            }
+            self:clear(self.default_seed)
+            return self
+        end,
+    }
+
+    local call_fn_name = 'XXH' .. var
+    setmetatable(xxHash, {
+        -- One-shot hashing: digest.xxhash<N>(string[, seed = 0]).
+        __call = function(_, str, seed)
+            if type(str) ~= 'string' then
+                local message = string.format("Usage digest.xxhash%s(string[, unsigned number])", var)
+                error(message, 2)
+            end
+            if seed == nil then
+                seed = 0
+            end
+            return builtin[call_fn_name](str, #str, seed)
+        end,
+    })
+
+    m['xxhash' .. var] = xxHash
+end
+
 return m
diff --git a/test/app/digest.result b/test/app/digest.result
index d946c6a3bf..40e49ace5b 100644
--- a/test/app/digest.result
+++ b/test/app/digest.result
@@ -572,13 +572,6 @@ err:match("number")
 ---
 - number
 ...
-digest = nil
----
-...
-test_run:cmd("clear filter")
----
-- true
-...
 -- gh-3396: fiber-safe pbkdf2
 res = {}
 ---
@@ -636,3 +629,218 @@ res
 - bafac115a0022b2894f2983b5b5102455bdd3ba7cfbeb09f219a9fde8f3ee6a9
 - bafac115a0022b2894f2983b5b5102455bdd3ba7cfbeb09f219a9fde8f3ee6a9
 ...
+--
+-- gh-2003 xxHash.
+--
+xxhash32 = digest.xxhash32.new()
+---
+...
+xxhash32:result()
+---
+- 46947589
+...
+xxhash64 = digest.xxhash64.new()
+---
+...
+xxhash64:result()
+---
+- 17241709254077376921ULL
+...
+-- New takes seed optionally.
+digest.xxhash32.new(1):result()
+---
+- 187479954
+...
+digest.xxhash64.new(1):result()
+---
+- 15397730242686860875ULL
+...
+-- String is expected as input value.
+digest.xxhash32(1)
+---
+- error: Usage digest.xxhash32(string[, unsigned number])
+...
+digest.xxhash64(1)
+---
+- error: Usage digest.xxhash64(string[, unsigned number])
+...
+digest.xxhash32.new():update(1)
+---
+- error: Usage xxhash32:update(string)
+...
+digest.xxhash64.new():update(1)
+---
+- error: Usage xxhash64:update(string)
+...
+-- Seed is an optional second argument (default = 0).
+digest.xxhash32('12345')
+---
+- 3003995828
+...
+digest.xxhash32('12345', 0)
+---
+- 3003995828
+...
+digest.xxhash32('12345', 1)
+---
+- 2544060598
+...
+xxhash32:result()
+---
+- 46947589
+...
+xxhash32:clear(1)
+---
+...
+xxhash32:result()
+---
+- 187479954
+...
+xxhash32:update('123')
+---
+...
+xxhash32:result()
+---
+- 2569538424
+...
+xxhash32:update('45')
+---
+...
+xxhash32:result()
+---
+- 2544060598
+...
+xxhash32:clear()
+---
+...
+xxhash32:result()
+---
+- 46947589
+...
+xxhash32_copy = xxhash32:copy()
+---
+...
+xxhash32_copy:result()
+---
+- 46947589
+...
+xxhash32_copy ~= xxhash32
+---
+- true
+...
+xxhash32_copy:clear(1ULL)
+---
+...
+xxhash32_copy:result()
+---
+- 187479954
+...
+xxhash32 = nil
+---
+...
+xxhash32_copy = nil
+---
+...
+-- Seed is an optional second argument (default = 0).
+digest.xxhash64('12345')
+---
+- 14335752410685132726ULL
+...
+digest.xxhash64('12345', 0)
+---
+- 14335752410685132726ULL
+...
+digest.xxhash64('12345', 1)
+---
+- 10037897083593476069ULL
+...
+xxhash64:result()
+---
+- 17241709254077376921ULL
+...
+xxhash64:clear(1)
+---
+...
+xxhash64:result()
+---
+- 15397730242686860875ULL
+...
+xxhash64:update('123')
+---
+...
+xxhash64:result()
+---
+- 5440451180712653975ULL
+...
+xxhash64:update('45')
+---
+...
+xxhash64:result()
+---
+- 10037897083593476069ULL
+...
+xxhash64:clear()
+---
+...
+xxhash64:result()
+---
+- 17241709254077376921ULL
+...
+xxhash64_copy = xxhash64:copy()
+---
+...
+xxhash64_copy:result()
+---
+- 17241709254077376921ULL
+...
+xxhash64_copy ~= xxhash64
+---
+- true
+...
+xxhash64_copy:clear(1ULL)
+---
+...
+xxhash64_copy:result()
+---
+- 15397730242686860875ULL
+...
+xxhash64 = nil
+---
+...
+xxhash64_copy = nil
+---
+...
+-- Copy inherits the default seed of the original object.
+xxhash32_copy = digest.xxhash32.new(1):copy()
+---
+...
+xxhash32_copy:clear()
+---
+...
+xxhash32_copy:result()
+---
+- 187479954
+...
+xxhash32_copy = nil
+---
+...
+xxhash64_copy = digest.xxhash64.new(1):copy()
+---
+...
+xxhash64_copy:clear()
+---
+...
+xxhash64_copy:result()
+---
+- 15397730242686860875ULL
+...
+xxhash64_copy = nil
+---
+...
+test_run:cmd("clear filter")
+---
+- true
+...
+digest = nil
+---
+...
diff --git a/test/app/digest.test.lua b/test/app/digest.test.lua
index 7ecda91bcb..d2cc9d39cd 100644
--- a/test/app/digest.test.lua
+++ b/test/app/digest.test.lua
@@ -183,8 +183,6 @@ err:match("Usage")
 s, err = pcall(digest.pbkdf2_hex, "password", "salt", "lol", "lol")
 s
 err:match("number")
-digest = nil
-test_run:cmd("clear filter")
 
 -- gh-3396: fiber-safe pbkdf2
 res = {}
@@ -203,3 +201,76 @@ _ = fiber.create(test_pbkdf2)
 _ = sentry:get()
 _ = sentry:get()
 res
+
+--
+-- gh-2003 xxHash.
+--
+xxhash32 = digest.xxhash32.new()
+xxhash32:result()
+xxhash64 = digest.xxhash64.new()
+xxhash64:result()
+
+-- New takes seed optionally.
+digest.xxhash32.new(1):result()
+digest.xxhash64.new(1):result()
+
+-- String is expected as input value.
+digest.xxhash32(1)
+digest.xxhash64(1)
+digest.xxhash32.new():update(1)
+digest.xxhash64.new():update(1)
+
+-- Seed is an optional second argument (default = 0).
+digest.xxhash32('12345')
+digest.xxhash32('12345', 0)
+digest.xxhash32('12345', 1)
+xxhash32:result()
+xxhash32:clear(1)
+xxhash32:result()
+xxhash32:update('123')
+xxhash32:result()
+xxhash32:update('45')
+xxhash32:result()
+xxhash32:clear()
+xxhash32:result()
+xxhash32_copy = xxhash32:copy()
+xxhash32_copy:result()
+xxhash32_copy ~= xxhash32
+xxhash32_copy:clear(1ULL)
+xxhash32_copy:result()
+xxhash32 = nil
+xxhash32_copy = nil
+
+-- Seed is an optional second argument (default = 0).
+digest.xxhash64('12345')
+digest.xxhash64('12345', 0)
+digest.xxhash64('12345', 1)
+xxhash64:result()
+xxhash64:clear(1)
+xxhash64:result()
+xxhash64:update('123')
+xxhash64:result()
+xxhash64:update('45')
+xxhash64:result()
+xxhash64:clear()
+xxhash64:result()
+xxhash64_copy = xxhash64:copy()
+xxhash64_copy:result()
+xxhash64_copy ~= xxhash64
+xxhash64_copy:clear(1ULL)
+xxhash64_copy:result()
+xxhash64 = nil
+xxhash64_copy = nil
+
+-- Copy inherits the default seed of the original object.
+xxhash32_copy = digest.xxhash32.new(1):copy()
+xxhash32_copy:clear()
+xxhash32_copy:result()
+xxhash32_copy = nil
+xxhash64_copy = digest.xxhash64.new(1):copy()
+xxhash64_copy:clear()
+xxhash64_copy:result()
+xxhash64_copy = nil
+
+test_run:cmd("clear filter")
+digest = nil
-- 
GitLab