From c7447748b7927e23aaad77ef5de594527218467b Mon Sep 17 00:00:00 2001 From: Roman Tsisyk <roman@tarantool.org> Date: Tue, 20 Jun 2017 11:09:06 +0300 Subject: [PATCH] yaml: move utf8_check_printable() to src/util.c This code was added by me, it is not covered by lua-yaml copyrights and can be moved to src/. See cd0d6f25a "incorrect error handling in lyaml". Needed for #128 --- src/trivia/util.h | 9 ++++++ src/util.c | 59 +++++++++++++++++++++++++++++++++++ third_party/lua-yaml/lyaml.cc | 59 ++--------------------------------- 3 files changed, 70 insertions(+), 57 deletions(-) diff --git a/src/trivia/util.h b/src/trivia/util.h index 724bdc1951..b80267d812 100644 --- a/src/trivia/util.h +++ b/src/trivia/util.h @@ -384,6 +384,15 @@ abspath(const char *filename); char * int2str(long long int val); +/** + * Check that @a str is a valid UTF-8 sequence and can be printed + * unescaped. Returns 1 if so, 0 otherwise. + * @param str string + * @param length string length in bytes + */ +int +utf8_check_printable(const char *str, size_t length); + #ifndef HAVE_MEMMEM /* Declare memmem(). */ void * diff --git a/src/util.c b/src/util.c index 259f2ca8a7..04bb88a0a9 100644 --- a/src/util.c +++ b/src/util.c @@ -202,3 +202,62 @@ int2str(long long int val) return buf; } +int +utf8_check_printable(const char *start, size_t length) +{ + const unsigned char *end = (const unsigned char *) start + length; + const unsigned char *pointer = (const unsigned char *) start; + + while (pointer < end) { + unsigned char octet; + unsigned int width; + unsigned int value; + size_t k; + + octet = pointer[0]; + width = (octet & 0x80) == 0x00 ? 1 : + (octet & 0xE0) == 0xC0 ? 2 : + (octet & 0xF0) == 0xE0 ? 3 : + (octet & 0xF8) == 0xF0 ? 4 : 0; + value = (octet & 0x80) == 0x00 ? octet & 0x7F : + (octet & 0xE0) == 0xC0 ? octet & 0x1F : + (octet & 0xF0) == 0xE0 ? octet & 0x0F : + (octet & 0xF8) == 0xF0 ? 
octet & 0x07 : 0; + if (!width) + return 0; + if (pointer + width > end) + return 0; + for (k = 1; k < width; k++) { + octet = pointer[k]; + if ((octet & 0xC0) != 0x80) return 0; + value = (value << 6) + (octet & 0x3F); + } + if (!((width == 1) || + (width == 2 && value >= 0x80) || + (width == 3 && value >= 0x800) || + (width == 4 && value >= 0x10000))) + return 0; + + /* + * gh-354: yaml incorrectly escapes special characters in a string + * Check that the string can be actually printed unescaped. + */ + if (*pointer > 0x7F && + !((pointer[0] == 0x0A) || + (pointer[0] >= 0x20 && pointer[0] <= 0x7E) || + (pointer[0] == 0xC2 && pointer[1] >= 0xA0) || + (pointer[0] > 0xC2 && pointer[0] < 0xED) || + (pointer[0] == 0xED && pointer[1] < 0xA0) || + (pointer[0] == 0xEE) || + (pointer[0] == 0xEF && + !(pointer[1] == 0xBB && pointer[2] == 0xBF) && + !(pointer[1] == 0xBF && + (pointer[2] == 0xBE || pointer[2] == 0xBF))) + ) + ) { + return 0; + } + pointer += width; + } + return 1; +} diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc index e17691c97a..4d875fab44 100644 --- a/third_party/lua-yaml/lyaml.cc +++ b/third_party/lua-yaml/lyaml.cc @@ -28,6 +28,8 @@ #include "lyaml.h" +#include "trivia/util.h" + #include <string.h> #include <stdlib.h> #include <stdbool.h> @@ -447,63 +449,6 @@ static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field){ return 1; } -static int -utf8_check_printable(const char *start, size_t length) -{ - const unsigned char *end = (const unsigned char *) start + length; - const unsigned char *pointer = (const unsigned char *) start; - - while (pointer < end) { - unsigned char octet; - unsigned int width; - unsigned int value; - size_t k; - - octet = pointer[0]; - width = (octet & 0x80) == 0x00 ? 1 : - (octet & 0xE0) == 0xC0 ? 2 : - (octet & 0xF0) == 0xE0 ? 3 : - (octet & 0xF8) == 0xF0 ? 4 : 0; - value = (octet & 0x80) == 0x00 ? octet & 0x7F : - (octet & 0xE0) == 0xC0 ? 
octet & 0x1F : - (octet & 0xF0) == 0xE0 ? octet & 0x0F : - (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; - if (!width) return 0; - if (pointer+width > end) return 0; - for (k = 1; k < width; k ++) { - octet = pointer[k]; - if ((octet & 0xC0) != 0x80) return 0; - value = (value << 6) + (octet & 0x3F); - } - if (!((width == 1) || - (width == 2 && value >= 0x80) || - (width == 3 && value >= 0x800) || - (width == 4 && value >= 0x10000))) return 0; - - /* - * gh-354: yaml incorrectly escapes special characters in a string - * Check that the string can be actually printed unescaped. - */ - if (*pointer > 0x7F && !( - (pointer[0] == 0x0A) || - (pointer[0] >= 0x20 && pointer[0] <= 0x7E) || - (pointer[0] == 0xC2 && pointer[1] >= 0xA0) || - (pointer[0] > 0xC2 && pointer[0] < 0xED) || - (pointer[0] == 0xED && pointer[1] < 0xA0) || - (pointer[0] == 0xEE) || - (pointer[0] == 0xEF && - !(pointer[1] == 0xBB && pointer[2] == 0xBF) && - !(pointer[1] == 0xBF && - (pointer[2] == 0xBE || pointer[2] == 0xBF))) - )) { - return 0; - } - pointer += width; - } - - return 1; -} - static int yaml_is_flow_mode(struct lua_yaml_dumper *dumper) { /* * Tarantool-specific: always quote strings in FLOW SEQUENCE -- GitLab