From c7447748b7927e23aaad77ef5de594527218467b Mon Sep 17 00:00:00 2001
From: Roman Tsisyk <roman@tarantool.org>
Date: Tue, 20 Jun 2017 11:09:06 +0300
Subject: [PATCH] yaml: move utf8_check_printable() to src/util.c

This code was added by me; it is not covered by the lua-yaml
copyright and can be moved to src/.

See cd0d6f25a "incorrect error handling in lyaml".

Needed for #128
---
 src/trivia/util.h             |  9 ++++++
 src/util.c                    | 59 +++++++++++++++++++++++++++++++++++
 third_party/lua-yaml/lyaml.cc | 59 ++---------------------------------
 3 files changed, 70 insertions(+), 57 deletions(-)

diff --git a/src/trivia/util.h b/src/trivia/util.h
index 724bdc1951..b80267d812 100644
--- a/src/trivia/util.h
+++ b/src/trivia/util.h
@@ -384,6 +384,15 @@ abspath(const char *filename);
 char *
 int2str(long long int val);
 
+/**
+ * Check that @a str is valid UTF-8 and can be printed unescaped.
+ * @param str string to check
+ * @param length length of @a str in bytes
+ * @return 1 if @a str passes the check, 0 otherwise
+ */
+int
+utf8_check_printable(const char *str, size_t length);
+
 #ifndef HAVE_MEMMEM
 /* Declare memmem(). */
 void *
diff --git a/src/util.c b/src/util.c
index 259f2ca8a7..04bb88a0a9 100644
--- a/src/util.c
+++ b/src/util.c
@@ -202,3 +202,62 @@ int2str(long long int val)
 	return buf;
 }
 
+int
+utf8_check_printable(const char *start, size_t length)
+{
+	const unsigned char *pointer = (const unsigned char *) start;
+	const unsigned char *end = pointer + length;
+
+	while (pointer < end) {
+		unsigned char octet = pointer[0];
+		unsigned int width;
+		unsigned int value;
+
+		if ((octet & 0x80) == 0x00) {
+			width = 1;
+			value = octet & 0x7F;
+		} else if ((octet & 0xE0) == 0xC0) {
+			width = 2;
+			value = octet & 0x1F;
+		} else if ((octet & 0xF0) == 0xE0) {
+			width = 3;
+			value = octet & 0x0F;
+		} else if ((octet & 0xF8) == 0xF0) {
+			width = 4;
+			value = octet & 0x07;
+		} else {
+			return 0;
+		}
+		/* Compare lengths: pointer + width may overshoot end (UB). */
+		if ((size_t)(end - pointer) < width)
+			return 0;
+		for (size_t k = 1; k < width; k++) {
+			octet = pointer[k];
+			if ((octet & 0xC0) != 0x80)
+				return 0;
+			value = (value << 6) + (octet & 0x3F);
+		}
+		/* Reject overlong encodings. */
+		if ((width == 2 && value < 0x80) ||
+		    (width == 3 && value < 0x800) ||
+		    (width == 4 && value < 0x10000))
+			return 0;
+		/*
+		 * gh-354: yaml incorrectly escapes special characters in a
+		 * string. ASCII always passes; a multi-byte sequence passes
+		 * only via a clause below (4-byte sequences never do).
+		 */
+		if (pointer[0] > 0x7F &&
+		    !((pointer[0] == 0xC2 && pointer[1] >= 0xA0) ||
+		      (pointer[0]  > 0xC2 && pointer[0]  < 0xED) ||
+		      (pointer[0] == 0xED && pointer[1]  < 0xA0) ||
+		      (pointer[0] == 0xEE) ||
+		      (pointer[0] == 0xEF &&
+		       !(pointer[1] == 0xBB && pointer[2] == 0xBF) &&
+		       !(pointer[1] == 0xBF &&
+			 (pointer[2] == 0xBE || pointer[2] == 0xBF)))))
+			return 0;
+		pointer += width;
+	}
+	return 1;
+}
diff --git a/third_party/lua-yaml/lyaml.cc b/third_party/lua-yaml/lyaml.cc
index e17691c97a..4d875fab44 100644
--- a/third_party/lua-yaml/lyaml.cc
+++ b/third_party/lua-yaml/lyaml.cc
@@ -28,6 +28,8 @@
 
 #include "lyaml.h"
 
+#include "trivia/util.h"
+
 #include <string.h>
 #include <stdlib.h>
 #include <stdbool.h>
@@ -447,63 +449,6 @@ static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field){
    return 1;
 }
 
-static int
-utf8_check_printable(const char *start, size_t length)
-{
-    const unsigned char *end = (const unsigned char *) start + length;
-    const unsigned char *pointer = (const unsigned char *) start;
-
-    while (pointer < end) {
-        unsigned char octet;
-        unsigned int width;
-        unsigned int value;
-        size_t k;
-
-        octet = pointer[0];
-        width = (octet & 0x80) == 0x00 ? 1 :
-                (octet & 0xE0) == 0xC0 ? 2 :
-                (octet & 0xF0) == 0xE0 ? 3 :
-                (octet & 0xF8) == 0xF0 ? 4 : 0;
-        value = (octet & 0x80) == 0x00 ? octet & 0x7F :
-                (octet & 0xE0) == 0xC0 ? octet & 0x1F :
-                (octet & 0xF0) == 0xE0 ? octet & 0x0F :
-                (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0;
-        if (!width) return 0;
-        if (pointer+width > end) return 0;
-        for (k = 1; k < width; k ++) {
-            octet = pointer[k];
-            if ((octet & 0xC0) != 0x80) return 0;
-            value = (value << 6) + (octet & 0x3F);
-        }
-        if (!((width == 1) ||
-            (width == 2 && value >= 0x80) ||
-            (width == 3 && value >= 0x800) ||
-            (width == 4 && value >= 0x10000))) return 0;
-
-        /*
-         * gh-354: yaml incorrectly escapes special characters in a string
-         * Check that the string can be actually printed unescaped.
-         */
-        if (*pointer > 0x7F && !(
-            (pointer[0] == 0x0A) ||
-            (pointer[0] >= 0x20 && pointer[0] <= 0x7E) ||
-            (pointer[0] == 0xC2 && pointer[1] >= 0xA0) ||
-            (pointer[0]  > 0xC2 && pointer[0]  < 0xED) ||
-            (pointer[0] == 0xED && pointer[1]  < 0xA0) ||
-            (pointer[0] == 0xEE) ||
-            (pointer[0] == 0xEF &&
-             !(pointer[1] == 0xBB && pointer[2] == 0xBF) &&
-             !(pointer[1] == 0xBF &&
-               (pointer[2] == 0xBE || pointer[2] == 0xBF)))
-            )) {
-            return 0;
-        }
-        pointer += width;
-    }
-
-    return 1;
-}
-
 static int yaml_is_flow_mode(struct lua_yaml_dumper *dumper) {
    /*
     * Tarantool-specific: always quote strings in FLOW SEQUENCE
-- 
GitLab