Skip to content
Snippets Groups Projects
Commit c7447748 authored by Roman Tsisyk's avatar Roman Tsisyk
Browse files

yaml: move utf8_check_printable() to src/util.c

This code was added by me, so it is not covered by the lua-yaml
copyright and can be moved to src/.

See cd0d6f25 "incorrect error handling in lyaml".

Needed for #128
parent 61ca7510
No related branches found
No related tags found
No related merge requests found
......@@ -384,6 +384,15 @@ abspath(const char *filename);
char *
int2str(long long int val);
/**
* Check that @a str is a valid UTF-8 sequence and can be printed
* unescaped.
*
* The string is described by a pointer and a byte count; it does not
* have to be NUL-terminated.
*
* @param str string
* @param length string length, in bytes
* @retval 1 the string is well-formed UTF-8 and none of its multi-byte
*           characters requires escaping (an empty string qualifies)
* @retval 0 the string is malformed, truncated, or contains a
*           multi-byte character that must be escaped
*/
int
utf8_check_printable(const char *str, size_t length);
#ifndef HAVE_MEMMEM
/* Declare memmem(). */
void *
......
......@@ -202,3 +202,62 @@ int2str(long long int val)
return buf;
}
/**
 * Check that a byte string is well-formed UTF-8 and that every
 * multi-byte character in it may be printed unescaped.
 *
 * Single-byte (ASCII) characters are only validated structurally:
 * they are never subjected to the printability test, so ASCII control
 * characters are accepted here.
 *
 * @param start  first byte of the string (need not be NUL-terminated)
 * @param length number of bytes in the string
 * @retval 1 the string is valid and printable (also for length == 0)
 * @retval 0 the string has a bad lead byte, a truncated or overlong
 *           sequence, a bad continuation byte, or a multi-byte
 *           character outside the printable set
 */
int
utf8_check_printable(const char *start, size_t length)
{
	const unsigned char *end = (const unsigned char *) start + length;
	const unsigned char *pointer = (const unsigned char *) start;

	while (pointer < end) {
		const unsigned char lead = pointer[0];
		unsigned int width;
		unsigned int value;
		unsigned int k;

		/* The lead byte fixes the sequence width and its payload bits. */
		if ((lead & 0x80) == 0x00) {
			width = 1;
			value = lead & 0x7F;
		} else if ((lead & 0xE0) == 0xC0) {
			width = 2;
			value = lead & 0x1F;
		} else if ((lead & 0xF0) == 0xE0) {
			width = 3;
			value = lead & 0x0F;
		} else if ((lead & 0xF8) == 0xF0) {
			width = 4;
			value = lead & 0x07;
		} else {
			/* Stray continuation byte or a 5+ byte lead. */
			return 0;
		}

		/*
		 * Truncated sequence. Compare byte counts rather than
		 * computing `pointer + width`: that pointer could lie more
		 * than one past the end of the buffer, which is undefined
		 * behaviour and may wrap around.
		 */
		if ((size_t) (end - pointer) < width)
			return 0;

		/* Every trailing byte must look like 10xxxxxx. */
		for (k = 1; k < width; k++) {
			const unsigned char octet = pointer[k];
			if ((octet & 0xC0) != 0x80)
				return 0;
			value = (value << 6) + (octet & 0x3F);
		}

		/* Reject overlong encodings (value fits a shorter form). */
		if ((width == 2 && value < 0x80) ||
		    (width == 3 && value < 0x800) ||
		    (width == 4 && value < 0x10000))
			return 0;

		/*
		 * gh-354: yaml incorrectly escapes special characters in a
		 * string. Check that the character can actually be printed
		 * unescaped. Only multi-byte characters are tested; here
		 * width >= 2, so pointer[1] is in bounds, and pointer[2] is
		 * read only for the 3-byte lead 0xEF.
		 */
		if (lead > 0x7F) {
			const int printable =
				/* U+00A0..U+00BF (C1 controls excluded) */
				(lead == 0xC2 && pointer[1] >= 0xA0) ||
				/* U+00C0..U+CFFF */
				(lead > 0xC2 && lead < 0xED) ||
				/* U+D000..U+D7FF (surrogates excluded) */
				(lead == 0xED && pointer[1] < 0xA0) ||
				/* U+E000..U+EFFF */
				(lead == 0xEE) ||
				/* U+F000..U+FFFD except U+FEFF (BOM) */
				(lead == 0xEF &&
				 !(pointer[1] == 0xBB && pointer[2] == 0xBF) &&
				 !(pointer[1] == 0xBF &&
				   (pointer[2] == 0xBE || pointer[2] == 0xBF)));
			if (!printable)
				return 0;
		}

		pointer += width;
	}
	return 1;
}
......@@ -28,6 +28,8 @@
#include "lyaml.h"
#include "trivia/util.h"
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
......@@ -447,63 +449,6 @@ static int dump_array(struct lua_yaml_dumper *dumper, struct luaL_field *field){
return 1;
}
/*
 * Return 1 when the `length` bytes at `start` are well-formed UTF-8
 * whose multi-byte characters may all be printed unescaped, and 0
 * otherwise. ASCII bytes are accepted as is (no printability test is
 * applied to them); an empty string yields 1.
 */
static int
utf8_check_printable(const char *start, size_t length)
{
   const unsigned char *end = (const unsigned char *) start + length;
   const unsigned char *pointer = (const unsigned char *) start;

   while (pointer < end) {
      const unsigned char first = pointer[0];
      unsigned int width;
      unsigned int value;
      unsigned int k;

      /* Sequence width and payload bits come from the lead byte. */
      if ((first & 0x80) == 0x00) {
         width = 1;
         value = first & 0x7F;
      } else if ((first & 0xE0) == 0xC0) {
         width = 2;
         value = first & 0x1F;
      } else if ((first & 0xF0) == 0xE0) {
         width = 3;
         value = first & 0x0F;
      } else if ((first & 0xF8) == 0xF0) {
         width = 4;
         value = first & 0x07;
      } else {
         return 0; /* not a valid lead byte */
      }

      /*
       * Truncated sequence: test the remaining byte count instead of
       * forming `pointer + width`, which may point more than one past
       * the end of the buffer (undefined behaviour).
       */
      if ((size_t) (end - pointer) < width)
         return 0;

      /* Trailing bytes must match 10xxxxxx. */
      for (k = 1; k < width; k++) {
         const unsigned char octet = pointer[k];
         if ((octet & 0xC0) != 0x80)
            return 0;
         value = (value << 6) + (octet & 0x3F);
      }

      /* Overlong encodings are invalid. */
      if ((width == 2 && value < 0x80) ||
          (width == 3 && value < 0x800) ||
          (width == 4 && value < 0x10000))
         return 0;

      /*
       * gh-354: yaml incorrectly escapes special characters in a string
       * Check that the string can be actually printed unescaped.
       * Applies to multi-byte characters only; pointer[1] is in bounds
       * because width >= 2 here, pointer[2] is read only for the
       * 3-byte lead 0xEF.
       */
      if (first > 0x7F) {
         const int printable =
            (first == 0xC2 && pointer[1] >= 0xA0) ||
            (first > 0xC2 && first < 0xED) ||
            (first == 0xED && pointer[1] < 0xA0) ||
            (first == 0xEE) ||
            (first == 0xEF &&
             !(pointer[1] == 0xBB && pointer[2] == 0xBF) &&
             !(pointer[1] == 0xBF &&
               (pointer[2] == 0xBE || pointer[2] == 0xBF)));
         if (!printable)
            return 0;
      }

      pointer += width;
   }
   return 1;
}
static int yaml_is_flow_mode(struct lua_yaml_dumper *dumper) {
/*
* Tarantool-specific: always quote strings in FLOW SEQUENCE
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment