From e7558062d3559e6bcc18f91eacb88269428321dc Mon Sep 17 00:00:00 2001 From: Kirill Shcherbatov <kshcherbatov@tarantool.org> Date: Fri, 15 Feb 2019 12:59:34 +0300 Subject: [PATCH] sql: store regular identifiers in case-normal form MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduced a new sql_normalize_name routine performing SQL name conversion to case-normal form via Unicode character folding. For example, ß is converted to SS. The result is similar to SQL UPPER function. Closes #3931 --- src/box/lua/lua_sql.c | 9 +- src/box/sql/build.c | 21 +--- src/box/sql/expr.c | 160 ++++++++++++++++++-------- src/box/sql/parse.y | 25 +++- src/box/sql/select.c | 12 +- src/box/sql/sqlInt.h | 62 +++++++++- src/box/sql/trigger.c | 13 ++- src/box/sql/util.c | 76 +++++++++--- src/lib/core/errinj.h | 1 + test/box/errinj.result | 2 + test/sql-tap/identifier_case.test.lua | 12 +- test/sql/errinj.result | 18 +++ test/sql/errinj.test.lua | 8 ++ 13 files changed, 310 insertions(+), 109 deletions(-) diff --git a/src/box/lua/lua_sql.c b/src/box/lua/lua_sql.c index f5a7b7819f..3d0047e168 100644 --- a/src/box/lua/lua_sql.c +++ b/src/box/lua/lua_sql.c @@ -176,13 +176,10 @@ lbox_sql_create_function(struct lua_State *L) } size_t name_len; const char *name = lua_tolstring(L, 1, &name_len); - char *normalized_name = (char *) region_alloc(&fiber()->gc, - name_len + 1); + char *normalized_name = + sql_normalized_name_region_new(&fiber()->gc, name, name_len); if (normalized_name == NULL) - return luaL_error(L, "out of memory"); - memcpy(normalized_name, name, name_len); - normalized_name[name_len] = '\0'; - sqlNormalizeName(normalized_name); + return luaT_error(L); struct lua_sql_func_info *func_info = (struct lua_sql_func_info *) malloc(sizeof(*func_info)); if (func_info == NULL) diff --git a/src/box/sql/build.c b/src/box/sql/build.c index 4070630086..5b1e933c70 100644 --- a/src/box/sql/build.c +++ b/src/box/sql/build.c @@ -229,20 +229,6 @@ 
sql_space_column_is_in_pk(struct space *space, uint32_t column) return false; } -char * -sql_name_from_token(struct sql *db, struct Token *name_token) -{ - assert(name_token != NULL && name_token->z != NULL); - char *name = sqlDbStrNDup(db, name_token->z, name_token->n); - if (name == NULL) { - diag_set(OutOfMemory, name_token->n + 1, "sqlDbStrNDup", - "name"); - return NULL; - } - sqlNormalizeName(name); - return name; -} - /* * This routine is used to check if the UTF-8 string zName is a legal * unqualified name for an identifier. @@ -445,16 +431,11 @@ sqlAddColumn(Parse * pParse, Token * pName, struct type_def *type_def) if (sql_field_retrieve(pParse, def, def->field_count) == NULL) return; struct region *region = &pParse->region; - z = region_alloc(region, pName->n + 1); + z = sql_normalized_name_region_new(region, pName->z, pName->n); if (z == NULL) { - diag_set(OutOfMemory, pName->n + 1, - "region_alloc", "z"); pParse->is_aborted = true; return; } - memcpy(z, pName->z, pName->n); - z[pName->n] = 0; - sqlNormalizeName(z); for (uint32_t i = 0; i < def->field_count; i++) { if (strcmp(z, def->fields[i].name) == 0) { diag_set(ClientError, ER_SPACE_FIELD_IS_DUPLICATE, z); diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c index 48db9b45dc..838fbd21ad 100644 --- a/src/box/sql/expr.c +++ b/src/box/sql/expr.c @@ -886,58 +886,117 @@ sqlExprSetHeightAndFlags(Parse * pParse, Expr * p) #define exprSetHeight(y) #endif /* SQL_MAX_EXPR_DEPTH>0 */ -struct Expr * -sql_expr_new(struct sql *db, int op, const struct Token *token) +/** + * Allocate a new empty expression object with reserved extra + * memory. + * @param db SQL context. + * @param op Expression value type. + * @param extra_size Extra size, needed to be allocated together + * with the expression. + * @retval Not NULL Success. An empty expression. + * @retval NULL Error. A diag message is set. 
+ */ +static struct Expr * +sql_expr_new_empty(struct sql *db, int op, int extra_size) { - int extra_sz = 0; - int val = 0; - if (token != NULL) { - if (op != TK_INTEGER || token->z == NULL || - sqlGetInt32(token->z, &val) == 0) { - extra_sz = token->n + 1; - assert(val >= 0); - } - } - struct Expr *expr = sqlDbMallocRawNN(db, sizeof(*expr) + extra_sz); - if (expr == NULL) { - diag_set(OutOfMemory, sizeof(*expr), "sqlDbMallocRawNN", - "expr"); + struct Expr *e = sqlDbMallocRawNN(db, sizeof(*e) + extra_size); + if (e == NULL) { + diag_set(OutOfMemory, sizeof(*e), "sqlDbMallocRawNN", "e"); return NULL; } - - memset(expr, 0, sizeof(*expr)); - expr->op = (u8)op; - expr->iAgg = -1; + memset(e, 0, sizeof(*e)); + e->op = (u8)op; + e->iAgg = -1; #if SQL_MAX_EXPR_DEPTH > 0 - expr->nHeight = 1; + e->nHeight = 1; #endif - if (token == NULL) - return expr; + return e; +} - if (extra_sz == 0) { - expr->flags |= EP_IntValue; - expr->u.iValue = val; - } else { - expr->u.zToken = (char *)&expr[1]; - assert(token->z != NULL || token->n == 0); - memcpy(expr->u.zToken, token->z, token->n); - expr->u.zToken[token->n] = '\0'; +/** + * Try to convert a token of a specified type to integer. + * @param op Token type. + * @param token Token itself. + * @param[out] res Result integer. + * @retval 0 Success. @A res stores a result. + * @retval -1 Error. Can not be converted. No diag. + */ +static inline int +sql_expr_token_to_int(int op, const struct Token *token, int *res) +{ + if (op == TK_INTEGER && token->z != NULL && + sqlGetInt32(token->z, res) > 0) + return 0; + return -1; +} + +/** Create an expression of a constant integer. 
*/ +static inline struct Expr * +sql_expr_new_int(struct sql *db, int value) +{ + struct Expr *e = sql_expr_new_empty(db, TK_INTEGER, 0); + if (e != NULL) { + e->flags |= EP_IntValue; + e->u.iValue = value; + } + return e; +} + +struct Expr * +sql_expr_new(struct sql *db, int op, const struct Token *token) +{ + int extra_sz = 0; + if (token != NULL) { + int val; + if (sql_expr_token_to_int(op, token, &val) == 0) + return sql_expr_new_int(db, val); + extra_sz = token->n + 1; } - return expr; + struct Expr *e = sql_expr_new_empty(db, op, extra_sz); + if (e == NULL || token == NULL) + return e; + e->u.zToken = (char *) &e[1]; + assert(token->z != NULL || token->n == 0); + memcpy(e->u.zToken, token->z, token->n); + e->u.zToken[token->n] = '\0'; + return e; } struct Expr * sql_expr_new_dequoted(struct sql *db, int op, const struct Token *token) { - struct Expr *e = sql_expr_new(db, op, token); - if (e == NULL || (e->flags & EP_IntValue) != 0 || e->u.zToken == NULL) + int extra_size = 0; + bool is_name = false; + if (token != NULL) { + int val; + assert(token->z != NULL || token->n == 0); + if (sql_expr_token_to_int(op, token, &val) == 0) + return sql_expr_new_int(db, val); + is_name = op == TK_ID || op == TK_COLLATE || op == TK_FUNCTION; + if (is_name) { + extra_size = sql_normalize_name(NULL, 0, token->z, + token->n); + if (extra_size < 0) + return NULL; + } else { + extra_size = token->n + 1; + } + } + struct Expr *e = sql_expr_new_empty(db, op, extra_size); + if (e == NULL || token == NULL || token->n == 0) return e; - if (e->u.zToken[0] == '"') + e->u.zToken = (char *) &e[1]; + if (token->z[0] == '"') e->flags |= EP_DblQuoted; - if (e->op == TK_ID || e->op == TK_COLLATE || e->op == TK_FUNCTION) - sqlNormalizeName(e->u.zToken); - else + if (! 
is_name) { + memcpy(e->u.zToken, token->z, token->n); + e->u.zToken[token->n] = '\0'; sqlDequote(e->u.zToken); + } else if (sql_normalize_name(e->u.zToken, extra_size, token->z, + token->n) < 0) { + sql_expr_delete(db, e, false); + return NULL; + } return e; } @@ -1831,19 +1890,22 @@ sqlExprListSetName(Parse * pParse, /* Parsing context */ int dequote /* True to cause the name to be dequoted */ ) { - assert(pList != 0 || pParse->db->mallocFailed != 0); - if (pList) { - struct ExprList_item *pItem; - assert(pList->nExpr > 0); - pItem = &pList->a[pList->nExpr - 1]; - assert(pItem->zName == 0); - pItem->zName = sqlDbStrNDup(pParse->db, pName->z, pName->n); - if (dequote) - sqlNormalizeName(pItem->zName); - /* n = 0 in case of select * */ - if (pName->n != 0) - sqlCheckIdentifierName(pParse, pItem->zName); + struct sql *db = pParse->db; + assert(pList != NULL || db->mallocFailed != 0); + if (pList == NULL || pName->n == 0) + return; + assert(pList->nExpr > 0); + struct ExprList_item *item = &pList->a[pList->nExpr - 1]; + assert(item->zName == NULL); + if (dequote) { + item->zName = sql_normalized_name_db_new(db, pName->z, pName->n); + if (item->zName == NULL) + pParse->is_aborted = true; + } else { + item->zName = sqlDbStrNDup(db, pName->z, pName->n); } + if (item->zName != NULL) + sqlCheckIdentifierName(pParse, item->zName); } /* diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y index 8c1d1983aa..c37b8d429f 100644 --- a/src/box/sql/parse.y +++ b/src/box/sql/parse.y @@ -856,7 +856,16 @@ idlist(A) ::= nm(Y). { ** that created the expression. 
*/ static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token t){ - Expr *p = sqlDbMallocRawNN(pParse->db, sizeof(Expr)+t.n+1); + int name_sz = 0; + struct Expr *p = NULL; + if (op != TK_VARIABLE) { + name_sz = sql_normalize_name(NULL, 0, t.z, t.n); + if (name_sz < 0) + goto tarantool_error; + } else { + name_sz = t.n + 1; + } + p = sqlDbMallocRawNN(pParse->db, sizeof(Expr) + name_sz); if( p ){ memset(p, 0, sizeof(Expr)); switch (op) { @@ -889,10 +898,12 @@ idlist(A) ::= nm(Y). { p->flags = EP_Leaf; p->iAgg = -1; p->u.zToken = (char*)&p[1]; - memcpy(p->u.zToken, t.z, t.n); - p->u.zToken[t.n] = 0; - if (op != TK_VARIABLE){ - sqlNormalizeName(p->u.zToken); + if (op != TK_VARIABLE) { + if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) < 0) + goto tarantool_error; + } else { + memcpy(p->u.zToken, t.z, t.n); + p->u.zToken[t.n] = 0; } #if SQL_MAX_EXPR_DEPTH>0 p->nHeight = 1; @@ -901,6 +912,10 @@ idlist(A) ::= nm(Y). { pOut->pExpr = p; pOut->zStart = t.z; pOut->zEnd = &t.z[t.n]; + return; +tarantool_error: + sqlDbFree(pParse->db, p); + pParse->is_aborted = true; } } diff --git a/src/box/sql/select.c b/src/box/sql/select.c index 190578f754..d279ba7cac 100644 --- a/src/box/sql/select.c +++ b/src/box/sql/select.c @@ -4191,10 +4191,14 @@ flattenSubquery(Parse * pParse, /* Parsing context */ pList = pParent->pEList; for (i = 0; i < pList->nExpr; i++) { if (pList->a[i].zName == 0) { - char *zName = - sqlDbStrDup(db, pList->a[i].zSpan); - sqlNormalizeName(zName); - pList->a[i].zName = zName; + char *str = pList->a[i].zSpan; + int len = strlen(str); + char *name = + sql_normalized_name_db_new(db, str, + len); + if (name == NULL) + pParse->is_aborted = true; + pList->a[i].zName = name; } } if (pSub->pOrderBy) { diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h index 049d5aeada..72bd4ee0fe 100644 --- a/src/box/sql/sqlInt.h +++ b/src/box/sql/sqlInt.h @@ -3200,7 +3200,57 @@ void sqlTreeViewWith(TreeView *, const With *); void sqlSetString(char **, sql *, const char 
*); void sqlDequote(char *); -void sqlNormalizeName(char *z); + +/** + * Perform SQL name normalization: cast name to the upper-case + * (via Unicode Character Folding). Casing is locale-independent + * and context-sensitive. The result may be longer or shorter + * than the original. The source string and the destination buffer + * must not overlap. + * For example, ß is converted to SS. + * The result is similar to SQL UPPER function. + * + * @param dst A buffer for the result string. The result will be + * 0-terminated if the buffer is large enough. The contents + * are undefined in case of failure. + * @param dst_size The size of the buffer (number of bytes). If it + * is 0, then dst may be NULL and the function will only + * return the length of the result without writing any of + * the result string. + * @param src The original string. + * @param src_len The length of the original string. + * @retval The count of bytes written (or need to be written) on + * success. + * @retval < 0 Otherwise. The diag message is set. + */ +int +sql_normalize_name(char *dst, int dst_size, const char *src, int src_len); + +/** + * Duplicate a normalized version of @a name onto an sqlMalloc. + * For normalization rules @sa sql_normalize_name(). + * @param db SQL context. + * @param name Source string. + * @param len Length of @a name. + * @retval Not NULL Success. A normalized string is returned. + * @retval NULL Error. A diag message is set. + */ +char * +sql_normalized_name_db_new(struct sql *db, const char *name, int len); + +/** + * Duplicate a normalized version of @a name onto a region @a r. + * For normalization rules @sa sql_normalize_name(). + * @param r Region allocator. + * @param name Source string. + * @param len Length of @a name. + * @retval Not NULL Success. A normalized string is returned. + * @retval NULL Error. A diag message is set. Region is not + * truncated back. 
+ */ +char * +sql_normalized_name_region_new(struct region *r, const char *name, int len); + void sqlTokenInit(Token *, char *); int sqlKeywordCode(const unsigned char *, int); int sqlRunParser(Parse *, const char *); @@ -3747,12 +3797,16 @@ void sqlExprIfFalse(Parse *, Expr *, int, int); * string is \000 terminated and is persistent. * * @param db The database connection. - * @param name_token The source token with text. + * @param t The source token with text. * @retval Not NULL Formatted name on new memory. * @retval NULL Error. Diag message is set. */ -char * -sql_name_from_token(struct sql *db, struct Token *name_token); +static inline char * +sql_name_from_token(struct sql *db, struct Token *t) +{ + assert(t != NULL && t->z != NULL); + return sql_normalized_name_db_new(db, t->z, t->n); +} int sqlExprCompare(Expr *, Expr *, int); int sqlExprListCompare(ExprList *, ExprList *, int); diff --git a/src/box/sql/trigger.c b/src/box/sql/trigger.c index 378d6a2778..c75dad0433 100644 --- a/src/box/sql/trigger.c +++ b/src/box/sql/trigger.c @@ -279,15 +279,22 @@ sql_trigger_select_step(struct sql *db, struct Select *select) static struct TriggerStep * sql_trigger_step_new(struct sql *db, u8 op, struct Token *target_name) { - int size = sizeof(struct TriggerStep) + target_name->n + 1; + int name_size = + sql_normalize_name(NULL, 0, target_name->z, target_name->n); + if (name_size < 0) + return NULL; + int size = sizeof(struct TriggerStep) + name_size; struct TriggerStep *trigger_step = sqlDbMallocZero(db, size); if (trigger_step == NULL) { diag_set(OutOfMemory, size, "sqlDbMallocZero", "trigger_step"); return NULL; } char *z = (char *)&trigger_step[1]; - memcpy(z, target_name->z, target_name->n); - sqlNormalizeName(z); + if (sql_normalize_name(z, name_size, target_name->z, + target_name->n) < 0) { + sqlDbFree(db, trigger_step); + return NULL; + } trigger_step->zTarget = z; trigger_step->op = op; return trigger_step; diff --git a/src/box/sql/util.c b/src/box/sql/util.c 
index cac404f70c..e9553b3a4a 100644 --- a/src/box/sql/util.c +++ b/src/box/sql/util.c @@ -41,6 +41,8 @@ #if HAVE_ISNAN || SQL_HAVE_ISNAN #include <math.h> #endif +#include <unicode/ucasemap.h> +#include "errinj.h" /* * Routine needed to support the testcase() macro. @@ -253,23 +255,71 @@ sqlDequote(char *z) z[j] = 0; } +int +sql_normalize_name(char *dst, int dst_size, const char *src, int src_len) +{ + assert(src != NULL); + if (sqlIsquote(src[0])){ + if (dst_size == 0) + return src_len + 1; + memcpy(dst, src, src_len); + dst[src_len] = '\0'; + sqlDequote(dst); + return src_len + 1; + } + UErrorCode status = U_ZERO_ERROR; + ERROR_INJECT(ERRINJ_SQL_NAME_NORMALIZATION, { + status = U_MEMORY_ALLOCATION_ERROR; + goto error; + }); + UCaseMap *case_map = ucasemap_open(NULL, 0, &status); + if (case_map == NULL) + goto error; + int len = ucasemap_utf8ToUpper(case_map, dst, dst_size, src, src_len, + &status); + ucasemap_close(case_map); + assert(U_SUCCESS(status) || + (dst_size == 0 && status == U_BUFFER_OVERFLOW_ERROR)); + return len + 1; +error: + diag_set(CollationError, + "string conversion to the uppercase failed: %s", + u_errorName(status)); + return -1; +} -void -sqlNormalizeName(char *z) +char * +sql_normalized_name_db_new(struct sql *db, const char *name, int len) { - char quote; - int i=0; - if (z == 0) - return; - quote = z[0]; - if (sqlIsquote(quote)){ - sqlDequote(z); - return; + int size = sql_normalize_name(NULL, 0, name, len); + if (size < 0) + return NULL; + char *res = sqlDbMallocRawNN(db, size); + if (res == NULL) { + diag_set(OutOfMemory, size, "sqlDbMallocRawNN", "res"); + return NULL; + } + if (sql_normalize_name(res, size, name, len) < 0) { + sqlDbFree(db, res); + return NULL; } - while(z[i]!=0){ - z[i] = (char)sqlToupper(z[i]); - i++; + return res; +} + +char * +sql_normalized_name_region_new(struct region *r, const char *name, int len) +{ + int size = sql_normalize_name(NULL, 0, name, len); + if (size < 0) + return NULL; + char *res = (char *) 
region_alloc(r, size); + if (res == NULL) { + diag_set(OutOfMemory, size, "region_alloc", "res"); + return NULL; } + if (sql_normalize_name(res, size, name, len) < 0) + return NULL; + return res; } /* diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index 99891c5b9e..6663b17c47 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -126,6 +126,7 @@ struct errinj { _(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_SIO_READ_MAX, ERRINJ_INT, {.iparam = -1}) \ + _(ERRINJ_SQL_NAME_NORMALIZATION, ERRINJ_BOOL, {.bparam = false}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index 2bc41ac5b6..af95134ad0 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -22,6 +22,8 @@ errinj.info() state: false ERRINJ_SNAP_WRITE_ROW_TIMEOUT: state: 0 + ERRINJ_SQL_NAME_NORMALIZATION: + state: false ERRINJ_VY_SCHED_TIMEOUT: state: 0 ERRINJ_WAL_WRITE_PARTIAL: diff --git a/test/sql-tap/identifier_case.test.lua b/test/sql-tap/identifier_case.test.lua index 74c7ce2fb6..9c800dd2c2 100755 --- a/test/sql-tap/identifier_case.test.lua +++ b/test/sql-tap/identifier_case.test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env tarantool test = require("sqltester") -test:plan(71) +test:plan(73) local test_prefix = "identifier_case-" @@ -13,8 +13,10 @@ local data = { { 6, [[ "Table1" ]], {0} }, -- non ASCII characters case is not supported { 7, [[ русский ]], {0} }, - { 8, [[ Русский ]], {0} }, - { 9, [[ "русский" ]], {"/already exists/"} }, + { 8, [[ "русский" ]], {0} }, + { 9, [[ Großschreibweise ]], {0} }, + { 10, [[ Русский ]], {"/already exists/"} }, + { 11, [[ Grossschreibweise ]], {"/already exists/"} }, } for _, row in ipairs(data) do @@ -35,7 +37,7 @@ data = { { 5, [[ "table1" ]], {5}}, { 6, [[ "Table1" ]], {6}}, { 7, [[ русский ]], {7}}, - { 8, [[ Русский ]], {8}}, + { 8, [[ "русский" ]], {8}}, } for _, row in 
ipairs(data) do @@ -66,7 +68,7 @@ test:do_test( function () return test:drop_all_tables() end, - 3) + 4) data = { { 1, [[ columnn ]], {0} }, diff --git a/test/sql/errinj.result b/test/sql/errinj.result index a1e7cc4a38..c974ab7149 100644 --- a/test/sql/errinj.result +++ b/test/sql/errinj.result @@ -388,3 +388,21 @@ errinj.set("ERRINJ_WAL_DELAY", false) --- - ok ... +-- +-- gh-3931: Store regular identifiers in case-normal form +-- +errinj = box.error.injection +--- +... +errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) +--- +- ok +... +box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") +--- +- error: 'string conversion to the uppercase failed: U_MEMORY_ALLOCATION_ERROR' +... +errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) +--- +- ok +... diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua index d8833feb44..f9e7a3c490 100644 --- a/test/sql/errinj.test.lua +++ b/test/sql/errinj.test.lua @@ -139,3 +139,11 @@ box.sql.execute("INSERT INTO t VALUES (2);") box.sql.execute("UPDATE t SET id = 2;") -- Finish drop space. errinj.set("ERRINJ_WAL_DELAY", false) + +-- +-- gh-3931: Store regular identifiers in case-normal form +-- +errinj = box.error.injection +errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", true) +box.sql.execute("CREATE TABLE hello (id INT primary key,x INT,y INT);") +errinj.set("ERRINJ_SQL_NAME_NORMALIZATION", false) -- GitLab