From 78befdf92972a725da646aabec45ced028567876 Mon Sep 17 00:00:00 2001
From: Mergen Imeev <imeevma@gmail.com>
Date: Thu, 18 Nov 2021 11:02:20 +0300
Subject: [PATCH] sql: properly check bind variable names

After this patch, variable names will have to follow the rules defined
for identifiers in SQL. Essentially, this means that a digit can no
longer be used as the first character of a bind variable name.

Part of #4763

NO_DOC=Will be added later.
NO_CHANGELOG=Will be added later.

(cherry picked from commit 899fbaebd8835282202cadf6fb6d04cfcb999a52)
---
 src/box/sql/expr.c             | 45 ++++++++++++++++++++++
 src/box/sql/parse.y            | 69 ++++++++++------------------------
 src/box/sql/sqlInt.h           | 11 ++++++
 src/box/sql/tokenize.c         | 26 ++++---------
 test/sql-luatest/bind_test.lua | 30 +++++++++++++++
 test/sql-tap/misc1.test.lua    |  4 +-
 test/sql/iproto.result         |  2 +-
 7 files changed, 116 insertions(+), 71 deletions(-)
 create mode 100644 test/sql-luatest/bind_test.lua

diff --git a/src/box/sql/expr.c b/src/box/sql/expr.c
index 1f02441d8c..6fd0885eea 100644
--- a/src/box/sql/expr.c
+++ b/src/box/sql/expr.c
@@ -1314,6 +1314,51 @@ sqlExprAssignVarNumber(Parse * pParse, Expr * pExpr, u32 n)
 	}
 }
 
+struct Expr *
+expr_new_variable(struct Parse *parse, const struct Token *spec,
+		  const struct Token *id)
+{
+	assert(spec != NULL && spec->n == 1);
+	uint32_t len = 1;
+	if (parse->parse_only) {
+		diag_set(ClientError, ER_SQL_PARSER_GENERIC_WITH_POS,
+			 parse->line_count, parse->line_pos,
+			 "bindings are not allowed in DDL");
+		parse->is_aborted = true;
+		return NULL;
+	}
+	if (id != NULL) {
+		assert(spec->z[0] != '?');
+		if (id->z - spec->z != 1) {
+			diag_set(ClientError, ER_SQL_UNKNOWN_TOKEN,
+				 parse->line_count, spec->z - parse->zTail + 1,
+				 spec->n, spec->z);
+			parse->is_aborted = true;
+			return NULL;
+		}
+		if (spec->z[0] == '#' && sqlIsdigit(id->z[0])) {
+			diag_set(ClientError, ER_SQL_SYNTAX_NEAR_TOKEN,
+				 parse->line_count, spec->n, spec->z);
+			parse->is_aborted = true;
+			return NULL;
+		}
+		len += id->n;
+	}
+	struct Expr *expr = sql_expr_new_empty(parse->db, TK_VARIABLE, len + 1);
+	if (expr == NULL)
+		return NULL;
+	expr->type = FIELD_TYPE_BOOLEAN;
+	expr->flags = EP_Leaf;
+	expr->u.zToken = (char *)(expr + 1);
+	expr->u.zToken[0] = spec->z[0];
+	if (id != NULL)
+		memcpy(expr->u.zToken + 1, id->z, id->n);
+	expr->u.zToken[len] = '\0';
+
+	sqlExprAssignVarNumber(parse, expr, len);
+	return expr;
+}
+
 /*
  * Recursively delete an expression tree.
  */
diff --git a/src/box/sql/parse.y b/src/box/sql/parse.y
index adf2ba14b2..971f9e30a7 100644
--- a/src/box/sql/parse.y
+++ b/src/box/sql/parse.y
@@ -1000,15 +1000,6 @@ idlist(A) ::= nm(Y). {
       case TK_UNKNOWN:
         p->type = FIELD_TYPE_BOOLEAN;
         break;
-      case TK_VARIABLE:
-        /*
-         * For variables we set BOOLEAN type since
-         * unassigned bindings will be replaced
-         * with NULL automatically, i.e. without
-         * explicit call of sql_bind_*().
-         */
-        p->type = FIELD_TYPE_BOOLEAN;
-        break;
       default:
         p->type = FIELD_TYPE_SCALAR;
         break;
@@ -1017,20 +1008,16 @@ idlist(A) ::= nm(Y). {
       p->flags = EP_Leaf;
       p->iAgg = -1;
       p->u.zToken = (char*)&p[1];
-      if (op != TK_VARIABLE) {
-        int rc = sql_normalize_name(p->u.zToken, name_sz, t.z, t.n);
-        if (rc > name_sz) {
-          name_sz = rc;
-          p = sqlDbReallocOrFree(pParse->db, p, sizeof(*p) + name_sz);
-          if (p == NULL)
-            goto tarantool_error;
-          p->u.zToken = (char *) &p[1];
-          if (sql_normalize_name(p->u.zToken, name_sz, t.z, t.n) > name_sz)
-              unreachable();
+      int rc = sql_normalize_name(p->u.zToken, name_sz, t.z, t.n);
+      if (rc > name_sz) {
+        name_sz = rc;
+        p = sqlDbReallocOrFree(pParse->db, p, sizeof(*p) + name_sz);
+        if (p == NULL) {
+          pParse->is_aborted = true;
+          return;
         }
-      } else {
-        memcpy(p->u.zToken, t.z, t.n);
-        p->u.zToken[t.n] = 0;
+        p->u.zToken = (char *)&p[1];
+        sql_normalize_name(p->u.zToken, name_sz, t.z, t.n);
       }
 #if SQL_MAX_EXPR_DEPTH>0
       p->nHeight = 1;
@@ -1040,9 +1027,6 @@ idlist(A) ::= nm(Y). {
     pOut->zStart = t.z;
     pOut->zEnd = &t.z[t.n];
     return;
-tarantool_error:
-    sqlDbFree(pParse->db, p);
-    pParse->is_aborted = true;
   }
 }
 
@@ -1085,30 +1069,17 @@ term(A) ::= INTEGER(X). {
   A.zEnd = X.z + X.n;
   if( A.pExpr ) A.pExpr->flags |= EP_Leaf;
 }
-expr(A) ::= VARIABLE(X).     {
-  Token t = X;
-  if (pParse->parse_only) {
-    spanSet(&A, &t, &t);
-    diag_set(ClientError, ER_SQL_PARSER_GENERIC_WITH_POS, pParse->line_count,
-             pParse->line_pos, "bindings are not allowed in DDL");
-    pParse->is_aborted = true;
-    A.pExpr = NULL;
-  } else if (!(X.z[0]=='#' && sqlIsdigit(X.z[1]))) {
-    u32 n = X.n;
-    spanExpr(&A, pParse, TK_VARIABLE, X);
-    if (A.pExpr->u.zToken[0] == '?' && n > 1) {
-      diag_set(ClientError, ER_SQL_SYNTAX_NEAR_TOKEN, pParse->line_count, t.n, t.z);
-      pParse->is_aborted = true;
-    } else {
-      sqlExprAssignVarNumber(pParse, A.pExpr, n);
-    }
-  }else{
-    assert( t.n>=2 );
-    spanSet(&A, &t, &t);
-    diag_set(ClientError, ER_SQL_SYNTAX_NEAR_TOKEN, pParse->line_count, t.n, t.z);
-    pParse->is_aborted = true;
-    A.pExpr = NULL;
-  }
+expr(A) ::= VARNUM(X). {
+  A.pExpr = expr_new_variable(pParse, &X, NULL);
+  spanSet(&A, &X, &X);
+}
+expr(A) ::= VARIABLE(X) id(Y). {
+  A.pExpr = expr_new_variable(pParse, &X, &Y);
+  spanSet(&A, &X, &Y);
+}
+expr(A) ::= VARIABLE(X) INTEGER(Y). {
+  A.pExpr = expr_new_variable(pParse, &X, &Y);
+  spanSet(&A, &X, &Y);
 }
 expr(A) ::= expr(A) COLLATE id(C). {
   A.pExpr = sqlExprAddCollateToken(pParse, A.pExpr, &C, 1);
diff --git a/src/box/sql/sqlInt.h b/src/box/sql/sqlInt.h
index 393b5e9a79..52157658e5 100644
--- a/src/box/sql/sqlInt.h
+++ b/src/box/sql/sqlInt.h
@@ -2630,6 +2630,17 @@ Expr *sqlExprFunction(Parse *, ExprList *, Token *);
 void sqlExprAssignVarNumber(Parse *, Expr *, u32);
 ExprList *sqlExprListAppendVector(Parse *, ExprList *, IdList *, Expr *);
 
+/**
+ * Parse tokens as a name or a position of bound variable.
+ *
+ * @param parse Parse context.
+ * @param spec Special symbol for bound variable.
+ * @param id Name or position number of bound variable.
+ */
+struct Expr *
+expr_new_variable(struct Parse *parse, const struct Token *spec,
+		  const struct Token *id);
+
 /**
  * Set the sort order for the last element on the given ExprList.
  *
diff --git a/src/box/sql/tokenize.c b/src/box/sql/tokenize.c
index f2d5a2df51..8bc519b9da 100644
--- a/src/box/sql/tokenize.c
+++ b/src/box/sql/tokenize.c
@@ -58,8 +58,8 @@
 #define CC_KYWD       1		/* Alphabetics or '_'.  Usable in a keyword */
 #define CC_ID         2		/* unicode characters usable in IDs */
 #define CC_DIGIT      3		/* Digits */
-#define CC_DOLLAR     4		/* '$' */
-#define CC_VARALPHA   5		/* '@', '#', ':'.  Alphabetic SQL variables */
+/** SQL variables: '@', '#', ':', and '$'. */
+#define CC_VARALPHA   5
 #define CC_VARNUM     6		/* '?'.  Numeric SQL variables */
 #define CC_SPACE      7		/* Space characters */
 #define CC_QUOTE      8		/* '\''. String literals */
@@ -90,7 +90,7 @@ static const char sql_ascii_class[] = {
 /*       x0  x1  x2  x3  x4  x5  x6  x7  x8 x9  xa xb  xc xd xe  xf */
 /* 0x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 7, 28, 7, 7, 7, 27, 27,
 /* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-/* 2x */ 7, 15, 9, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16,
+/* 2x */ 7, 15, 9, 5, 5, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16,
 /* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6,
 /* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 /* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 29, 27, 30, 27, 1,
@@ -184,7 +184,7 @@ int
 sql_token(const char *z, int *type, bool *is_reserved)
 {
 	*is_reserved = false;
-	int i, n;
+	int i;
 	char c, delim;
 	/* Switch on the character-class of the first byte
 	 * of the token. See the comment on the CC_ defines
@@ -369,23 +369,11 @@ sql_token(const char *z, int *type, bool *is_reserved)
 		}
 		return i;
 	case CC_VARNUM:
-		*type = TK_VARIABLE;
-		for (i = 1; sqlIsdigit(z[i]); i++) {
-		}
-		return i;
-	case CC_DOLLAR:
+		*type = TK_VARNUM;
+		return 1;
 	case CC_VARALPHA:
-		n = 0;
 		*type = TK_VARIABLE;
-		for (i = 1; (c = z[i]) != 0; i++) {
-			if (IdChar(c))
-				n++;
-			else
-				break;
-		}
-		if (n == 0)
-			*type = TK_ILLEGAL;
-		return i;
+		return 1;
 	case CC_KYWD:
 		for (i = 1; sql_ascii_class[*(unsigned char*)(z+i)] <= CC_KYWD;
 		     i++) {
diff --git a/test/sql-luatest/bind_test.lua b/test/sql-luatest/bind_test.lua
new file mode 100644
index 0000000000..8ff6c6ae97
--- /dev/null
+++ b/test/sql-luatest/bind_test.lua
@@ -0,0 +1,30 @@
+local server = require('test.luatest_helpers.server')
+local t = require('luatest')
+local g = t.group()
+
+g.before_all(function()
+    g.server = server:new({alias = 'bind'})
+    g.server:start()
+end)
+
+g.after_all(function()
+    g.server:stop()
+end)
+
+g.test_bind_1 = function()
+    g.server:exec(function()
+        local t = require('luatest')
+        local sql = [[SELECT @1asd;]]
+        local res = "At line 1 at or near position 9: unrecognized token '1asd'"
+        local _, err = box.execute(sql, {{['@1asd'] = 123}})
+        t.assert_equals(err.message, res)
+    end)
+end
+
+g.test_bind_2 = function()
+    local conn = g.server.net_box
+    local sql = [[SELECT @1asd;]]
+    local res = [[At line 1 at or near position 9: unrecognized token '1asd']]
+    local _, err = pcall(conn.execute, conn, sql, {{['@1asd'] = 123}})
+    t.assert_equals(err.message, res)
+end
diff --git a/test/sql-tap/misc1.test.lua b/test/sql-tap/misc1.test.lua
index f207d3e921..204e070d2e 100755
--- a/test/sql-tap/misc1.test.lua
+++ b/test/sql-tap/misc1.test.lua
@@ -1052,7 +1052,7 @@ test:do_catchsql_test(
         select''like''like''like#0;
     ]], {
         -- <misc1-21.1>
-        1, [[Syntax error at line 1 near '#0']]
+        1, [[Syntax error at line 1 near '#']]
         -- </misc1-21.1>
     })
 
@@ -1062,7 +1062,7 @@ test:do_catchsql_test(
         VALUES(0,0x0MATCH#0;
     ]], {
         -- <misc1-21.2>
-        1, [[Syntax error at line 1 near '#0']]
+        1, [[Syntax error at line 1 near '#']]
         -- </misc1-21.2>
     })
 
diff --git a/test/sql/iproto.result b/test/sql/iproto.result
index 6212aa0c08..5a424596e2 100644
--- a/test/sql/iproto.result
+++ b/test/sql/iproto.result
@@ -351,7 +351,7 @@ cn:execute('drop table if exists test3')
 --
 cn:execute('select ?1, ?2, ?3', {1, 2, 3})
 ---
-- error: Syntax error at line 1 near '?1'
+- error: Syntax error at line 1 near '1'
 ...
 cn:execute('select $name, $name2', {1, 2})
 ---
-- 
GitLab