Commit ab96fa26 authored by Kirill Shcherbatov's avatar Kirill Shcherbatov Committed by Vladimir Davydov

json: add index_base support to json_lexer

Introduced a new index_base field for the json_lexer class: this
value is the base field offset for emitted JSON_TOKEN_NUM tokens.
Thus we no longer need to perform manual offset adjustments with
the TUPLE_INDEX_BASE constant in the majority of cases. This will
also ensure that the extracted tuples are correctly inserted
into the numerical level of the JSON tree.

@locker: use int instead of unsigned for index_base.

Needed for #1012
parent ed23ef3a
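For context, a minimal sketch (not part of the commit) of how a caller uses the lexer after this change. The header path and the use of JSON_TOKEN_END to mark the end of the path are assumptions on my side; TUPLE_INDEX_BASE is Tarantool's 1-based path convention seen in the hunks below.

#include "json/json.h"      /* struct json_lexer, json_lexer_next_token();
                             * header path is an assumption */

#define TUPLE_INDEX_BASE 1  /* 1 in Tarantool; redefined here only to keep
                             * the sketch self-contained */

/* Count the numeric steps in a path such as "[2][6].key". */
static int
count_index_steps(const char *path, int path_len)
{
	struct json_lexer lexer;
	struct json_token token;
	int steps = 0;
	/* The base is passed once at creation time ... */
	json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
	while (json_lexer_next_token(&lexer, &token) == 0 &&
	       token.type != JSON_TOKEN_END) {
		/* ... so token.num already arrives 0-based here; no manual
		 * "token.num -= TUPLE_INDEX_BASE" is needed anymore. */
		if (token.type == JSON_TOKEN_NUM)
			steps++;
	}
	return steps;
}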
@@ -491,7 +491,7 @@ box_tuple_format_unref(box_tuple_format_t *format)
/**
* Propagate @a field to MessagePack(field)[index].
* @param[in][out] field Field to propagate.
* @param index 1-based index to propagate to.
* @param index 0-based index to propagate to.
*
* @retval 0 Success, the index was found.
* @retval -1 Not found.
@@ -501,10 +501,6 @@ tuple_field_go_to_index(const char **field, uint64_t index)
{
enum mp_type type = mp_typeof(**field);
if (type == MP_ARRAY) {
if (index == 0)
return -1;
/* Make index 0-based. */
index -= TUPLE_INDEX_BASE;
uint32_t count = mp_decode_array(field);
if (index >= count)
return -1;
@@ -512,6 +508,7 @@ tuple_field_go_to_index(const char **field, uint64_t index)
mp_next(field);
return 0;
} else if (type == MP_MAP) {
index += TUPLE_INDEX_BASE;
uint64_t count = mp_decode_map(field);
for (; count > 0; --count) {
type = mp_typeof(**field);
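To make the asymmetry in the hunks above concrete: for an MP_ARRAY the lexer's output is already the 0-based offset, while for an MP_MAP the literal key written in the path has to be restored. An illustrative stand-alone restatement of that arithmetic (not Tarantool code), assuming TUPLE_INDEX_BASE == 1:

#include <stdio.h>

#define TUPLE_INDEX_BASE 1

int
main(void)
{
	/* The lexer turned path "[2]" into token.num == 1. */
	int token_num = 2 - TUPLE_INDEX_BASE;

	int array_offset = token_num;                /* 1: the second element */
	int map_key = token_num + TUPLE_INDEX_BASE;  /* 2: literal key in "[2]" */

	printf("array offset = %d, map key = %d\n", array_offset, map_key);
	return 0;
}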
@@ -582,7 +579,7 @@ tuple_field_go_to_path(const char **data, const char *path, uint32_t path_len)
int rc;
struct json_lexer lexer;
struct json_token token;
json_lexer_create(&lexer, path, path_len);
json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
while ((rc = json_lexer_next_token(&lexer, &token)) == 0) {
switch (token.type) {
case JSON_TOKEN_NUM:
@@ -624,18 +621,13 @@ tuple_field_raw_by_path(struct tuple_format *format, const char *tuple,
}
struct json_lexer lexer;
struct json_token token;
json_lexer_create(&lexer, path, path_len);
json_lexer_create(&lexer, path, path_len, TUPLE_INDEX_BASE);
int rc = json_lexer_next_token(&lexer, &token);
if (rc != 0)
goto error;
switch(token.type) {
case JSON_TOKEN_NUM: {
int index = token.num;
if (index == 0) {
*field = NULL;
return 0;
}
index -= TUPLE_INDEX_BASE;
*field = tuple_field_raw(format, tuple, field_map, index);
if (*field == NULL)
return 0;
......
@@ -144,10 +144,12 @@ json_parse_integer(struct json_lexer *lexer, struct json_token *token)
value = value * 10 + c - (int)'0';
++len;
} while (++pos < end && isdigit((c = *pos)));
if (value < lexer->index_base)
return lexer->symbol_count + 1;
lexer->offset += len;
lexer->symbol_count += len;
token->type = JSON_TOKEN_NUM;
token->num = value;
token->num = value - lexer->index_base;
return 0;
}
......
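A short sketch of what the new check buys for a Lua-style lexer (index_base = 1). The header path is an assumption; the expected error code 2 matches the unit test added below ("invalid token for index_base 1") and the "error in path on position 2" result change.

#include <assert.h>
#include "json/json.h"  /* header path is an assumption */

static void
demo_index_base(void)
{
	struct json_lexer lexer;
	struct json_token token;

	/* "[5]" with index_base == 1: the emitted number is already 0-based. */
	json_lexer_create(&lexer, "[5]", 3, 1);
	assert(json_lexer_next_token(&lexer, &token) == 0);
	assert(token.type == JSON_TOKEN_NUM && token.num == 4);

	/* "[0]" with index_base == 1: value < index_base, so instead of a
	 * token the lexer reports an error at symbol position 2. */
	json_lexer_create(&lexer, "[0]", 3, 1);
	assert(json_lexer_next_token(&lexer, &token) == 2);
}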
@@ -48,6 +48,11 @@ struct json_lexer {
int offset;
/** Current lexer's offset in symbols. */
int symbol_count;
/**
* Base field offset for emitted JSON_TOKEN_NUM tokens,
* e.g. 0 for C and 1 for Lua.
*/
int index_base;
};
enum json_token_type {
@@ -81,14 +86,18 @@ struct json_token {
* @param[out] lexer Lexer to create.
* @param src Source string.
* @param src_len Length of @a src.
* @param index_base Base field offset for emitted JSON_TOKEN_NUM
* tokens e.g. 0 for C and 1 for Lua.
*/
static inline void
json_lexer_create(struct json_lexer *lexer, const char *src, int src_len)
json_lexer_create(struct json_lexer *lexer, const char *src, int src_len,
int index_base)
{
lexer->src = src;
lexer->src_len = src_len;
lexer->offset = 0;
lexer->symbol_count = 0;
lexer->index_base = index_base;
}
/**
......
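As the new comment says, a C-style caller passes 0, and the lexer then behaves exactly as before this patch: "[0]" stays legal and is emitted as 0. A brief sketch under the same header-path and JSON_TOKEN_END assumptions:

#include "json/json.h"  /* header path is an assumption */

/* Return the last numeric index in a 0-based (C-style) path, or -1. */
static int
last_index_c_style(const char *path, int path_len)
{
	struct json_lexer lexer;
	struct json_token token;
	int last = -1;
	/* index_base == 0: numbers pass through unchanged. */
	json_lexer_create(&lexer, path, path_len, 0);
	while (json_lexer_next_token(&lexer, &token) == 0 &&
	       token.type != JSON_TOKEN_END) {
		if (token.type == JSON_TOKEN_NUM)
			last = token.num;
	}
	return last;
}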
@@ -823,7 +823,7 @@ t[0]
...
t["[0]"]
---
- null
- error: Illegal parameters, error in path on position 2
...
t["[1000]"]
---
@@ -847,7 +847,7 @@ t["[2][6].key100"]
...
t["[2][0]"] -- 0-based index in array.
---
- null
- error: Illegal parameters, error in path on position 5
...
t["[4][3]"] -- Can not index string.
---
......
@@ -3,10 +3,12 @@
#include "trivia/util.h"
#include <string.h>
#define INDEX_BASE 1
#define reset_to_new_path(value) \
path = value; \
len = strlen(value); \
json_lexer_create(&lexer, path, len);
json_lexer_create(&lexer, path, len, INDEX_BASE);
#define is_next_index(value_len, value) \
path = lexer.src + lexer.offset; \
@@ -32,18 +34,18 @@ test_basic()
struct json_lexer lexer;
struct json_token token;
reset_to_new_path("[0].field1.field2['field3'][5]");
reset_to_new_path("[1].field1.field2['field3'][5]");
is_next_index(3, 0);
is_next_key("field1");
is_next_key("field2");
is_next_key("field3");
is_next_index(3, 5);
is_next_index(3, 4);
reset_to_new_path("[3].field[2].field")
is_next_index(3, 3);
is_next_key("field");
is_next_index(3, 2);
is_next_key("field");
is_next_index(3, 1);
is_next_key("field");
reset_to_new_path("[\"f1\"][\"f2'3'\"]");
is_next_key("f1");
@@ -57,7 +59,7 @@ test_basic()
/* Long number. */
reset_to_new_path("[1234]");
is_next_index(6, 1234);
is_next_index(6, 1233);
/* Empty path. */
reset_to_new_path("");
@@ -70,8 +72,8 @@ test_basic()
/* Unicode. */
reset_to_new_path("[2][6]['привет中国world']['中国a']");
is_next_index(3, 2);
is_next_index(3, 6);
is_next_index(3, 1);
is_next_index(3, 5);
is_next_key("привет中国world");
is_next_key("中国a");
@@ -94,7 +96,7 @@ void
test_errors()
{
header();
plan(20);
plan(21);
const char *path;
int len;
struct json_lexer lexer;
@@ -155,6 +157,10 @@ test_errors()
json_lexer_next_token(&lexer, &token);
is(json_lexer_next_token(&lexer, &token), 6, "tab inside identifier");
reset_to_new_path("[0]");
is(json_lexer_next_token(&lexer, &token), 2,
"invalid token for index_base %d", INDEX_BASE);
check_plan();
footer();
}
......
@@ -2,9 +2,9 @@
1..2
*** test_basic ***
1..71
ok 1 - parse <[0]>
ok 2 - <[0]> is num
ok 3 - <[0]> is 0
ok 1 - parse <[1]>
ok 2 - <[1]> is num
ok 3 - <[1]> is 0
ok 4 - parse <field1>
ok 5 - <field1> is str
ok 6 - len is 6
@@ -19,17 +19,17 @@
ok 15 - str is field3
ok 16 - parse <[5]>
ok 17 - <[5]> is num
ok 18 - <[5]> is 5
ok 18 - <[5]> is 4
ok 19 - parse <[3]>
ok 20 - <[3]> is num
ok 21 - <[3]> is 3
ok 21 - <[3]> is 2
ok 22 - parse <field>
ok 23 - <field> is str
ok 24 - len is 5
ok 25 - str is field
ok 26 - parse <[2]>
ok 27 - <[2]> is num
ok 28 - <[2]> is 2
ok 28 - <[2]> is 1
ok 29 - parse <field>
ok 30 - <field> is str
ok 31 - len is 5
@@ -52,7 +52,7 @@
ok 48 - str is field1
ok 49 - parse <[1234]>
ok 50 - <[1234]> is num
ok 51 - <[1234]> is 1234
ok 51 - <[1234]> is 1233
ok 52 - parse empty path
ok 53 - is str
ok 54 - parse <field1>
@@ -61,10 +61,10 @@
ok 57 - str is field1
ok 58 - parse <[2]>
ok 59 - <[2]> is num
ok 60 - <[2]> is 2
ok 60 - <[2]> is 1
ok 61 - parse <[6]>
ok 62 - <[6]> is num
ok 63 - <[6]> is 6
ok 63 - <[6]> is 5
ok 64 - parse <привет中国world>
ok 65 - <привет中国world> is str
ok 66 - len is 23
@@ -76,7 +76,7 @@
ok 1 - subtests
*** test_basic: done ***
*** test_errors ***
1..20
1..21
ok 1 - error on position 2 for <[[>
ok 2 - error on position 2 for <[field]>
ok 3 - error on position 1 for <'field1'.field2>
@@ -97,6 +97,7 @@ ok 1 - subtests
ok 18 - error in leading <.>
ok 19 - space inside identifier
ok 20 - tab inside identifier
ok 21 - invalid token for index_base 1
ok 2 - subtests
*** test_errors: done ***
*** main: done ***