From d8fe9316e3634a3cd36b280afd01d9a4c452189a Mon Sep 17 00:00:00 2001
From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Date: Wed, 4 Sep 2019 23:14:49 +0200
Subject: [PATCH] app: raise an error on too nested tables serialization

Closes #4434
Follow-up #4366

@TarantoolBot document
Title: json/msgpack.cfg.encode_deep_as_nil option

Tarantool has several so called serializers to convert data
between Lua and another format: YAML, JSON, msgpack.

YAML is a crazy serializer without depth restrictions. But for
JSON, msgpack, and msgpackffi a user could set encode_max_depth
option. That option led to crop of a table when it had too many
nested levels. Sometimes such behaviour is undesirable.

Now an error is raised instead of data corruption:

    t = nil
    for i = 1, 100 do t = {t} end
    msgpack.encode(t) -- Here an exception is thrown.

To disable it and return the old behaviour back here is a new
option:

    <serializer>.cfg({encode_deep_as_nil = true})

Option encode_deep_as_nil works for JSON, msgpack, and msgpackffi
modules, and is false by default. It means, that now if some
existing users have cropping, even intentional, they will get the
exception.

(cherry picked from commit d7a8942afb203474714b610f9d4e7f07e099e582)
---
 src/lua/msgpack.c                    |  8 ++++++++
 src/lua/msgpackffi.lua               |  4 ++++
 src/lua/utils.c                      |  1 +
 src/lua/utils.h                      |  7 +++++++
 test/app-tap/json.test.lua           |  6 ++++--
 test/app-tap/lua/serializer_test.lua | 21 ++++++++++++++++++++-
 test/app-tap/msgpackffi.test.lua     | 11 +++++++++--
 test/box/tuple.result                |  8 +++++++-
 test/box/tuple.test.lua              |  4 +++-
 third_party/lua-cjson/lua_cjson.c    | 10 ++++++++--
 10 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/src/lua/msgpack.c b/src/lua/msgpack.c
index 2126988ebe..3229e01d4b 100644
--- a/src/lua/msgpack.c
+++ b/src/lua/msgpack.c
@@ -139,6 +139,10 @@ luamp_encode_r(struct lua_State *L, struct luaL_serializer *cfg,
 	case MP_MAP:
 		/* Map */
 		if (level >= cfg->encode_max_depth) {
+			if (! cfg->encode_deep_as_nil) {
+				return luaL_error(L, "Too high nest level - %d",
+						  level + 1);
+			}
 			mpstream_encode_nil(stream); /* Limit nested maps */
 			return MP_NIL;
 		}
@@ -160,6 +164,10 @@ luamp_encode_r(struct lua_State *L, struct luaL_serializer *cfg,
 	case MP_ARRAY:
 		/* Array */
 		if (level >= cfg->encode_max_depth) {
+			if (! cfg->encode_deep_as_nil) {
+				return luaL_error(L, "Too high nest level - %d",
+						  level + 1);
+			}
 			mpstream_encode_nil(stream); /* Limit nested arrays */
 			return MP_NIL;
 		}
diff --git a/src/lua/msgpackffi.lua b/src/lua/msgpackffi.lua
index 75fcc5ba01..63d9b33b7c 100644
--- a/src/lua/msgpackffi.lua
+++ b/src/lua/msgpackffi.lua
@@ -206,6 +206,10 @@ local function encode_r(buf, obj, level)
         encode_str(buf, obj)
     elseif type(obj) == "table" then
         if level >= msgpack.cfg.encode_max_depth then
+            if not msgpack.cfg.encode_deep_as_nil then
+                error(string.format('Too high nest level - %d',
+                                    msgpack.cfg.encode_max_depth + 1))
+            end
             encode_nil(buf)
             return
         end
diff --git a/src/lua/utils.c b/src/lua/utils.c
index 48401042e8..6b545b8163 100644
--- a/src/lua/utils.c
+++ b/src/lua/utils.c
@@ -235,6 +235,7 @@ static struct {
 	OPTION(LUA_TNUMBER,  encode_sparse_ratio, 2),
 	OPTION(LUA_TNUMBER,  encode_sparse_safe, 10),
 	OPTION(LUA_TNUMBER,  encode_max_depth, 32),
+	OPTION(LUA_TBOOLEAN, encode_deep_as_nil, 0),
 	OPTION(LUA_TBOOLEAN, encode_invalid_numbers, 1),
 	OPTION(LUA_TNUMBER,  encode_number_precision, 14),
 	OPTION(LUA_TBOOLEAN, encode_load_metatables, 1),
diff --git a/src/lua/utils.h b/src/lua/utils.h
index 2485e10906..8f8630f8dc 100644
--- a/src/lua/utils.h
+++ b/src/lua/utils.h
@@ -210,6 +210,13 @@ struct luaL_serializer {
 	int encode_sparse_safe;
 	/** Max recursion depth for encoding (MsgPack, CJSON only) */
 	int encode_max_depth;
+	/**
+	 * A flag whether a table with too high nest level should
+	 * be cropped. The not-encoded fields are replaced with
+	 * one null. If not set, too high nesting is considered an
+	 * error.
+	 */
+	int encode_deep_as_nil;
 	/** Enables encoding of NaN and Inf numbers */
 	int encode_invalid_numbers;
 	/** Floating point numbers precision (YAML, CJSON only) */
diff --git a/test/app-tap/json.test.lua b/test/app-tap/json.test.lua
index 0a89668665..cd210c85fc 100755
--- a/test/app-tap/json.test.lua
+++ b/test/app-tap/json.test.lua
@@ -38,9 +38,10 @@ tap.test("json", function(test)
     --
     -- gh-2888: Check the possibility of using options in encode()/decode().
     --
+    local orig_encode_deep_as_nil = serializer.cfg.encode_deep_as_nil
     local orig_encode_max_depth = serializer.cfg.encode_max_depth
     local sub = {a = 1, { b = {c = 1, d = {e = 1}}}}
-    serializer.cfg({encode_max_depth = 1})
+    serializer.cfg({encode_max_depth = 1, encode_deep_as_nil = true})
     test:ok(serializer.encode(sub) == '{"1":null,"a":1}',
             'depth of encoding is 1 with .cfg')
     serializer.cfg({encode_max_depth = orig_encode_max_depth})
@@ -121,5 +122,6 @@ tap.test("json", function(test)
     rec4['b'] = rec4
     test:is(serializer.encode(rec4),
             '{"a":{"a":null,"b":null},"b":{"a":null,"b":null}}')
-    serializer.cfg({encode_max_depth = orig_encode_max_depth})
+    serializer.cfg({encode_max_depth = orig_encode_max_depth,
+                    encode_deep_as_nil = orig_encode_deep_as_nil})
 end)
diff --git a/test/app-tap/lua/serializer_test.lua b/test/app-tap/lua/serializer_test.lua
index bedbf95a58..ce655da95f 100644
--- a/test/app-tap/lua/serializer_test.lua
+++ b/test/app-tap/lua/serializer_test.lua
@@ -390,7 +390,7 @@ local function test_ucdata(test, s)
 end
 
 local function test_depth(test, s)
-    test:plan(1)
+    test:plan(3)
     --
     -- gh-4434: serializer update should be reflected in Lua.
     --
@@ -399,6 +399,25 @@ local function test_depth(test, s)
     test:is(s.cfg.encode_max_depth, max_depth + 5,
             "cfg({<name> = value}) is reflected in cfg.<name>")
     s.cfg({encode_max_depth = max_depth})
+
+    --
+    -- gh-4434 (yes, the same issue): let users choose whether
+    -- they want to raise an error on tables with too high nest
+    -- level.
+    --
+    local deep_as_nil = s.cfg.encode_deep_as_nil
+    s.cfg({encode_deep_as_nil = false})
+
+    local t = nil
+    for i = 1, max_depth + 1 do t = {t} end
+    local ok, err = pcall(s.encode, t)
+    test:ok(not ok, "too deep encode depth")
+
+    s.cfg({encode_max_depth = max_depth + 1})
+    ok, err = pcall(s.encode, t)
+    test:ok(ok, "no throw in a corner case")
+
+    s.cfg({encode_deep_as_nil = deep_as_nil, encode_max_depth = max_depth})
 end
 
 return {
diff --git a/test/app-tap/msgpackffi.test.lua b/test/app-tap/msgpackffi.test.lua
index 7277a5b89f..1059449640 100755
--- a/test/app-tap/msgpackffi.test.lua
+++ b/test/app-tap/msgpackffi.test.lua
@@ -36,7 +36,7 @@ local function test_offsets(test, s)
 end
 
 local function test_other(test, s)
-    test:plan(23)
+    test:plan(24)
     local buf = string.char(0x93, 0x6e, 0xcb, 0x42, 0x2b, 0xed, 0x30, 0x47,
         0x6f, 0xff, 0xff, 0xac, 0x77, 0x6b, 0x61, 0x71, 0x66, 0x7a, 0x73,
         0x7a, 0x75, 0x71, 0x71, 0x78)
@@ -82,6 +82,8 @@ local function test_other(test, s)
         return level
     end
     local msgpack = require('msgpack')
+    local deep_as_nil = msgpack.cfg.encode_deep_as_nil
+    msgpack.cfg({encode_deep_as_nil = true})
     local max_depth = msgpack.cfg.encode_max_depth
     local result_depth = check_depth(max_depth + 5)
     test:is(result_depth, max_depth,
@@ -105,7 +107,12 @@ local function test_other(test, s)
     while t ~= nil do level = level + 1 t = t.key end
     test:is(level, max_depth + 5, "recursive map")
 
-    msgpack.cfg({encode_max_depth = max_depth})
+    msgpack.cfg({encode_deep_as_nil = false})
+    local ok = pcall(check_depth, max_depth + 6)
+    test:ok(not ok, "exception is thrown when crop is not allowed")
+
+    msgpack.cfg({encode_deep_as_nil = deep_as_nil,
+                 encode_max_depth = max_depth})
 end
 
 tap.test("msgpackffi", function(test)
diff --git a/test/box/tuple.result b/test/box/tuple.result
index dcbec0fa02..1082ae84e9 100644
--- a/test/box/tuple.result
+++ b/test/box/tuple.result
@@ -1258,6 +1258,12 @@ s2:drop()
 max_depth = msgpack.cfg.encode_max_depth
 ---
 ...
+deep_as_nil = msgpack.cfg.encode_deep_as_nil
+---
+...
+msgpack.cfg({encode_deep_as_nil = true})
+---
+...
 t = nil
 ---
 ...
@@ -1295,6 +1301,6 @@ level == max_depth + 5 or {level, max_depth}
 ---
 - true
 ...
-msgpack.cfg({encode_max_depth = max_depth})
+msgpack.cfg({encode_max_depth = max_depth, encode_deep_as_nil = deep_as_nil})
 ---
 ...
diff --git a/test/box/tuple.test.lua b/test/box/tuple.test.lua
index b2b834376b..8dd15a0a10 100644
--- a/test/box/tuple.test.lua
+++ b/test/box/tuple.test.lua
@@ -431,6 +431,8 @@ s2:drop()
 -- gh-4434: tuple should use global msgpack serializer.
 --
 max_depth = msgpack.cfg.encode_max_depth
+deep_as_nil = msgpack.cfg.encode_deep_as_nil
+msgpack.cfg({encode_deep_as_nil = true})
 t = nil
 for i = 1, max_depth + 5 do t = {t} end
 tuple = box.tuple.new(t):totable()
@@ -446,4 +448,4 @@ while tuple ~= nil do level = level + 1 tuple = tuple[1] end
 -- serializer allows deeper tables.
 level == max_depth + 5 or {level, max_depth}
 
-msgpack.cfg({encode_max_depth = max_depth})
+msgpack.cfg({encode_max_depth = max_depth, encode_deep_as_nil = deep_as_nil})
diff --git a/third_party/lua-cjson/lua_cjson.c b/third_party/lua-cjson/lua_cjson.c
index be2416c692..3c5bb3765c 100644
--- a/third_party/lua-cjson/lua_cjson.c
+++ b/third_party/lua-cjson/lua_cjson.c
@@ -400,14 +400,20 @@ static void json_append_data(lua_State *l, struct luaL_serializer *cfg,
     json_append_nil(cfg, json);
     break;
     case MP_MAP:
-    if (current_depth >= cfg->encode_max_depth)
+    if (current_depth >= cfg->encode_max_depth) {
+        if (! cfg->encode_deep_as_nil)
+            luaL_error(l, "Too high nest level");
         return json_append_nil(cfg, json); /* Limit nested maps */
+    }
     json_append_object(l, cfg, current_depth + 1, json);
     return;
     case MP_ARRAY:
     /* Array */
-    if (current_depth >= cfg->encode_max_depth)
+    if (current_depth >= cfg->encode_max_depth) {
+        if (! cfg->encode_deep_as_nil)
+            luaL_error(l, "Too high nest level");
         return json_append_nil(cfg, json); /* Limit nested arrays */
+    }
     json_append_array(l, cfg, current_depth + 1, json, field.size);
     return;
     case MP_EXT:
-- 
GitLab