diff --git a/src/lua/tap.lua b/src/lua/tap.lua
index 546d153924f22a6f3965db2c0d5eaeb7620077c3..94b080d5a9f67722114d9cad5db00634307a4e64 100644
--- a/src/lua/tap.lua
+++ b/src/lua/tap.lua
@@ -91,15 +91,14 @@ local function cmpdeeply(got, expected, extra)
 
     if ffi.istype('bool', got) then got = (got == 1) end
     if ffi.istype('bool', expected) then expected = (expected == 1) end
-    if got == nil and expected == nil then return true end
 
-    if type(got) ~= type(expected) then
+    if extra.strict and type(got) ~= type(expected) then
         extra.got = type(got)
         extra.expected = type(expected)
         return false
     end
 
-    if type(got) ~= 'table' then
+    if type(got) ~= 'table' or type(expected) ~= 'table' then
         extra.got = got
         extra.expected = expected
         return got == expected
@@ -117,8 +116,8 @@ local function cmpdeeply(got, expected, extra)
     end
 
     -- check if expected contains more keys then got
-    for i, _ in pairs(expected) do
-        if visited_keys[i] ~= true then
+    for i, v in pairs(expected) do
+        if visited_keys[i] ~= true and (extra.strict or v ~= box.NULL) then
             extra.expected = 'key ' .. tostring(i)
             extra.got = 'nil'
             return false
@@ -148,14 +147,18 @@ local function is(test, got, expected, message, extra)
     extra = extra or {}
     extra.got = got
     extra.expected = expected
-    return ok(test, got == expected, message, extra)
+    local rc = (test.strict == false or type(got) == type(expected)) and
+               got == expected
+    return ok(test, rc, message, extra)
 end
 
 local function isnt(test, got, unexpected, message, extra)
     extra = extra or {}
     extra.got = got
     extra.unexpected = unexpected
-    return ok(test, got ~= unexpected, message, extra)
+    local rc = (test.strict == true and type(got) ~= type(unexpected)) or
+               got ~= unexpected
+    return ok(test, rc, message, extra)
 end
 
 
@@ -163,6 +166,7 @@ local function is_deeply(test, got, expected, message, extra)
     extra = extra or {}
     extra.got = got
     extra.expected = expected
+    extra.strict = test.strict
     return ok(test, cmpdeeply(got, expected, extra), message, extra)
 end
 
@@ -225,6 +229,7 @@ local function test(parent, name, fun, ...)
         failed  = 0;
         planned = 0;
         trace   = parent == nil and true or parent.trace;
+        strict = false;
     }, test_mt)
     if fun ~= nil then
         test:diag('%s', test.name)
diff --git a/test/app-tap/tap.result b/test/app-tap/tap.result
index 3e7882331599fad91d2a2f6d2aefa9efa7a62a43..12bf86ec23bf2a98f2c9e32d14f9db7926638776 100644
--- a/test/app-tap/tap.result
+++ b/test/app-tap/tap.result
@@ -1,5 +1,5 @@
 TAP version 13
-1..32
+1..42
 ok - true
 ok - extra information is not printed on success
 not ok - extra printed using yaml only on failure
@@ -69,6 +69,24 @@ not ok - cdata type
   expected: ctype<int>
   got: ctype<unsigned int>
   ...
+not ok - box.NULL == nil strict = true
+  ---
+  got: null
+  ...
+not ok - nil == box.NULL strict = true
+  ---
+  expected: null
+  ...
+ok - box.NULL == box.NULL strict = true
+ok - nil == nil strict = true
+ok - box.NULL != nil strict = true
+ok - nil != box.NULL strict = true
+not ok - box.NULL != box.NULL strict = true
+  ---
+  unexpected: null
+  got: null
+  ...
+not ok - nil != nil strict = true
     # subtest 1
     1..2
     ok - true
@@ -119,7 +137,7 @@ not ok - failed subtests
   failed: 1
   ...
     # is_deeply
-    1..6
+    1..20
     ok - 1 and 1
     ok - abc and abc
     ok - empty tables
@@ -127,20 +145,69 @@ not ok - failed subtests
     not ok - {1} and {2}
       ---
       path: //1
+      strict: false
       expected: 2
       got: 1
       ...
     not ok - {1,2,{3,4}} and {1,2,{3,5}}
       ---
       path: //3/2
+      strict: false
       expected: 5
       got: 4
       ...
+    ok - {} and {a = box.NULL} strict = false
+    ok - {a = box.NULL} and {} strict = false
+    ok - {a = box.NULL} and {b = box.NULL} strict = false
+    ok - {a = box.NULL} and {b = box.NULL, c = box.NULL} strict = false
+    ok - nil and box.NULL strict = false
+    ok - box.NULL and nil strict = false
+    ok - {a = box.NULL} and {a = box.NULL} strict false
+    not ok - {} and {a = box.NULL} strict = true
+      ---
+      strict: true
+      expected: key a
+      got: nil
+      ...
+    not ok - {a = box.NULL} and {} strict = true
+      ---
+      path: //a
+      strict: true
+      expected: nil
+      got: cdata
+      ...
+    not ok - {a = box.NULL} and {b = box.NULL} strict = true
+      ---
+      path: //a
+      strict: true
+      expected: nil
+      got: cdata
+      ...
+    not ok - {a = box.NULL} and {b = box.NULL, c = box.NULL} strict = true
+      ---
+      path: //a
+      strict: true
+      expected: nil
+      got: cdata
+      ...
+    not ok - nil and box.NULL strict = true
+      ---
+      got: nil
+      expected: cdata
+      strict: true
+      ...
+    not ok - box.NULL and nil strict = true
+      ---
+      got: cdata
+      expected: nil
+      strict: true
+      ...
+    ok - {a = box.NULL} and {a = box.NULL} strict true
     # is_deeply: end
 not ok - failed subtests
   ---
-  planned: 6
-  failed: 2
+  planned: 20
+  failed: 8
   ...
     # like
     1..2
@@ -148,4 +215,22 @@ not ok - failed subtests
     ok - unlike(abcde, acd)
     # like: end
 ok - like
-# failed subtest: 15
+not ok - compare {1, 2, 3} and '200'
+  ---
+  strict: false
+  expected: '200'
+  got:
+  - 1
+  - 2
+  - 3
+  ...
+not ok - compare '200' and {1, 2, 3}
+  ---
+  strict: false
+  expected:
+  - 1
+  - 2
+  - 3
+  got: '200'
+  ...
+# failed subtest: 21
diff --git a/test/app-tap/tap.test.lua b/test/app-tap/tap.test.lua
index 0e1de7f1cf3edebc8d0ce920f47c304271ad59cf..e2a78f6301e8a3211b8caa12cc892b29ade80525 100755
--- a/test/app-tap/tap.test.lua
+++ b/test/app-tap/tap.test.lua
@@ -20,7 +20,7 @@ test.trace = false
 -- ok, fail and skip predicates
 --
 
-test:plan(32) -- plan to run 3 test
+test:plan(42)
 test:ok(true, 'true') -- basic function
 local extra = { state = 'some userful information to debug on failure',
         details = 'a table argument formatted using yaml.encode()' }
@@ -60,6 +60,19 @@ test:iscdata(10, 'int', 'cdata type')
 test:iscdata(ffi.new('int', 10), 'int', 'cdata type')
 test:iscdata(ffi.new('unsigned int', 10), 'int', 'cdata type')
 
+--
+-- gh-4125: Strict null comparisons.
+--
+test.strict = true
+test:is(box.NULL, nil, "box.NULL == nil strict = true")
+test:is(nil, box.NULL, "nil == box.NULL strict = true")
+test:is(box.NULL, box.NULL, "box.NULL == box.NULL strict = true")
+test:is(nil, nil, "nil == nil strict = true")
+test:isnt(box.NULL, nil, "box.NULL != nil strict = true")
+test:isnt(nil, box.NULL, "nil != box.NULL strict = true")
+test:isnt(box.NULL, box.NULL, "box.NULL != box.NULL strict = true")
+test:isnt(nil, nil, "nil != nil strict = true")
+test.strict = false
 --
 -- Any test also can create unlimited number of sub tests.
 -- Subtest with callbacks (preferred).
@@ -118,7 +131,7 @@ end)
 
 
 test:test('is_deeply', function(t)
-    t:plan(6)
+    t:plan(20)
 
     t:is_deeply(1, 1, '1 and 1')
     t:is_deeply('abc', 'abc', 'abc and abc')
@@ -127,6 +140,32 @@ test:test('is_deeply', function(t)
     t:is_deeply({1}, {2}, '{1} and {2}')
     t:is_deeply({1, 2, { 3, 4 }}, {1, 2, { 3, 5 }}, '{1,2,{3,4}} and {1,2,{3,5}}')
 
+    --
+    -- gh-4125: is_deeply works inconsistently with box.NULL.
+    --
+    t:is_deeply({}, {a = box.NULL}, '{} and {a = box.NULL} strict = false')
+    t:is_deeply({a = box.NULL}, {}, '{a = box.NULL} and {} strict = false')
+    t:is_deeply({a = box.NULL}, {b = box.NULL},
+                '{a = box.NULL} and {b = box.NULL} strict = false')
+    t:is_deeply({a = box.NULL}, {b = box.NULL, c = box.NULL},
+                '{a = box.NULL} and {b = box.NULL, c = box.NULL} strict = false')
+    t:is_deeply(nil, box.NULL, 'nil and box.NULL strict = false')
+    t:is_deeply(box.NULL, nil, 'box.NULL and nil strict = false')
+    t:is_deeply({a = box.NULL}, {a = box.NULL},
+                '{a = box.NULL} and {a = box.NULL} strict false')
+
+    t.strict = true
+    t:is_deeply({}, {a = box.NULL}, '{} and {a = box.NULL} strict = true')
+    t:is_deeply({a = box.NULL}, {}, '{a = box.NULL} and {} strict = true')
+    t:is_deeply({a = box.NULL}, {b = box.NULL},
+                '{a = box.NULL} and {b = box.NULL} strict = true')
+    t:is_deeply({a = box.NULL}, {b = box.NULL, c = box.NULL},
+                '{a = box.NULL} and {b = box.NULL, c = box.NULL} strict = true')
+    t:is_deeply(nil, box.NULL, 'nil and box.NULL strict = true')
+    t:is_deeply(box.NULL, nil, 'box.NULL and nil strict = true')
+    t:is_deeply({a = box.NULL}, {a = box.NULL},
+                '{a = box.NULL} and {a = box.NULL} strict true')
+    t.strict = false
 end)
 
 
@@ -136,6 +175,13 @@ test:test('like', function(t)
     t:unlike('abcde', 'acd', 'unlike(abcde, acd)')
 end)
 
+--
+-- Test that in non-strict comparison mode the order of
+-- arguments does not matter.
+--
+test:is_deeply({1, 2, 3}, '200', "compare {1, 2, 3} and '200'")
+test:is_deeply('200', {1, 2, 3}, "compare '200' and {1, 2, 3}")
+
 --
 -- Finish root test. Since we used non-callback variant, we have to
 -- call check explicitly.