From 3d5b4daaa05c00152cb147c48c83c35ae1fd7981 Mon Sep 17 00:00:00 2001 From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Date: Sun, 9 Feb 2020 18:23:01 +0100 Subject: [PATCH] fio: close unused descriptors automatically fio.open() returned a file descriptor which was not closed automatically after all references to it were dropped. In other words, GC didn't close the descriptor. This was inconvenient, because an exception may be raised after fio.open(), and the user had to work around this in order to call fio_object:close() manually. It was also inconsistent with io.open(). Now the object returned by fio.open() closes its descriptor automatically when it is garbage collected. Closes #4727 @TarantoolBot document Title: fio descriptor is closed automatically by GC fio.open() returns a descriptor which can be closed manually by calling the :close() method; otherwise it is closed automatically when it has no references left and GC deletes it. The :close() method has always existed; automatic closing by GC is new. Keep in mind that the number of file descriptors is limited, and they can run out before GC is triggered to collect the unused ones. It is always better to close them manually as soon as possible. 
--- src/lua/fio.lua | 33 ++++++++-- test/app/gh-4727-fio-gc.result | 104 +++++++++++++++++++++++++++++++ test/app/gh-4727-fio-gc.test.lua | 60 ++++++++++++++++++ 3 files changed, 192 insertions(+), 5 deletions(-) create mode 100644 test/app/gh-4727-fio-gc.result create mode 100644 test/app/gh-4727-fio-gc.test.lua diff --git a/src/lua/fio.lua b/src/lua/fio.lua index 4692e10266..d3c257b88b 100644 --- a/src/lua/fio.lua +++ b/src/lua/fio.lua @@ -5,11 +5,16 @@ local ffi = require('ffi') local buffer = require('buffer') local fiber = require('fiber') local errno = require('errno') +local schedule_task = fiber._internal.schedule_task ffi.cdef[[ int umask(int mask); char *dirname(char *path); int chdir(const char *path); + + struct fio_handle { + int fh; + }; ]] local const_char_ptr_t = ffi.typeof('const char *') @@ -160,7 +165,22 @@ fio_methods.stat = function(self) return internal.fstat(self.fh) end -local fio_mt = { __index = fio_methods } +fio_methods.__serialize = function(self) + return {fh = self.fh} +end + +local fio_mt = { + __index = fio_methods, + __gc = function(obj) + if obj.fh >= 0 then + -- FFI GC can't yield. Internal.close() yields. + -- Collect the garbage later, in a worker fiber. + schedule_task(internal.close, obj.fh) + end + end, +} + +ffi.metatype('struct fio_handle', fio_mt) fio.open = function(path, flags, mode) local iflag = 0 @@ -202,10 +222,13 @@ fio.open = function(path, flags, mode) if err ~= nil then return nil, err end - - fh = { fh = fh } - setmetatable(fh, fio_mt) - return fh + local ok, res = pcall(ffi.new, 'struct fio_handle', fh) + if not ok then + internal.close(fh) + -- This is OOM. + return error(res) + end + return res end fio.pathjoin = function(...) diff --git a/test/app/gh-4727-fio-gc.result b/test/app/gh-4727-fio-gc.result new file mode 100644 index 0000000000..d793eded32 --- /dev/null +++ b/test/app/gh-4727-fio-gc.result @@ -0,0 +1,104 @@ +-- test-run result file version 2 +test_run = require('test_run').new() + | --- + | ... 
+fiber = require('fiber') + | --- + | ... +fio = require('fio') + | --- + | ... +-- +-- gh-4727: fio handler GC. +-- +flags = {'O_CREAT', 'O_RDWR'} + | --- + | ... +mode = {'S_IRWXU'} + | --- + | ... +filename = 'test4727.txt' + | --- + | ... +fh1 = nil + | --- + | ... +fh2 = nil + | --- + | ... +-- Idea of the test is that according to the Open Group standard, +-- open() always returns the smallest available descriptor. This +-- means, that in 'open() + close() + open()' the second open() +-- should return the same value as the first call, if no other +-- threads/fibers managed to interfere. Because of the +-- interference the sequence may need to be called multiple times +-- to catch a couple of equal descriptors. +test_run:wait_cond(function() \ + collectgarbage('collect') \ + local f = fio.open(filename, flags, mode) \ + fh1 = f.fh \ + f = nil \ + collectgarbage('collect') \ +-- GC function of a fio object works in a separate fiber. Give it \ +-- time to execute. \ + fiber.yield() \ + f = fio.open(filename, flags, mode) \ + fh2 = f.fh \ + f = nil \ + collectgarbage('collect') \ + fiber.yield() \ + return fh1 == fh2 \ +end) or {fh1, fh2} + | --- + | - true + | ... + +-- Ensure, that GC does not break anything after explicit close. +-- Idea of the test is the same as in the previous test, but now +-- the second descriptor is used for something. If GC of the first +-- fio object is called even after close(), it would close the +-- same descriptor, already used by the second fio object. And it +-- won't be able to write anything. Or will write, but to a +-- totally different descriptor created by some other +-- fiber/thread. This is why read() is called on the same file +-- afterwards. +f = nil + | --- + | ... +test_run:wait_cond(function() \ + f = fio.open(filename, flags, mode) \ + fh1 = f.fh \ + f:close() \ + f = fio.open(filename, flags, mode) \ + fh2 = f.fh \ + return fh1 == fh2 \ +end) + | --- + | - true + | ... 
+collectgarbage('collect') + | --- + | - 0 + | ... +fiber.yield() + | --- + | ... +f:write('test') + | --- + | - true + | ... +f:close() + | --- + | - true + | ... +f = fio.open(filename, flags, mode) + | --- + | ... +f:read() + | --- + | - test + | ... +f:close() + | --- + | - true + | ... diff --git a/test/app/gh-4727-fio-gc.test.lua b/test/app/gh-4727-fio-gc.test.lua new file mode 100644 index 0000000000..d0ab585caa --- /dev/null +++ b/test/app/gh-4727-fio-gc.test.lua @@ -0,0 +1,60 @@ +test_run = require('test_run').new() +fiber = require('fiber') +fio = require('fio') +-- +-- gh-4727: fio handler GC. +-- +flags = {'O_CREAT', 'O_RDWR'} +mode = {'S_IRWXU'} +filename = 'test4727.txt' +fh1 = nil +fh2 = nil +-- Idea of the test is that according to the Open Group standard, +-- open() always returns the smallest available descriptor. This +-- means, that in 'open() + close() + open()' the second open() +-- should return the same value as the first call, if no other +-- threads/fibers managed to interfere. Because of the +-- interference the sequence may need to be called multiple times +-- to catch a couple of equal descriptors. +test_run:wait_cond(function() \ + collectgarbage('collect') \ + local f = fio.open(filename, flags, mode) \ + fh1 = f.fh \ + f = nil \ + collectgarbage('collect') \ +-- GC function of a fio object works in a separate fiber. Give it \ +-- time to execute. \ + fiber.yield() \ + f = fio.open(filename, flags, mode) \ + fh2 = f.fh \ + f = nil \ + collectgarbage('collect') \ + fiber.yield() \ + return fh1 == fh2 \ +end) or {fh1, fh2} + +-- Ensure, that GC does not break anything after explicit close. +-- Idea of the test is the same as in the previous test, but now +-- the second descriptor is used for something. If GC of the first +-- fio object is called even after close(), it would close the +-- same descriptor, already used by the second fio object. And it +-- won't be able to write anything. 
Or will write, but to a +-- totally different descriptor created by some other +-- fiber/thread. This is why read() is called on the same file +-- afterwards. +f = nil +test_run:wait_cond(function() \ + f = fio.open(filename, flags, mode) \ + fh1 = f.fh \ + f:close() \ + f = fio.open(filename, flags, mode) \ + fh2 = f.fh \ + return fh1 == fh2 \ +end) +collectgarbage('collect') +fiber.yield() +f:write('test') +f:close() +f = fio.open(filename, flags, mode) +f:read() +f:close() -- GitLab