From f9202a552fa940adf32c9b258c56f56dbf652fcd Mon Sep 17 00:00:00 2001 From: Daniil Medvedev <medvdanil@gmail.com> Date: Fri, 17 Jul 2015 19:11:34 +0300 Subject: [PATCH] csv string input/output\n documentation on text format --- csv.documentation | 40 +++++++++++ src/lua/csv.lua | 158 +++++++++++++++++++++++++++--------------- test/app/csv.result | 15 ++++ test/app/csv.test.lua | 5 +- 4 files changed, 162 insertions(+), 56 deletions(-) create mode 100644 csv.documentation diff --git a/csv.documentation b/csv.documentation new file mode 100644 index 0000000000..3c96cd7ab1 --- /dev/null +++ b/csv.documentation @@ -0,0 +1,40 @@ +Tarantool supports CSV file input/output. +CSV stands for comma-separated values, like this: +example.txt: +package,method,return value +fio,pathjoin,string +csv,load,table +none,",comma in field", and ""quote"" + +Fields that contain commas must be enclosed in quotes. +A quote character inside a field must be doubled. +You can set the delimiter and quote character: + csv.delimiter = ',' + csv.quote = '"' + +csv.iterate = function(readable[, csv_chunk_size]) +--@brief parse csv string by string +--@param readable must be a string or an object with a method read(num) that returns a string +--@param csv_chunk_size (default 4096). 
Parser will read by csv_chunk_size symbols +--@return iter function, iterator state + +Example: + f = require("fio").open("example.txt", { "O_RDONLY"}) + for tup in csv.iterate(f) do + print(tup[1], tup[2], tup[3]) + end +Output: + package method return value + fio pathjoin string + csv load table + none ,comma in field and "quote" + +csv.load = function(readable, csv_chunk_size) +--@brief parse csv and make table +--@return table + + +csv.dump = function(t, writable) +--@brief dumps tuple or table as csv +--@param writable must be object with method write(string) like file or socket +--@return if there is no writable, it returns csv as a string \ No newline at end of file diff --git a/src/lua/csv.lua b/src/lua/csv.lua index 18fdc13112..58b0783866 100644 --- a/src/lua/csv.lua +++ b/src/lua/csv.lua @@ -75,64 +75,112 @@ local iter = function(csvstate) end end -local csv = { - iterate = function(readable, csv_chunk_size) - csv_chunk_size = csv_chunk_size or 4096 - if type(readable.read) ~= "function" then - error("Usage: load(object with read method)") - end +local make_readable = function(s) + local rd = {} + rd.val = s + rd.read = function(self, cnt) + local res = self.val; + self.val = "" + return res + end + return rd +end - local it = ffi.new('csv_iterator_t[1]') - local csv = ffi.new('csv_t[1]') - ffi.C.csv_create(csv) - ffi.C.csv_iter_create(it, csv) - - return iter, {readable, csv_chunk_size, csv, it} - end, - load = function(readable, csv_chunk_size) - csv_chunk_size = csv_chunk_size or 4096 - if type(readable.read) ~= "function" then - error("Usage: load(object with read method)") - end +local make_writable = function() + local wr = {} + wr.returnstring = "" + wr.write = function(self, s) + self.returnstring = self.returnstring .. 
s + end + return wr +end - result = {} - for tup in csv.iterate(readable, csv_chunk_size) do - table.insert(result, tup) - end +local module = {} - return result - end, - dump = function(writable, t) - if type(writable.write) ~= "function" or type(t) ~= "table" then - error("Usage: dump(writable, table)") - end - local csv = ffi.new('csv_t[1]') - ffi.C.csv_create(csv) - local bufsz = 256 - --local buf = ffi.new('char[?]', bufsz) - local buf = csv[0].csv_realloc(ffi.cast(ffi.typeof('void *'), 0), bufsz) - if type(t[1]) ~= 'table' then - t = {t} - end - for k, line in pairs(t) do - local first = true - for k2, field in pairs(line) do - strf = tostring(field) - if (strf:len() + 1) * 2 > bufsz then - bufsz = (strf:len() + 1) * 2 - buf = csv[0].csv_realloc(buf, bufsz) - end - local len = ffi.C.csv_escape_field(csv, strf, buf) - if first then - first = false - else - writable:write(',') - end - writable:write(ffi.string(buf, len)) +module.delimiter = ',' +module.quote = '"' + +--@brief parse csv string by string +--@param readable must be a string or an object with a method read(num) that returns a string +--@param csv_chunk_size (default 4096). 
Parser will read by csv_chunk_size symbols +--@return iter function, iterator state +module.iterate = function(readable, csv_chunk_size) + csv_chunk_size = csv_chunk_size or 4096 + if type(readable) == "string" then + readable = make_readable(readable) + end + if type(readable.read) ~= "function" then + error("Usage: load(object with method read(num) returns string)") + end + + local str = readable:read(csv_chunk_size) + if not str then + error("Usage: load(object with method read(num) returns string)") + end + local it = ffi.new('csv_iterator_t[1]') + local csv = ffi.new('csv_t[1]') + ffi.C.csv_create(csv) + csv[0].csv_delim = string.byte(module.delimiter) + csv[0].csv_quote = string.byte(module.quote) + ffi.C.csv_iter_create(it, csv) + ffi.C.csv_feed(it, str) + + return iter, {readable, csv_chunk_size, csv, it} +end + +--@brief parse csv and make table +--@return table +module.load = function(readable, csv_chunk_size) + csv_chunk_size = csv_chunk_size or 4096 + local result = {} + for tup in module.iterate(readable, csv_chunk_size) do + table.insert(result, tup) + end + + return result +end + +--@brief dumps tuple or table as csv +--@param writable must be object with method write(string) like file or socket +--@return if there is no writable, it returns csv as a string +module.dump = function(t, writable) + if type(writable) == "nil" then + writable = make_writable() + end + if type(writable.write) ~= "function" or type(t) ~= "table" then + error("Usage: dump(table[, writable])") + end + local csv = ffi.new('csv_t[1]') + ffi.C.csv_create(csv) + csv[0].csv_delim = string.byte(module.delimiter) + csv[0].csv_quote = string.byte(module.quote) + local bufsz = 256 + local buf = csv[0].csv_realloc(ffi.cast(ffi.typeof('void *'), 0), bufsz) + if type(t[1]) ~= 'table' then + t = {t} + end + for k, line in pairs(t) do + local first = true + for k2, field in pairs(line) do + local strf = tostring(field) + if (strf:len() + 1) * 2 > bufsz then + bufsz = (strf:len() + 1) * 2 + buf = 
csv[0].csv_realloc(buf, bufsz) + end + local len = ffi.C.csv_escape_field(csv, strf, buf) + if first then + first = false + else + writable:write(module.delimiter) end - writable:write('\n') + writable:write(ffi.string(buf, len)) end - csv[0].csv_realloc(buf, 0) + writable:write('\n') end -} -return csv + csv[0].csv_realloc(buf, 0) + if writable.returnstring then + return writable.returnstring + end +end + +return module diff --git a/test/app/csv.result b/test/app/csv.result index 50695c594b..fb51b0d409 100644 --- a/test/app/csv.result +++ b/test/app/csv.result @@ -51,3 +51,18 @@ aba local res = internal.pwrite(self.fh iflag = bit.bor(iflag +test str dump: +quote"" d,",and, comma","both "" of "" t,h,e,m" +"""""",","",""" +"mul +ti +li +ne + +",field + +"" +" +" + +test load(dump(t)): true diff --git a/test/app/csv.test.lua b/test/app/csv.test.lua index f7d0105b16..fbdb2bd466 100755 --- a/test/app/csv.test.lua +++ b/test/app/csv.test.lua @@ -84,7 +84,7 @@ t = { } f = require("fio").open(file3, { "O_WRONLY", "O_TRUNC" , "O_CREAT"}, 0x1FF) -csv.dump(f, t) +csv.dump(t, f) f:close() f = fio.open(file3, {'O_RDONLY'}) t2 = csv.load(f, 5) @@ -100,6 +100,9 @@ for tup in csv.iterate(f) do end f:close() +print("test str dump:") +print(csv.dump(t)) +print("test load(dump(t)): " .. tostring(table2str(t) == table2str(csv.load(csv.dump(t))))) fio.unlink(file1) fio.unlink(file2) -- GitLab