diff --git a/src/lib/csv/csv.c b/src/lib/csv/csv.c index 454b9d8bc8941e3c12a422ff048ef7f688af8682..c2afb317be7ade9cf63e6b43646668919ab80824 100644 --- a/src/lib/csv/csv.c +++ b/src/lib/csv/csv.c @@ -1,3 +1,32 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + #include "csv.h" #include <ctype.h> @@ -5,6 +34,7 @@ #include <string.h> #include <assert.h> #include <stdarg.h> +#include <stdbool.h> static const double csv_buf_expand_factor = 2.0; @@ -24,9 +54,9 @@ void csv_create(struct csv *csv) { memset(csv, 0, sizeof(struct csv)); - csv->csv_delim= ','; - csv->csv_quote = '\"'; - csv->csv_realloc = realloc; + csv->delimiter= ','; + csv->quote_char = '\"'; + csv->realloc = realloc; csv->emit_field = csv_emit_field_empty; csv->emit_row = csv_emit_row_empty; } @@ -34,8 +64,8 @@ csv_create(struct csv *csv) void csv_destroy(struct csv *csv) { - if(csv->buf) { - csv->csv_realloc(csv->buf, 0); + if (csv->buf) { + csv->realloc(csv->buf, 0); csv->buf = NULL; } } @@ -43,19 +73,23 @@ csv_destroy(struct csv *csv) int csv_isvalid(struct csv *csv) { - if (csv->prevsymb == csv->csv_quote) { - csv->state = csv->state == CSV_BUF_IN_QUOTES ? CSV_BUF_OUT_OF_QUOTES : CSV_BUF_IN_QUOTES; - csv->prevsymb = ' '; + if (csv->prev_symbol == csv->quote_char) { + if (csv->state == CSV_IN_QUOTES) + csv->state = CSV_OUT_OF_QUOTES; + else + csv->state = CSV_IN_QUOTES; + csv->prev_symbol = ' '; } - if (csv->csv_error_status == CSV_ER_OK && csv->state == CSV_BUF_IN_QUOTES) - csv->csv_error_status = CSV_ER_INVALID; - return !csv->csv_error_status; + if (csv->error_status == CSV_ER_OK && + csv->state == CSV_IN_QUOTES) + csv->error_status = CSV_ER_INVALID; + return !csv->error_status; } int csv_get_error_status(struct csv *csv) { - return csv->csv_error_status; + return csv->error_status; } void @@ -65,26 +99,30 @@ csv_setopt(struct csv *csv, int opt, ...) 
va_start(args, opt); switch(opt) { case CSV_OPT_DELIMITER: - csv->csv_delim = va_arg(args, int); + csv->delimiter = va_arg(args, int); break; case CSV_OPT_QUOTE: - csv->csv_quote = va_arg(args, int); + csv->quote_char = va_arg(args, int); break; case CSV_OPT_REALLOC: - csv->csv_realloc = va_arg(args, void* (*)(void*, long unsigned int)); + csv->realloc = va_arg(args, void* (*)(void*, size_t)); break; case CSV_OPT_EMIT_FIELD: - csv->emit_field = va_arg(args, void (*)(void*, const char *, const char *)); + csv->emit_field = va_arg(args, csv_emit_field_t); + break; case CSV_OPT_EMIT_ROW: - csv->emit_row = va_arg(args, void (*)(void*)); - case CSV_OPT_CTX: + csv->emit_row = va_arg(args, csv_emit_row_t); + break; + case CSV_OPT_EMIT_CTX: csv->emit_ctx = va_arg(args, void*); } va_end(args); } +/** Both parsing modes (emitting and iterating) share this one function; + * firstonly == true selects the iterating mode. */ const char * -csv_parse_common(struct csv *csv, const char *s, const char *end, int onlyfirst) +csv_parse_impl(struct csv *csv, const char *s, const char *end, bool firstonly) { if (end - s == 0) return NULL; @@ -93,67 +131,82 @@ csv_parse_common(struct csv *csv, const char *s, const char *end, int onlyfirst) assert(csv->emit_row); const char *p = s; - while(p != end) { - int isendl = (*p == '\n' || *p == '\r'); - - if (csv->buf == 0 || (csv->bufp && csv->buf_len < csv->bufp - csv->buf + 1)) { - csv->buf_len = (int)((csv->bufp - csv->buf + 1) * csv_buf_expand_factor + 1); - char *new_buf = (char *) csv->csv_realloc(csv->buf, csv->buf_len); - if(new_buf == NULL) { - csv->csv_error_status = CSV_ER_MEMORY_ERROR; + while (p != end) { + bool is_line_end = (*p == '\n' || *p == '\r'); + //realloc buffer + if (csv->buf == 0 || + (csv->bufp && csv->buf_len < csv->bufp - csv->buf + 1)) { + csv->buf_len = (int)((csv->bufp - csv->buf + 1) * + csv_buf_expand_factor + 1); + char *new_buf = (char *) csv->realloc(csv->buf, csv->buf_len); + if (new_buf == NULL) { + csv->error_status = 
CSV_ER_MEMORY_ERROR; return NULL; } csv->bufp = csv->bufp - csv->buf + new_buf; csv->buf = new_buf; } - - if (csv->prevsymb == csv->csv_quote) { - if(*p == csv->csv_quote) { - *csv->bufp++ = csv->csv_quote; - csv->prevsymb = ' '; + /** parser should keep previous symbol, because of "" and \r\n + * and to prevent additional states of FSM + */ + if (csv->prev_symbol == csv->quote_char) { + //double-quote "" + if (*p == csv->quote_char) { + *csv->bufp++ = csv->quote_char; + csv->prev_symbol = ' '; p++; continue; } - csv->state = csv->state == CSV_BUF_IN_QUOTES ? CSV_BUF_OUT_OF_QUOTES : CSV_BUF_IN_QUOTES; + //quote closing or opening + if (csv->state == CSV_IN_QUOTES) + csv->state = CSV_OUT_OF_QUOTES; + else + csv->state = CSV_IN_QUOTES; } - if (isendl && csv->state != CSV_BUF_IN_QUOTES && - *p != csv->prevsymb && (csv->prevsymb == '\n' || csv->prevsymb == '\r')) { - csv->prevsymb = 0; + //\r\n (or \n\r) linebreak, not in quotes + if (is_line_end && csv->state != CSV_IN_QUOTES && + *p != csv->prev_symbol && + (csv->prev_symbol == '\n' || csv->prev_symbol == '\r') + ) { + csv->prev_symbol = 0; p++; continue; } - csv->prevsymb = *p; + csv->prev_symbol = *p; switch(csv->state) { case CSV_LEADING_SPACES: csv->bufp = csv->buf; if (*p != ' ') { - csv->state = CSV_BUF_OUT_OF_QUOTES; + csv->state = CSV_OUT_OF_QUOTES; } - else break; //spaces passed, perform field at once - case CSV_BUF_OUT_OF_QUOTES: - if (isendl || *p == csv->csv_delim) { + else + break; //spaces passed, perform field at once + case CSV_OUT_OF_QUOTES: + //end of field + if (is_line_end || *p == csv->delimiter) { csv->state = CSV_LEADING_SPACES; - csv->bufp -= csv->csv_ending_spaces; - if(onlyfirst) { + csv->bufp -= csv->ending_spaces; + if (firstonly) { csv->state = CSV_NEWLINE; return p; } else { - csv->emit_field(csv->emit_ctx, csv->buf, csv->bufp); + csv->emit_field(csv->emit_ctx, + csv->buf, csv->bufp); } csv->bufp = csv->buf; - } else if (*p != csv->csv_quote) { + } else if (*p != csv->quote_char) { 
*csv->bufp++ = *p; } if (*p == ' ') { - csv->csv_ending_spaces++; + csv->ending_spaces++; } else { - csv->csv_ending_spaces = 0; + csv->ending_spaces = 0; } break; - case CSV_BUF_IN_QUOTES: - if (*p != csv->csv_quote) { + case CSV_IN_QUOTES: + if (*p != csv->quote_char) { *csv->bufp++ = *p; } break; @@ -161,11 +214,15 @@ csv_parse_common(struct csv *csv, const char *s, const char *end, int onlyfirst) csv->state = CSV_LEADING_SPACES; break; } - if (isendl && csv->state != CSV_BUF_IN_QUOTES) { + if (is_line_end && csv->state != CSV_IN_QUOTES) { assert(csv->state == CSV_LEADING_SPACES); + /** bufp == buf means empty field, + * but bufp == 0 means no field at the moment, + * it may be end of line or end of file + **/ csv->bufp = 0; - if(onlyfirst) { - if(p + 1 == end) + if (firstonly) { + if (p + 1 == end) return NULL; else return p + 1; @@ -182,7 +239,7 @@ csv_parse_common(struct csv *csv, const char *s, const char *end, int onlyfirst) void csv_parse_chunk(struct csv *csv, const char *s, const char *end) { - csv_parse_common(csv, s, end, 0); + csv_parse_impl(csv, s, end, false); } void @@ -190,12 +247,13 @@ csv_finish_parsing(struct csv *csv) { if (csv_isvalid(csv)){ if (csv->bufp) { - csv->bufp -= csv->csv_ending_spaces; - csv->emit_field(csv->emit_ctx, csv->buf, csv->bufp); + csv->bufp -= csv->ending_spaces; + csv->emit_field(csv->emit_ctx, + csv->buf, csv->bufp); csv->emit_row(csv->emit_ctx); } if (csv->buf) - csv->csv_realloc(csv->buf, 0); + csv->realloc(csv->buf, 0); csv->bufp = NULL; csv->buf = NULL; csv->buf_len = 0; @@ -210,46 +268,69 @@ csv_iterator_create(struct csv_iterator *it, struct csv *csv) it->csv = csv; } +/** + * next iteration step + **/ int -csv_next(struct csv_iterator *it) { +csv_next(struct csv_iterator *it) +{ it->field = NULL; it->field_len = 0; - if(it->buf_begin == NULL) + if (it->buf_begin == NULL) //buffer isn't set return CSV_IT_NEEDMORE; - if(it->buf_begin == it->buf_end) { - if (!it->csv->csv_error_status && !csv_isvalid(it->csv)) { - 
it->csv->csv_realloc(it->csv->buf, 0); + /** + * length of buffer is zero + * it means end of file, but if there is no \n + * function must emit last field, EOL and EOF. + **/ + if (it->buf_begin == it->buf_end) { + /** bufp == buf means empty field, + * but bufp == 0 means no field at the moment, it may be + * end of line or end of file + **/ + if (it->csv->bufp == NULL) { //nothing to emit, end of file + return CSV_IT_EOF; + } + if (!it->csv->error_status && !csv_isvalid(it->csv)) { + it->csv->realloc(it->csv->buf, 0); it->csv->buf = NULL; it->csv->bufp = NULL; it->csv->buf_len = 0; return CSV_IT_ERROR; } - if(it->csv->bufp == NULL) { - return CSV_IT_EOF; - } - if(it->csv->state != CSV_END_OF_INPUT) { - it->csv->state = CSV_END_OF_INPUT; - it->csv->bufp -= it->csv->csv_ending_spaces; + + if (it->csv->state != CSV_END_OF_LAST_LINE) { //last field + it->csv->state = CSV_END_OF_LAST_LINE; + it->csv->bufp -= it->csv->ending_spaces; it->field = it->csv->buf; it->field_len = it->csv->bufp - it->csv->buf; it->csv->bufp = it->csv->buf; return CSV_IT_OK; - } else { - it->csv->csv_realloc(it->csv->buf, 0); + } + if (it->csv->state == CSV_END_OF_LAST_LINE) { //last line + it->csv->realloc(it->csv->buf, 0); it->csv->buf = NULL; it->csv->bufp = NULL; it->csv->buf_len = 0; return CSV_IT_EOL; } + } - const char *tail = csv_parse_common(it->csv, it->buf_begin, it->buf_end, 1); - if(csv_get_error_status(it->csv) == CSV_ER_MEMORY_ERROR) + const char *tail = csv_parse_impl(it->csv, it->buf_begin, + it->buf_end, true); + + if (csv_get_error_status(it->csv) == CSV_ER_MEMORY_ERROR) return CSV_IT_ERROR; + it->buf_begin = tail; - if(it->csv->bufp == NULL && it->csv->prevsymb) + //bufp == NULL means end of line + if (it->csv->bufp == NULL && it->csv->prev_symbol) return CSV_IT_EOL; - if(tail == it->buf_end) + + if (tail == it->buf_end) //buffer is empty return CSV_IT_NEEDMORE; + + //return field via iterator structure it->field = it->csv->buf; it->field_len = it->csv->bufp - 
it->csv->buf; return CSV_IT_OK; @@ -263,25 +344,31 @@ csv_feed(struct csv_iterator *it, const char *buf, size_t buf_len) } size_t -csv_escape_field(struct csv *csv, const char *field, size_t field_len, char *dst, size_t buf_size) +csv_escape_field(struct csv *csv, const char *field, + size_t field_len, char *dst, size_t buf_size) { char *p = dst; int inquotes = 0; - if(memchr(field, csv->csv_delim, field_len) || memchr(field, '\n', field_len) || memchr(field, '\r', field_len)) { + //surround quotes, only if there is delimiter \n or \r + if (memchr(field, csv->delimiter, field_len) || + memchr(field, '\n', field_len) || + memchr(field, '\r', field_len)) { inquotes = 1; - *p++ = csv->csv_quote; + *p++ = csv->quote_char; } - while(*field) { - if(*field == csv->csv_quote) { + for (const char *stop = field + field_len; field != stop;) { + // double-quote "" + if (*field == csv->quote_char) { assert(p - dst < buf_size); - *p++ = csv->csv_quote; + *p++ = csv->quote_char; } assert(p - dst < buf_size); *p++ = *field++; } - if(inquotes) { + //adds ending quote + if (inquotes) { assert(p - dst < buf_size); - *p++ = csv->csv_quote; + *p++ = csv->quote_char; } *p = 0; return p - dst; diff --git a/src/lib/csv/csv.h b/src/lib/csv/csv.h index 5b9854a4c2a98de384a84eca40de318913e2c930..b9740b16970949e088f1964a8a9cbf8d47d5da83 100644 --- a/src/lib/csv/csv.h +++ b/src/lib/csv/csv.h @@ -1,7 +1,34 @@ #ifndef TARANTOOL_CSV_H_INCLUDED #define TARANTOOL_CSV_H_INCLUDED - -#include<stdio.h> +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <stdio.h> #if defined(__cplusplus) extern "C" { @@ -15,14 +42,14 @@ struct csv void *emit_ctx; csv_emit_row_t emit_row; csv_emit_field_t emit_field; - char csv_delim; - char csv_quote; + char delimiter; + char quote_char; - char prevsymb; - int csv_error_status; - int csv_ending_spaces; + char prev_symbol; + int error_status; + int ending_spaces; - void *(*csv_realloc)(void*, size_t); + void *(*realloc)(void*, size_t); int state; char *buf; @@ -30,16 +57,16 @@ struct csv size_t buf_len; }; -enum parser_options { +enum csv_parser_option { CSV_OPT_DELIMITER, CSV_OPT_QUOTE, CSV_OPT_REALLOC, CSV_OPT_EMIT_FIELD, CSV_OPT_EMIT_ROW, - CSV_OPT_CTX + CSV_OPT_EMIT_CTX }; -enum iteraion_states { +enum csv_iteraion_state { CSV_IT_OK, CSV_IT_EOL, CSV_IT_NEEDMORE, @@ -47,15 +74,15 @@ enum iteraion_states { CSV_IT_ERROR }; -enum parser_states { +enum csv_parser_state { CSV_LEADING_SPACES, - CSV_BUF_OUT_OF_QUOTES, - CSV_BUF_IN_QUOTES, + CSV_OUT_OF_QUOTES, + CSV_IN_QUOTES, CSV_NEWLINE, - CSV_END_OF_INPUT + CSV_END_OF_LAST_LINE }; -enum error_status { +enum csv_error_status { CSV_ER_OK, CSV_ER_INVALID, CSV_ER_MEMORY_ERROR @@ -99,10 +126,10 @@ csv_get_error_status(struct csv *csv); struct 
csv_iterator { struct csv *csv; - const char *buf_begin; + const char *buf_begin; //input buffer const char *buf_end; - const char *field; + const char *field; //output buffer size_t field_len; }; @@ -130,15 +157,17 @@ csv_feed(struct csv_iterator *it, const char *buf, size_t buf_len); * @return length of escaped field or -1 if not enough space in buffer. */ size_t -csv_escape_field(struct csv *csv, const char *field, size_t field_len, char *dst, size_t buf_size); +csv_escape_field(struct csv *csv, const char *field, size_t field_len, char *dst, size_t dst_size); -static inline const char* csv_iterator_get_field(struct csv_iterator *it) +static inline const char * +csv_iterator_get_field(struct csv_iterator *it) { return it->field; } -static inline size_t csv_iterator_get_field_len(struct csv_iterator *it) +static inline size_t +csv_iterator_get_field_len(struct csv_iterator *it) { return it->field_len; } diff --git a/src/lua/csv.lua b/src/lua/csv.lua index c06ba5ab3c9a5f29426785e02f74e4ee36054afd..460e628e97f68499216ee0a56abe7e9ca71e22b8 100644 --- a/src/lua/csv.lua +++ b/src/lua/csv.lua @@ -12,19 +12,21 @@ ffi.cdef[[ void *emit_ctx; csv_emit_row_t emit_row; csv_emit_field_t emit_field; - char csv_delim; - char csv_quote; + char delimiter; + char quote_char; - int csv_invalid; - int csv_ending_spaces; + char prev_symbol; + int error_status; + int ending_spaces; - void *(*csv_realloc)(void*, size_t); + void *(*realloc)(void*, size_t); int state; char *buf; char *bufp; size_t buf_len; }; + void csv_create(struct csv *csv); void csv_destroy(struct csv *csv); void csv_setopt(struct csv *csv, int opt, ...); @@ -49,16 +51,6 @@ ffi.cdef[[ }; ]] -local make_writable = function() - wr = {} - wr.returnstring = "" - wr.write = function(self, s) - wr.returnstring = wr.returnstring .. 
s - end - return wr -end - - local iter = function(csvstate, i) local readable = csvstate[1] local csv_chunk_size = csvstate[2] @@ -100,26 +92,27 @@ local module = {} --@param readable must be string or object with method read(num) returns string --@param opts.chunk_size (default 4096). Parser will read by chunk_size symbols --@param opts.delimiter (default ','). ---@param opts.quote (default '"'). +--@param opts.quote_char (default '"'). --@param opts.skip_head_lines (default 0). Skip header. --@return iter function, iterator state module.iterate = function(readable, opts) opts = opts or {} + if type(readable) ~= "string" and type(readable.read) ~= "function" then + error("Usage: load(string or object with method read(num) " .. + "returns string)") + end if not opts.chunk_size then opts.chunk_size = 4096 end if not opts.delimiter then opts.delimiter = ',' end - if not opts.quote then - opts.quote = '"' + if not opts.quote_char then + opts.quote_char = '"' end if not opts.skip_head_lines then opts.skip_head_lines = 0 end - if type(readable) ~= "string" and type(readable.read) ~= "function" then - error("Usage: load(string or object with method read(num) returns string)") - end local str if type(readable) == "string" then str = readable @@ -127,16 +120,18 @@ module.iterate = function(readable, opts) else str = readable:read(opts.chunk_size) end - if not str then - error("Usage: load(string or object with method read(num) returns string)") + + if not str then --read returned nothing + error("Usage: load(string or object with method read(num) " .. 
+ "returns string)") end local it = ffi.new('struct csv_iterator') local csv = ffi.new('struct csv') ffi.C.csv_create(csv) ffi.gc(csv, ffi.C.csv_destroy) - csv.csv_delim = string.byte(opts.delimiter) - csv.csv_quote = string.byte(opts.quote) + csv.delimiter = string.byte(opts.delimiter) + csv.quote_char = string.byte(opts.quote_char) ffi.C.csv_iterator_create(it, csv) ffi.C.csv_feed(it, str, string.len(str)) @@ -158,7 +153,7 @@ end --@param t is tuple or table --@param writable must be object with method write(string) like file or socket --@param opts.delimiter (default ','). ---@param opts.quote (default '"'). +--@param opts.quote_char (default '"'). --@return there is no writable it returns csv as string module.dump = function(t, opts, writable) opts = opts or {} @@ -166,49 +161,61 @@ module.dump = function(t, opts, writable) if not opts.delimiter then opts.delimiter = ',' end - if not opts.quote then - opts.quote = '"' + if not opts.quote_char then + opts.quote_char = '"' end - if type(writable) == "nil" then - writable = make_writable() - end - if type(writable.write) ~= "function" or type(t) ~= "table" then + if (type(writable) ~= "nil" and type(writable.write) ~= "function") + or type(t) ~= "table" then error("Usage: dump(table[, opts, writable])") end local csv = ffi.new('struct csv') ffi.C.csv_create(csv) ffi.gc(csv, ffi.C.csv_destroy) - csv.csv_delim = string.byte(opts.delimiter) - csv.csv_quote = string.byte(opts.quote) + csv.delimiter = string.byte(opts.delimiter) + csv.quote_char = string.byte(opts.quote_char) local bufsz = 256 - local buf = csv.csv_realloc(ffi.cast(ffi.typeof('void *'), 0), bufsz) + local buf = csv.realloc(ffi.cast(ffi.typeof('void *'), 0), bufsz) if type(t[1]) ~= 'table' then t = {t} end + local result_table + if type(writable) == 'nil' then + result_table = {} + end for k, line in pairs(t) do local first = true + local output_tuple = {} for k2, field in pairs(line) do strf = tostring(field) - if (strf:len() + 1) * 2 > bufsz then - 
bufsz = (strf:len() + 1) * 2 - buf = csv.csv_realloc(buf, bufsz) + local buf_new_size = (strf:len() + 1) * 2 + if buf_new_size > bufsz then + bufsz = buf_new_size + buf = csv.realloc(buf, bufsz) end - local len = ffi.C.csv_escape_field(csv, strf, string.len(strf), buf, bufsz) + local len = ffi.C.csv_escape_field(csv, strf, + string.len(strf), buf, bufsz) if first then first = false else - writable:write(opts.delimiter) + output_tuple[#output_tuple + 1] = opts.delimiter end - writable:write(ffi.string(buf, len)) + output_tuple[#output_tuple + 1] = ffi.string(buf, len) + end + + output_tuple[#output_tuple + 1] = '\n' + if result_table then + result_table[#result_table + 1] = table.concat(output_tuple) + else + writable:write(table.concat(output_tuple)) end - writable:write('\n') + output_tuple = {} end ffi.C.csv_destroy(csv) - csv.csv_realloc(buf, 0) - if writable.returnstring then - return writable.returnstring + csv.realloc(buf, 0) + if result_table then + return table.concat(result_table) end end diff --git a/test/app/csv.test.lua b/test/app/csv.test.lua index 720eabd3d4636f25e53d4c725fc4de57b547f793..fcefc7295dac977c6977461a99073549ecc7b1d8 100755 --- a/test/app/csv.test.lua +++ b/test/app/csv.test.lua @@ -19,16 +19,21 @@ end local csv = require('csv') local fio = require('fio') local tap = require('tap') -local test1 = '|a|\t|b|\t\n|1|\t|ha\n"ha"\nha|\t\n|3|\t|4|\t\n' -local test2 = '||\t||\t||\t\n||\t||\t\n||\t\n' -local test3 = '||\t||\t\n|kp"v|\t\n' -local test4 = '|123|\t|5|\t|92|\t|0|\t|0|\t\n|1|\t|12 34|\t|56|\t|quote , |\t|66|\t\n|ok|\t\n' -local test5 = "|1|\t\n|23|\t|456|\t|abcac|\t|'multiword field 4'|\t\n|none|\t|none|\t|0|\t\n" .. - "||\t||\t||\t\n|aba|\t|adda|\t|f3|\t|0|\t\n|local res = internal.pwrite(self.fh|\t|d" .. - "ata|\t|len|\t|offset)|\t\n|iflag = bit.bor(iflag|\t|fio.c.flag[ flag ])|\t\n||\t||\t||\t\n" -local test6 = "|23|\t|456|\t|abcac|\t|'multiword field 4'|\t\n|none|\t|none|\t|0|\t\n||\t||\t||\t\n" .. 
- "|aba|\t|adda|\t|f3|\t|0|\t\n|local res = internal.pwrite(self.fh|\t|data|\t|len|\t|offset)" .. - "|\t\n|iflag = bit.bor(iflag|\t|fio.c.flag[ flag ])|\t\n||\t||\t||\t\n" +local test1_ans = '|a|\t|b|\t\n|1|\t|ha\n"ha"\nha|\t\n|3|\t|4|\t\n' +local test2_ans = '||\t||\t||\t\n||\t||\t\n||\t\n' +local test3_ans = '||\t||\t\n|kp"v|\t\n' +local test4_ans = '|123|\t|5|\t|92|\t|0|\t|0|\t\n|1|\t|12 34|\t|56|\t' .. + '|quote , |\t|66|\t\n|ok|\t\n' +local test5_ans = "|1|\t\n|23|\t|456|\t|abcac|\t|'multiword field 4'|\t\n" .. + "|none|\t|none|\t|0|\t\n||\t||\t||\t\n|aba|\t|adda|\t|f" .. + "3|\t|0|\t\n|local res = internal.pwrite(self.fh|\t|dat" .. + "a|\t|len|\t|offset)|\t\n|iflag = bit.bor(iflag|\t|fio." .. + "c.flag[ flag ])|\t\n||\t||\t||\t\n" +local test6_ans = "|23|\t|456|\t|abcac|\t|'multiword field 4'|\t\n|none|" .. + "\t|none|\t|0|\t\n||\t||\t||\t\n|aba|\t|adda|\t|f3|\t|" .. + "0|\t\n|local res = internal.pwrite(self.fh|\t|data|\t" .. + "|len|\t|offset)|\t\n|iflag = bit.bor(iflag|\t|fio.c.f" .. + "lag[ flag ])|\t\n||\t||\t||\t\n" test = tap.test("csv") test:plan(8) @@ -37,15 +42,15 @@ readable = {} readable.read = myread readable.v = "a,b\n1,\"ha\n\"\"ha\"\"\nha\"\n3,4\n" readable.i = 0 -test:is(table2str(csv.load(readable)), test1, "obj test1") +test:is(table2str(csv.load(readable)), test1_ans, "obj test1") readable.v = ", ,\n , \n\n" readable.i = 0 -test:is(table2str(csv.load(readable, {chunk_size = 1} )), test2, "obj test2") +test:is(table2str(csv.load(readable, {chunk_size = 1} )), test2_ans, "obj test2") readable.v = ", \r\nkp\"\"v" readable.i = 0 -test:is(table2str(csv.load(readable, {chunk_size = 3})), test3, "obj test3") +test:is(table2str(csv.load(readable, {chunk_size = 3})), test3_ans, "obj test3") tmpdir = fio.tempdir() file1 = fio.pathjoin(tmpdir, 'file.1') @@ -57,7 +62,7 @@ f:write("123 , 5 , 92 , 0, 0\n" .. 
"1, 12 34, 56, \"quote , \", 66\nok") f:close() f = fio.open(file1, {'O_RDONLY'}) -test:is(table2str(csv.load(f, {chunk_size = 10})), test4, "fio test1") +test:is(table2str(csv.load(f, {chunk_size = 10})), test4_ans, "fio test1") f:close() @@ -72,11 +77,14 @@ f:write("1\n23,456,abcac,\'multiword field 4\'\n" .. ) f:close() f = fio.open(file2, {'O_RDONLY'}) -test:is(table2str(csv.load(f, {chunk_size = 1})), test5, "fio test2") --symbol by symbol reading +--symbol by symbol reading +test:is(table2str(csv.load(f, {chunk_size = 1})), test5_ans, "fio test2") f:close() f = fio.open(file2, {'O_RDONLY'}) -test:is(table2str(csv.load(f, {chunk_size = 7, skip_head_lines = 1})), test6, "fio test3") --7 symbols per chunk +opts = {chunk_size = 7, skip_head_lines = 1} +--7 symbols per chunk +test:is(table2str(csv.load(f, opts)), test6_ans, "fio test3") f:close() t = { diff --git a/test/unit/csv.c b/test/unit/csv.c index ce7561c855ab63cf22c8d98612919f1f7599f6a7..0749f00dbbb5df797748ada27c41a8ed5fb93940 100644 --- a/test/unit/csv.c +++ b/test/unit/csv.c @@ -1,8 +1,35 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ #include "csv/csv.h" #include "unit.h" #include <stdio.h> #include <string.h> -#include <assert.h> int isendl = 1; void @@ -106,7 +133,7 @@ void test5() { csv_setopt(&csv, CSV_OPT_DELIMITER, '\t'); csv_parse_chunk(&csv, s, s + strlen(s)); csv_finish_parsing(&csv); - printf("valid: %s\n", csv.csv_error_status == CSV_ER_INVALID ? "NO" : "yes"); + printf("valid: %s\n", csv.error_status == CSV_ER_INVALID ? "NO" : "yes"); csv_destroy(&csv); footer(); } @@ -161,7 +188,7 @@ void big_chunk_separated_test() { struct counter cnt; cnt.line_cnt = 0; cnt.fieldsizes_cnt = 0; - csv_setopt(&csv, CSV_OPT_CTX, &cnt); + csv_setopt(&csv, CSV_OPT_EMIT_CTX, &cnt); const char *s = "abc, def, def, cba"; for(size_t i = 0; i < lines; i++) { @@ -184,8 +211,8 @@ void big_chunk_separated_test() { //without fieldsizes counts without commas and spaces printf("line_cnt=%d, fieldsizes_cnt=%d, %d\n", (int)cnt.line_cnt, (int)cnt.fieldsizes_cnt, (int) (lines * (strlen(s) - 6) * (linelen / strlen(s)))); - assert(lines == cnt.line_cnt); - assert(lines * (strlen(s) - 6) * (linelen / strlen(s)) == cnt.fieldsizes_cnt); + fail_unless(lines == cnt.line_cnt); + fail_unless(lines * (strlen(s) - 6) * (linelen / strlen(s)) == cnt.fieldsizes_cnt); csv_destroy(&csv); free(buf); footer(); @@ -213,7 +240,7 @@ void random_generated_test() { struct counter cnt; cnt.line_cnt = 0; cnt.fieldsizes_cnt = 0; - csv_setopt(&csv, CSV_OPT_CTX, &cnt); + csv_setopt(&csv, 
CSV_OPT_EMIT_CTX, &cnt); csv_parse_chunk(&csv, rand_test, rand_test + strlen(rand_test)); csv_finish_parsing(&csv);