From e4492d9a402c4e1850e1e610a708be9b3c6944d7 Mon Sep 17 00:00:00 2001
From: Daniil Medvedev <medvdanil@gmail.com>
Date: Wed, 12 Aug 2015 15:38:58 +0300
Subject: [PATCH] csv style fix3

---
 src/lib/csv/csv.c | 155 +++++++++++++++++++++++-----------------------
 src/lib/csv/csv.h |   5 +-
 2 files changed, 81 insertions(+), 79 deletions(-)

diff --git a/src/lib/csv/csv.c b/src/lib/csv/csv.c
index c2afb317be..020c4d5b7f 100644
--- a/src/lib/csv/csv.c
+++ b/src/lib/csv/csv.c
@@ -36,8 +36,6 @@
 #include <stdarg.h>
 #include <stdbool.h>
 
-static const double csv_buf_expand_factor = 2.0;
-
 void csv_emit_row_empty(void *ctx)
 {
 	(void)ctx;
@@ -73,15 +71,7 @@ csv_destroy(struct csv *csv)
 int
 csv_isvalid(struct csv *csv)
 {
-	if (csv->prev_symbol == csv->quote_char) {
-		if (csv->state == CSV_IN_QUOTES)
-			csv->state = CSV_OUT_OF_QUOTES;
-		else
-			csv->state = CSV_IN_QUOTES;
-		csv->prev_symbol = ' ';
-	}
-	if (csv->error_status == CSV_ER_OK &&
-			csv->state == CSV_IN_QUOTES)
+	if (csv->error_status == CSV_ER_OK && csv->state == CSV_IN_QUOTES)
 		csv->error_status = CSV_ER_INVALID;
 	return !csv->error_status;
 }
@@ -118,8 +108,9 @@ csv_setopt(struct csv *csv, int opt, ...)
 }
 
 /**
-  * both of methods (emitting and iterating) are implemening by one function
+  * both methods (emitting and iterating) are implemented by one function
   * firstonly == true means iteration method.
+  * @return unprocessed tail
   **/
 const char *
 csv_parse_impl(struct csv *csv, const char *s, const char *end, bool firstonly)
@@ -129,65 +120,70 @@ csv_parse_impl(struct csv *csv, const char *s, const char *end, bool firstonly)
 	assert(end - s > 0);
 	assert(csv->emit_field);
 	assert(csv->emit_row);
-	const char *p = s;
-
-	while (p != end) {
+	for (const char *p = s; p != end; p++) {
 		bool is_line_end = (*p == '\n' || *p == '\r');
-		//realloc buffer
-		if (csv->buf == 0 ||
-		    (csv->bufp && csv->buf_len < csv->bufp - csv->buf + 1)) {
-			csv->buf_len = (int)((csv->bufp - csv->buf + 1) *
-					     csv_buf_expand_factor + 1);
-			char *new_buf = (char *) csv->realloc(csv->buf, csv->buf_len);
+		/* realloc buffer */
+		if (csv->buf == NULL ||
+		   (csv->bufp && csv->buf_len < csv->bufp - csv->buf + 1)) {
+			size_t new_size = csv->buf_len * 2;
+			if (csv->buf_len == 0 || csv->buf == NULL)
+				new_size = 256;
+			char *new_buf = (char *)csv->realloc(csv->buf, new_size);
 			if (new_buf == NULL) {
 				csv->error_status = CSV_ER_MEMORY_ERROR;
 				return NULL;
 			}
+			csv->buf_len = new_size;
 			csv->bufp = csv->bufp - csv->buf + new_buf;
 			csv->buf = new_buf;
 		}
-		/** parser should keep previous symbol, because of "" and \r\n
-		 *  and to prevent additional states of FSM
-		 */
-		if (csv->prev_symbol == csv->quote_char) {
-			//double-quote ""
-			if (*p == csv->quote_char) {
-				*csv->bufp++ = csv->quote_char;
-				csv->prev_symbol = ' ';
-				p++;
-				continue;
-			}
-			//quote closing or opening
-			if (csv->state == CSV_IN_QUOTES)
-				csv->state = CSV_OUT_OF_QUOTES;
-			else
-				csv->state = CSV_IN_QUOTES;
-		}
-		//\r\n (or \n\r) linebreak, not in quotes
+		/* \r\n (or \n\r) linebreak, not in quotes */
 		if (is_line_end && csv->state != CSV_IN_QUOTES &&
-		    *p != csv->prev_symbol &&
-		    (csv->prev_symbol  == '\n' || csv->prev_symbol == '\r')
-		    ) {
-			csv->prev_symbol = 0;
-			p++;
+		   *p != csv->prev_symbol &&
+		   (csv->prev_symbol  == '\n' || csv->prev_symbol == '\r')) {
+			csv->prev_symbol = '\0';
 			continue;
 		}
 		csv->prev_symbol = *p;
-		switch(csv->state) {
+		/* two switches are used to avoid code duplication */
+		switch (csv->state) {
 		case CSV_LEADING_SPACES:
 			csv->bufp = csv->buf;
-			if (*p != ' ') {
+			if (*p == ' ') /* skip spaces */
+				continue;
+			csv->state = CSV_OUT_OF_QUOTES;
+			/* symbol not consumed yet, handle it in next switch */
+			break;
+		case CSV_QUOTE_OPENING:
+			if (*p == csv->quote_char) {
+				/* double-quote "" */
+				*csv->bufp++ = csv->quote_char;
 				csv->state = CSV_OUT_OF_QUOTES;
+				continue;
 			}
-			else
-				break; //spaces passed, perform field at once
+			csv->state = CSV_IN_QUOTES;
+			/* symbol not consumed yet, handle it in next switch */
+			break;
+		case CSV_QUOTE_CLOSING:
+			if (*p == csv->quote_char) {
+				/* double-quote "" */
+				*csv->bufp++ = csv->quote_char;
+				csv->state = CSV_IN_QUOTES;
+				continue;
+			}
+			csv->state = CSV_OUT_OF_QUOTES;
+			/* symbol not consumed yet, handle it in next switch */
+			break;
+		}
+
+		switch (csv->state) {
 		case CSV_OUT_OF_QUOTES:
-			//end of field
 			if (is_line_end || *p == csv->delimiter) {
+				/* end of field */
 				csv->state = CSV_LEADING_SPACES;
 				csv->bufp -= csv->ending_spaces;
 				if (firstonly) {
-					csv->state = CSV_NEWLINE;
+					csv->state = CSV_NEWFIELD;
 					return p;
 				} else {
 					csv->emit_field(csv->emit_ctx,
@@ -195,7 +191,10 @@ csv_parse_impl(struct csv *csv, const char *s, const char *end, bool firstonly)
 				}
 
 				csv->bufp = csv->buf;
-			} else if (*p != csv->quote_char) {
+
+			} else if (*p == csv->quote_char) {
+				csv->state = CSV_QUOTE_OPENING;
+			} else {
 				*csv->bufp++ = *p;
 			}
 
@@ -204,34 +203,34 @@ csv_parse_impl(struct csv *csv, const char *s, const char *end, bool firstonly)
 			} else {
 				csv->ending_spaces = 0;
 			}
+			if (is_line_end) {
+				/** bufp == buf means empty field,
+				  * but bufp == 0 means no field at the moment,
+				  * it may be end of line or end of file
+				  **/
+				csv->bufp = 0;
+				csv->emit_row(csv->emit_ctx);
+			}
 			break;
 		case CSV_IN_QUOTES:
-			if (*p != csv->quote_char) {
+			if (*p == csv->quote_char) {
+				csv->state = CSV_QUOTE_CLOSING;
+			} else {
 				*csv->bufp++ = *p;
 			}
 			break;
-		case CSV_NEWLINE:
+		case CSV_NEWFIELD:
 			csv->state = CSV_LEADING_SPACES;
-			break;
-		}
-		if (is_line_end && csv->state != CSV_IN_QUOTES) {
-			assert(csv->state == CSV_LEADING_SPACES);
-			/** bufp == buf means empty field,
-			  * but bufp == 0 means no field at the moment,
-			  * it may be end of line or end of file
-			  **/
-			csv->bufp = 0;
-			if (firstonly) {
+			if (is_line_end) {
+				csv->bufp = 0;
 				if (p + 1 == end)
 					return NULL;
 				else
 					return p + 1;
+
 			}
-			else {
-				csv->emit_row(csv->emit_ctx);
-			}
+			break;
 		}
-		p++;
 	}
 	return end;
 }
@@ -276,7 +275,7 @@ csv_next(struct csv_iterator *it)
 {
 	it->field = NULL;
 	it->field_len = 0;
-	if (it->buf_begin == NULL) //buffer isn't set
+	if (it->buf_begin == NULL) /* buffer isn't set */
 		return CSV_IT_NEEDMORE;
 	/**
 	  * length of buffer is zero
@@ -288,7 +287,7 @@ csv_next(struct csv_iterator *it)
 		  * but bufp == 0 means no field at the moment, it may be
 		  * end of line or end of file
 		  **/
-		if (it->csv->bufp == NULL) { //nothing to emit, end of file
+		if (it->csv->bufp == NULL) { /* nothing to emit, end of file */
 			return CSV_IT_EOF;
 		}
 		if (!it->csv->error_status && !csv_isvalid(it->csv)) {
@@ -299,7 +298,7 @@ csv_next(struct csv_iterator *it)
 			return CSV_IT_ERROR;
 		}
 
-		if (it->csv->state != CSV_END_OF_LAST_LINE) { //last field
+		if (it->csv->state != CSV_END_OF_LAST_LINE) { /* last field */
 			it->csv->state = CSV_END_OF_LAST_LINE;
 			it->csv->bufp -= it->csv->ending_spaces;
 			it->field = it->csv->buf;
@@ -307,7 +306,7 @@ csv_next(struct csv_iterator *it)
 			it->csv->bufp = it->csv->buf;
 			return CSV_IT_OK;
 		}
-		if (it->csv->state == CSV_END_OF_LAST_LINE) { //last line
+		if (it->csv->state == CSV_END_OF_LAST_LINE) { /* last line */
 			it->csv->realloc(it->csv->buf, 0);
 			it->csv->buf = NULL;
 			it->csv->bufp = NULL;
@@ -323,14 +322,14 @@ csv_next(struct csv_iterator *it)
 		return CSV_IT_ERROR;
 
 	it->buf_begin = tail;
-	//bufp == NULL means end of line
-	if (it->csv->bufp == NULL && it->csv->prev_symbol)
+	/* bufp == NULL means end of line */
+	if (it->csv->bufp == NULL)
 		return CSV_IT_EOL;
 
-	if (tail == it->buf_end) //buffer is empty
+	if (tail == it->buf_end) /* buffer is empty */
 		return CSV_IT_NEEDMORE;
 
-	//return field via iterator structure
+	/* return field via iterator structure */
 	it->field = it->csv->buf;
 	it->field_len = it->csv->bufp - it->csv->buf;
 	return CSV_IT_OK;
@@ -349,7 +348,7 @@ csv_escape_field(struct csv *csv, const char *field,
 {
 	char *p = dst;
 	int inquotes = 0;
-	//surround quotes, only if there is delimiter \n or \r
+	/* quote the field only if it contains the delimiter, \n or \r */
 	if (memchr(field, csv->delimiter, field_len) ||
 	    memchr(field, '\n', field_len) ||
 	    memchr(field, '\r', field_len)) {
@@ -357,7 +356,7 @@ csv_escape_field(struct csv *csv, const char *field,
 		*p++ = csv->quote_char;
 	}
 	while (*field) {
-		// double-quote ""
+		/* double-quote "" */
 		if (*field == csv->quote_char) {
 			assert(p - dst < buf_size);
 			*p++ = csv->quote_char;
@@ -365,7 +364,7 @@ csv_escape_field(struct csv *csv, const char *field,
 		assert(p - dst < buf_size);
 		*p++ = *field++;
 	}
-	//adds ending quote
+	/* adds ending quote */
 	if (inquotes) {
 		assert(p - dst < buf_size);
 		*p++ = csv->quote_char;
diff --git a/src/lib/csv/csv.h b/src/lib/csv/csv.h
index b9740b1697..343730e299 100644
--- a/src/lib/csv/csv.h
+++ b/src/lib/csv/csv.h
@@ -78,7 +78,10 @@ enum csv_parser_state {
 	CSV_LEADING_SPACES,
 	CSV_OUT_OF_QUOTES,
 	CSV_IN_QUOTES,
-	CSV_NEWLINE,
+	CSV_QUOTE_OPENING,
+	CSV_QUOTE_CLOSING,
+	CSV_LINE_BREAKING,
+	CSV_NEWFIELD,
 	CSV_END_OF_LAST_LINE
 };
 
-- 
GitLab