diff --git a/CMakeLists.txt b/CMakeLists.txt index 7311509a6b3e9a75164c08da5358e766c308c964..cc2352ea100923b28b88557d4e9cded7d40c2428 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,6 +78,7 @@ check_function_exists(memmem HAVE_MEMMEM) check_function_exists(memrchr HAVE_MEMRCHR) check_function_exists(funopen HAVE_FUNOPEN) +check_function_exists(fopencookie HAVE_FOPENCOOKIE) # # Some versions of GNU libc define non-portable __libc_stack_end diff --git a/include/fiob.h b/include/fiob.h index 502b824bc0cc6decb16c61a78905d9b63f312626..9b8de8985d2b8301cf4b173647fbb0734953b158 100644 --- a/include/fiob.h +++ b/include/fiob.h @@ -28,8 +28,6 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#define O_DIRECT_BSIZE ( 4096 * 256 ) - #include <sys/types.h> #include <stdio.h> diff --git a/include/tarantool/config.h.cmake b/include/tarantool/config.h.cmake index 15a437337e24274097dfb2fc6984603d1e088333..5d8fc06547e35941d8c26b98f27c65c3d3f04710 100644 --- a/include/tarantool/config.h.cmake +++ b/include/tarantool/config.h.cmake @@ -67,6 +67,11 @@ */ #cmakedefine HAVE_FUNOPEN 1 +/* + * Defined if this platform has GNU specific fopencookie() + */ +#cmakedefine HAVE_FOPENCOOKIE 1 + /* * Defined if this platform has GNU specific memmem(). 
*/
diff --git a/src/fiob.c b/src/fiob.c
index 39779e036164568a24bcec94025e19968afc9b8e..4e8cc794778b4b8609f5d7617195db4ee2806fb5 100644
--- a/src/fiob.c
+++ b/src/fiob.c
@@ -18,6 +18,16 @@
 #include <unistd.h>
 #include <tarantool/config.h>
 
+/* Use special implementation if we have O_DIRECT and FOPENCOOKIE or FUNOPEN */
+#if defined(O_DIRECT) && (defined(HAVE_FUNOPEN) || defined(HAVE_FOPENCOOKIE))
+#define FIOB_DIRECT
+#endif
+
+#if defined(FIOB_DIRECT)
+enum {
+	FIOB_ALIGN = 4096,
+	FIOB_BSIZE = FIOB_ALIGN * 256
+};
 
 struct fiob {
 	int fd;
@@ -37,44 +47,100 @@ struct fiob {
 #endif
 };
 
+static inline off_t
+fiob_ceil(off_t off)
+{
+	/* round up to a multiple of FIOB_ALIGN (a power of two) */
+	return (off + FIOB_ALIGN - 1) & ~(off_t) (FIOB_ALIGN - 1);
+}
+
+#ifdef HAVE_FUNOPEN
+static int
+fiob_read(void *cookie, char *buf, int count)
+#else
 static ssize_t
-fiob_readf(struct fiob *f, char *buf, size_t count)
+fiob_read(void *cookie, char *buf, size_t count)
+#endif
 {
+	struct fiob *f = (struct fiob *)cookie;
 	ssize_t to_read = (ssize_t) count;
+
+	/* The number of starting bytes in f->buf to skip due to alignment */
+	off_t skip = 0;
 	while (to_read > 0) {
-		ssize_t nrd = read(f->fd, buf, to_read);
+		/* Round `to_read' up to FIOB_ALIGN, at most f->bsize */
+		ssize_t to_read_al = MIN(fiob_ceil(to_read), f->bsize);
+		/*
+		 * Optimistically try to read aligned size into the aligned
+		 * buffer. If the current file position is not aligned then
+		 * read(2) returns EINVAL. In this case seek to an aligned
+		 * position and try again. This trick saves one extra
+		 * syscall for general workflow.
+		 */
+		ssize_t nrd = read(f->fd, f->buf, to_read_al);
 		if (nrd < 0) {
 			if (errno == EINTR) {
 				errno = 0;
 				continue;
+			} else if (errno == EINVAL && skip == 0) {
+				/*
+				 * read(2) can return EINVAL only in 3 cases:
+				 * 1. read buffer is not aligned - handled in
+				 * fiob_open().
+				 * 2. read size is not aligned - handled above
+				 * 3. current file position is not aligned -
+				 * handled here.
+				 */
+				off_t pos = lseek(f->fd, 0, SEEK_CUR);
+				if (pos < 0) {
+					say_syserror("lseek, [%s]", f->path);
+					return -1;
+				}
+				/* Calculate aligned position */
+				skip = pos % FIOB_ALIGN;
+				pos -= skip;
+				if (skip == 0) {
+					/* Position is aligned. */
+					errno = EINVAL;
+					say_error("read, [%s]", f->path);
+					return -1;
+				}
+				/* Seek to the new position */
+				if (lseek(f->fd, pos, SEEK_SET) != pos) {
+					say_syserror("lseek, [%s]", f->path);
+					return -1;
+				}
+				/* Try to read again. */
+				continue;
 			}
-			if (errno == EAGAIN || errno == EWOULDBLOCK)
-				return count != to_read ? count - to_read : -1;
 			say_syserror("read, [%s]", f->path);
 			return -1; /* XXX: file position is unspecified */
 		}
+		/* Ignore the leading bytes read only to realign the position. */
+		nrd -= skip;
-		if (nrd == 0)
+		if (nrd <= 0) /* EOF, or fewer than `skip' bytes were read */
 			break;
 
+		if (nrd > to_read) {
+			/*
+			 * A few more bytes have been read because `to_read'
+			 * is not aligned to FIOB_ALIGN. Set the file position
+			 * to the expected libc value and ignore extra bytes.
+			 */
+			if (lseek(f->fd, to_read - nrd, SEEK_CUR) < 0) {
+				say_syserror("lseek, [%s]", f->path);
+				return -1;
+			}
+			nrd = to_read;
+		}
+		memcpy(buf, f->buf + skip, nrd); /* see nrd -= skip */
+		skip = 0; /* reset alignment offset */
 		buf += nrd;
 		to_read -= nrd;
 	}
 	return count - to_read;
 }
 
-#ifdef HAVE_FUNOPEN
-static int
-fiob_read(void *cookie, char *buf, int len)
-#else
-static ssize_t
-fiob_read(void *cookie, char *buf, size_t len)
-#endif
-{
-	struct fiob *f = (struct fiob *)cookie;
-	return fiob_readf(f, buf, len);
-}
-
-
 static ssize_t
 fiob_writef(struct fiob *f, const char *buf, size_t count)
 {
@@ -87,8 +153,6 @@ fiob_writef(struct fiob *f, const char *buf, size_t count)
 				errno = 0;
 				continue;
 			}
-			if (errno == EAGAIN || errno == EWOULDBLOCK)
-				return count != to_write ?
count - to_write : -1; say_syserror("write, [%s]", f->path); return -1; /* XXX: file position is unspecified */ } @@ -101,17 +165,13 @@ fiob_writef(struct fiob *f, const char *buf, size_t count) return count - to_write; } - static int fiob_flushb(struct fiob *f) { - if (!f->buf || !f->bfill) + if (!f->bfill) return 0; - size_t tlen = f->bfill / 4096; - if (f->bfill % 4096) - tlen++; - tlen *= 4096; + size_t tlen = fiob_ceil(f->bfill); if (fiob_writef(f, f->buf, tlen) < 0) { return -1; @@ -141,9 +201,6 @@ fiob_write(void *cookie, const char *buf, size_t len) if (len == 0) return 0; - if (!f->buf) - return fiob_writef(f, buf, len); - ssize_t bytes_left = len; ssize_t tocopy; @@ -223,23 +280,24 @@ fiob_close(void *cookie) errno = save_errno; return res; } +#endif /* defined(FIOB_DIRECT) */ -/** open file. The same as fiob_open but receives additional open (2) flags */ +/** open file. The same as fopen but receives additional open (2) flags */ FILE * fiob_open(const char *path, const char *mode) { int omode = 0666; int flags = 0; int save_errno; - - size_t bsize = 0; - void *buf = NULL; - + int fd = -1; + FILE *file = NULL; +#if defined (FIOB_DIRECT) + struct fiob *f = NULL; +#endif /* defined(FIOB_DIRECT) */ int um = umask(0722); umask(um); omode &= ~um; - if (strchr(mode, 'r')) { if (strchr(mode, '+')) flags |= O_RDWR; @@ -267,55 +325,50 @@ fiob_open(const char *path, const char *mode) if (strchr(mode, 'x')) flags |= O_EXCL; #endif - - /* O_DIRECT */ - if (strchr(mode, 'd')) { -#ifdef O_DIRECT - flags |= O_DIRECT; -#endif - bsize = O_DIRECT_BSIZE; - posix_memalign(&buf, 4096, bsize); - if (!buf) { - errno = ENOMEM; - return NULL; - } - /* for valgrind */ - memset(buf, 0, bsize); - } - /* O_SYNC */ if (strchr(mode, 's')) { flags |= WAL_SYNC_FLAG; } - struct fiob *f = (struct fiob *)calloc(1, sizeof(struct fiob)); - if (!f) { - free(buf); - errno = ENOMEM; - return NULL; +#if defined(FIOB_DIRECT) + if (strchr(mode, 'd')) { + /* Try to open file with O_DIRECT */ + fd = 
open(path, flags | O_DIRECT, omode); } - - f->path = strdup(path); - if (!f->path) { - errno = ENOMEM; - goto error; + if (fd < 0) { +#endif /* defined(FIOB_DIRECT) */ + /* Fallback to libc implementation */ + fd = open(path, flags, omode); + if (fd < 0) + goto error; + file = fdopen(fd, mode); + if (!file) + goto error; + return file; +#if defined(FIOB_DIRECT) } - f->buf = buf; - f->bsize = bsize; + f = (struct fiob *)calloc(1, sizeof(struct fiob)); + if (!f) + goto error; - f->fd = open(path, flags, omode); - if (f->fd < 0) + f->fd = fd; + f->bsize = FIOB_BSIZE; + if (posix_memalign(&f->buf, FIOB_ALIGN, f->bsize)) goto error; + /* for valgrind */ + memset(f->buf, 0, f->bsize); + f->path = strdup(path); + if (!f->path) + goto error; f->io.read = fiob_read; f->io.write = fiob_write; f->io.seek = fiob_seek; f->io.close = fiob_close; - FILE *file; #ifdef HAVE_FUNOPEN file = funopen(f, f->io.read, f->io.write, f->io.seek, f->io.close); @@ -333,16 +386,21 @@ fiob_open(const char *path, const char *mode) #endif return file; +#endif /* defined(FIOB_DIRECT) */ error: save_errno = errno; say_syserror("Can't open '%s'", path); - if (f->fd > 0) - close(f->fd); - - free(f->buf); - free(f->path); - free(f); + if (fd >= 0) + close(fd); + +#if defined(FIOB_DIRECT) + if (f) { + free(f->buf); + free(f->path); + free(f); + } +#endif /* FIOB_DIRECT */ errno = save_errno; return NULL; diff --git a/test/unit/fiob.c b/test/unit/fiob.c index a26570f61f2150a0b57e1b4dfe16558e99057123..dc61f07ee2109b881378a9e928c5037d5514a936 100644 --- a/test/unit/fiob.c +++ b/test/unit/fiob.c @@ -22,7 +22,7 @@ -#define PLAN 47 +#define PLAN 65 #define ITEMS 7 @@ -98,6 +98,29 @@ main(void) is(done, 12, "Hello world is read (%zu bytes)", done); is(memcmp(buf, "Hello, world", 12), 0, "data"); + is(fseek(f, 7L, SEEK_SET), 0, "set odd position"); + is(ftell(f), 7L, "check odd position"); + is(fread(buf, 1, 4096, f), 5, "read from odd position (size)"); + is(memcmp(buf, "world", 5), 0, "read from odd position 
(data)"); + + is(fseek(f, 0L, SEEK_SET), 0, "set start position"); + is(ftell(f), 0L, "check start position"); + + is(fseek(f, 0L, SEEK_END), 0, "set eof position"); + is(ftell(f), 12L, "check eof position"); + is(fread(buf, 1, 4096, f), 0, "read from eof position (size)"); + ok(feof(f), "feof"); + is(fread(buf, 1, 4096, f), 0, "read from eof position (size)"); + ok(feof(f), "feof"); + + is(fseek(f, -1L, SEEK_END), 0, "set -1 position"); + is(ftell(f), 11L, "check -1 position"); + is(fread(buf, 1, 4096, f), 1, "read from -1 position (size)"); + is(memcmp(buf, "d", 1), 0, "read from -1 position (data)"); + ok(feof(f), "feof"); + + is(fread(buf, 1, 4096, f), 0, "read from eof position (size)"); + is(fseek(f, 0L, SEEK_SET), 0, "set new position"); done = fread(buf + 1, 1, 12, f); is(done, 12, "Hello world is read (%zu bytes)", done); diff --git a/test/unit/fiob.result b/test/unit/fiob.result index 2cedf473e2d4c16595a053e2d2d5699b28e9e91e..fe92099582ff021b02aed06e2bbec22e318ddc33 100644 --- a/test/unit/fiob.result +++ b/test/unit/fiob.result @@ -1,4 +1,4 @@ -1..47 +1..65 ok 1 - tempdir is created ok 2 - common open ok 3 - Hello world is written (12 bytes) @@ -7,42 +7,60 @@ ok 5 - set new position ok 6 - current position 0 ok 7 - Hello world is read (12 bytes) ok 8 - data -ok 9 - set new position -ok 10 - Hello world is read (12 bytes) -ok 11 - data -ok 12 - set new position -ok 13 - set new position -ok 14 - data is read -ok 15 - data is read -ok 16 - fclose -ok 17 - reopened file -ok 18 - move pos at finish -ok 19 - file size -ok 20 - fclose -ok 21 - common open: O_EXCL -ok 22 - common open -ok 23 - Hello world is written (12 bytes) -ok 24 - move pos -ok 25 - Hello world is written (12 bytes) -ok 26 - move pos -ok 27 - read 11 bytes -ok 28 - content was read -ok 29 - fclose -ok 30 - open big file -ok 31 - Hello world is written (13 bytes) -ok 32 - all bytes were written -ok 33 - fclose -ok 34 - reopen file for reading -ok 35 - all records were written properly -ok 36 
- eof -ok 37 - feof +ok 9 - set odd position +ok 10 - check odd position +ok 11 - read from odd position (size) +ok 12 - read from odd position (data) +ok 13 - set start position +ok 14 - check start position +ok 15 - set eof position +ok 16 - check eof position +ok 17 - read from eof position (size) +ok 18 - feof +ok 19 - read from eof position (size) +ok 20 - feof +ok 21 - set -1 position +ok 22 - check -1 position +ok 23 - read from -1 position (size) +ok 24 - read from -1 position (data) +ok 25 - feof +ok 26 - read from eof position (size) +ok 27 - set new position +ok 28 - Hello world is read (12 bytes) +ok 29 - data +ok 30 - set new position +ok 31 - set new position +ok 32 - data is read +ok 33 - data is read +ok 34 - fclose +ok 35 - reopened file +ok 36 - move pos at finish +ok 37 - file size ok 38 - fclose -ok 39 - open big file -ok 40 - Hello world is written (13 bytes) -ok 41 - all bytes were written -ok 42 - fclose -ok 43 - reopen file for reading -ok 44 - all records were written properly -ok 45 - eof -ok 46 - feof +ok 39 - common open: O_EXCL +ok 40 - common open +ok 41 - Hello world is written (12 bytes) +ok 42 - move pos +ok 43 - Hello world is written (12 bytes) +ok 44 - move pos +ok 45 - read 11 bytes +ok 46 - content was read ok 47 - fclose +ok 48 - open big file +ok 49 - Hello world is written (13 bytes) +ok 50 - all bytes were written +ok 51 - fclose +ok 52 - reopen file for reading +ok 53 - all records were written properly +ok 54 - eof +ok 55 - feof +ok 56 - fclose +ok 57 - open big file +ok 58 - Hello world is written (13 bytes) +ok 59 - all bytes were written +ok 60 - fclose +ok 61 - reopen file for reading +ok 62 - all records were written properly +ok 63 - eof +ok 64 - feof +ok 65 - fclose