diff --git a/CMakeLists.txt b/CMakeLists.txt index 205f305a6e895a8b1101a99cf393be5392605c37..1bfa06173cd573bfb44180db296977c38a2d56c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,6 +85,7 @@ endif() check_function_exists(open_memstream HAVE_OPEN_MEMSTREAM) check_function_exists(fmemopen HAVE_FMEMOPEN) check_function_exists(funopen HAVE_FUNOPEN) +check_function_exists(fopencookie HAVE_FOPENCOOKIE) check_function_exists(uuidgen HAVE_UUIDGEN) # diff --git a/src/fiob.c b/src/fiob.c index 61c9e011841ccefacf1f48cfccaf30af01044d70..f22059714ac76ca0e1e11e379d6ee70eccb53a07 100644 --- a/src/fiob.c +++ b/src/fiob.c @@ -16,8 +16,18 @@ #include <say.h> #include <assert.h> #include <unistd.h> -#include <trivia/config.h> +#include <tarantool/config.h> +/* Use special implementation if we have O_DIRECT and FOPENCOOKIE or FUNOPEN */ +#if defined(O_DIRECT) && (defined(HAVE_FUNOPEN) || defined(HAVE_FOPENCOOKIE)) +#define FIOB_DIRECT +#endif + +#if defined(FIOB_DIRECT) +enum { + FIOB_ALIGN = 4096, + FIOB_BSIZE = FIOB_ALIGN * 256 +}; struct fiob { int fd; @@ -37,52 +47,100 @@ struct fiob { #endif }; +static inline off_t +fiob_ceil(off_t off) +{ + /* ceil to FIOB_ALIGN */ + return (off + FIOB_ALIGN - 1) & ~(off_t) (FIOB_ALIGN - 1); +} + +#ifdef HAVE_FUNOPEN +static int +fiob_read(void *cookie, char *buf, int len) +#else static ssize_t -fiob_readf(struct fiob *f, char *buf, size_t count) +fiob_read(void *cookie, char *buf, size_t count) +#endif { + struct fiob *f = (struct fiob *)cookie; ssize_t to_read = (ssize_t) count; + + /* The number of starting bytes in f->buf to skip due to alignment */ + off_t skip = 0; while (to_read > 0) { - int unaligned = f->buf != NULL && (intptr_t) buf % 4096; - ssize_t nrd = read(f->fd, unaligned ? f->buf : buf, to_read); + /* Round `to_read' up to FIOB_ALIGN, but keep it <= size of f->buf */ + ssize_t to_read_al = MIN(fiob_ceil(to_read), f->bsize); + /* + * Optimistically try to read aligned size into the aligned + * buffer.
If the current file position is not aligned then + * read(2) returns EINVAL. In this case seek to an aligned + * position and try again. This trick saves one extra + * syscall for general workflow. + */ + ssize_t nrd = read(f->fd, f->buf, to_read_al); if (nrd < 0) { if (errno == EINTR) { errno = 0; continue; + } else if (errno == EINVAL && skip == 0) { + /* + * read(2) can return EINVAL only in 3 cases: + * 1. read buffer is not aligned - handled in + * fiob_open(). + * 2. read size is not aligned - handled above + * 3. current file position is not aligned - + * handled here. + */ + off_t pos = lseek(f->fd, 0, SEEK_CUR); + if (pos < 0) { + say_syserror("lseek, [%s]", f->path); + return -1; + } + /* Calculate aligned position */ + skip = pos % FIOB_ALIGN; + pos -= skip; + if (skip == 0) { + /* Position is aligned. */ + errno = EINVAL; + say_error("read, [%s]", f->path); + return -1; + } + /* Seek to the new position */ + if (lseek(f->fd, pos, SEEK_SET) != pos) { + say_syserror("lseek, [%s]", f->path); + return -1; + } + /* Try to read again. */ + continue; } - if (errno == EAGAIN || errno == EWOULDBLOCK) - return count != to_read ? count - to_read : -1; say_syserror("read, [%s]", f->path); return -1; /* XXX: file position is unspecified */ } + /* Ignore starting bytes if the position was aligned. */ + nrd -= skip; if (nrd == 0) break; - - if (unaligned) { - memcpy(buf, f->buf, nrd); + if (nrd > to_read) { + /* + * A few more bytes have been read because `to_read' + * is not aligned to FIOB_ALIGN. Set the file position + * to the expected libc value and ignore extra bytes. 
+ */ + if (lseek(f->fd, to_read - nrd, SEEK_CUR) < 0) { + say_syserror("lseek, [%s]", f->path); + return -1; + } + nrd = to_read; } + + memcpy(buf, f->buf + skip, nrd); /* see nrd -= skip */ + skip = 0; /* reset alignment offset */ buf += nrd; to_read -= nrd; - if (f->buf && to_read > 0 && nrd % 4096 != 0) { - /* A workaround to detect EOF with O_DIRECT */ - break; - } } return count - to_read; } -#ifdef HAVE_FUNOPEN -static int -fiob_read(void *cookie, char *buf, int len) -#else -static ssize_t -fiob_read(void *cookie, char *buf, size_t len) -#endif -{ - struct fiob *f = (struct fiob *)cookie; - return fiob_readf(f, buf, len); -} - - static ssize_t fiob_writef(struct fiob *f, const char *buf, size_t count) { @@ -95,8 +153,6 @@ fiob_writef(struct fiob *f, const char *buf, size_t count) errno = 0; continue; } - if (errno == EAGAIN || errno == EWOULDBLOCK) - return count != to_write ? count - to_write : -1; say_syserror("write, [%s]", f->path); return -1; /* XXX: file position is unspecified */ } @@ -109,17 +165,13 @@ fiob_writef(struct fiob *f, const char *buf, size_t count) return count - to_write; } - static int fiob_flushb(struct fiob *f) { - if (!f->buf || !f->bfill) + if (!f->bfill) return 0; - size_t tlen = f->bfill / 4096; - if (f->bfill % 4096) - tlen++; - tlen *= 4096; + size_t tlen = fiob_ceil(f->bfill); if (fiob_writef(f, f->buf, tlen) < 0) { return -1; @@ -149,9 +201,6 @@ fiob_write(void *cookie, const char *buf, size_t len) if (len == 0) return 0; - if (!f->buf) - return fiob_writef(f, buf, len); - ssize_t bytes_left = len; ssize_t tocopy; @@ -169,7 +218,11 @@ fiob_write(void *cookie, const char *buf, size_t len) assert(f->bfill == f->bsize); ssize_t res = fiob_writef(f, f->buf, f->bsize); if (res < 0) +#if defined(HAVE_FUNOPEN) return res; +#else + return 0; +#endif tocopy = f->bsize > bytes_left ? 
bytes_left : f->bsize; /* * We must memcpy because O_DIRECT requires @@ -231,23 +284,24 @@ fiob_close(void *cookie) errno = save_errno; return res; } +#endif /* defined(FIOB_DIRECT) */ -/** open file. The same as fiob_open but receives additional open (2) flags */ +/** open file. The same as fopen but receives additional open (2) flags */ FILE * fiob_open(const char *path, const char *mode) { int omode = 0666; int flags = 0; int save_errno; - - size_t bsize = 0; - void *buf = NULL; - + int fd = -1; + FILE *file = NULL; +#if defined (FIOB_DIRECT) + struct fiob *f = NULL; +#endif /* defined(FIOB_DIRECT) */ int um = umask(0722); umask(um); omode &= ~um; - if (strchr(mode, 'r')) { if (strchr(mode, '+')) flags |= O_RDWR; @@ -275,67 +329,49 @@ fiob_open(const char *path, const char *mode) if (strchr(mode, 'x')) flags |= O_EXCL; #endif - - /* O_DIRECT */ - if (strchr(mode, 'd')) { -#ifdef O_DIRECT - flags |= O_DIRECT; -#endif - bsize = O_DIRECT_BSIZE; - int res = posix_memalign(&buf, 4096, bsize); - if (res || !buf) { - errno = ENOMEM; - return NULL; - } - /* for valgrind */ - memset(buf, 0, bsize); - } - /* O_SYNC */ if (strchr(mode, 's')) { flags |= WAL_SYNC_FLAG; } - struct fiob *f = (struct fiob *)calloc(1, sizeof(struct fiob)); - if (!f) { - free(buf); - errno = ENOMEM; - return NULL; - } - - f->path = strdup(path); - if (!f->path) { - errno = ENOMEM; + fd = open(path, flags, omode); + if (fd < 0) goto error; +#if defined(FIOB_DIRECT) + if (strchr(mode, 'd') == NULL) + goto fdopen; + + /* Try to enable O_DIRECT */ + flags = fcntl(fd, F_GETFL); + if (flags != -1 && fcntl(fd, F_SETFL, flags | O_DIRECT) != -1) { + say_debug("using O_DIRECT for %s", path); + } else { +#if defined(NDEBUG) /* Don't use opencookie in release mode without O_DIRECT */ + goto fdopen; +#endif /* defined(NDEBUG) */ } - f->buf = buf; - f->bsize = bsize; + f = (struct fiob *)calloc(1, sizeof(struct fiob)); + if (!f) + goto error; - f->fd = open(path, flags, omode); -#ifdef O_DIRECT - if (f->fd < 0 
&& (flags & O_DIRECT) && errno == EINVAL) { - /* - * Some filesystems don't support O_DIRECT mode (e.g. tmpfs). - * With O_CREAT|O_DIRECT flags Linux normally creates inode - * in directory and then fails on trying to open it. - * Try to re-open created file without O_DIRECT|O_CREATE flags. - */ - flags &= ~(int) (O_DIRECT | O_CREAT); - f->fd = open(path, flags, omode); - } -#endif /* O_DIRECT */ - if (f->fd < 0) + f->fd = fd; + f->bsize = FIOB_BSIZE; + if (posix_memalign(&f->buf, FIOB_ALIGN, f->bsize)) goto error; + /* for valgrind */ + memset(f->buf, 0, f->bsize); + f->path = strdup(path); + if (!f->path) + goto error; f->io.read = fiob_read; f->io.write = fiob_write; f->io.seek = fiob_seek; f->io.close = fiob_close; - FILE *file; #ifdef HAVE_FUNOPEN file = funopen(f, f->io.read, f->io.write, f->io.seek, f->io.close); @@ -354,15 +390,27 @@ fiob_open(const char *path, const char *mode) return file; +fdopen: +#endif /* defined(FIOB_DIRECT) */ + /* Fallback to libc implementation */ + file = fdopen(fd, mode); + if (!file) + goto error; + return file; + error: save_errno = errno; say_syserror("Can't open '%s'", path); - if (f->fd > 0) - close(f->fd); - - free(f->buf); - free(f->path); - free(f); + if (fd >= 0) + close(fd); + +#if defined(FIOB_DIRECT) + if (f) { + free(f->buf); + free(f->path); + free(f); + } +#endif /* FIOB_DIRECT */ errno = save_errno; return NULL; diff --git a/src/fiob.h b/src/fiob.h index 502b824bc0cc6decb16c61a78905d9b63f312626..9b8de8985d2b8301cf4b173647fbb0734953b158 100644 --- a/src/fiob.h +++ b/src/fiob.h @@ -28,8 +28,6 @@ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ -#define O_DIRECT_BSIZE ( 4096 * 256 ) - #include <sys/types.h> #include <stdio.h> diff --git a/src/trivia/config.h.cmake b/src/trivia/config.h.cmake index 7c807728366f4f54f304d713f9334d1f2b216011..dc272ed29dc6825fca44ef23402ff896d9b9229f 100644 --- a/src/trivia/config.h.cmake +++ b/src/trivia/config.h.cmake @@ -71,6 +71,11 @@ */ #cmakedefine HAVE_FUNOPEN 1 +/* + * Defined if this platform has GNU specific fopencookie() + */ +#cmakedefine HAVE_FOPENCOOKIE 1 + /* * Defined if this platform has GNU specific memmem(). */ diff --git a/test/unit/fiob.c b/test/unit/fiob.c index 9325b752e03429379a9822756d7f2a6a0def4685..2cba9b367d05efad44b2cab5b45033afe8661c40 100644 --- a/test/unit/fiob.c +++ b/test/unit/fiob.c @@ -22,7 +22,7 @@ -#define PLAN 47 +#define PLAN 67 #define ITEMS 7 @@ -77,7 +77,8 @@ main(void) { plan(PLAN); - char *td = mkdtemp(strdup("/tmp/fiob.XXXXXX")); + /* don't create test files in /tmp - tmpfs doesn't support O_DIRECT */ + char *td = mkdtemp(strdup("./fiob.XXXXXX")); isnt(td, NULL, "tempdir is created"); if (td == 0) { diag("Can't create temporary dir: %s", strerror(errno)); @@ -102,6 +103,29 @@ main(void) is(done, 12, "Hello world is read (%zu bytes)", done); is(memcmp(buf, "Hello, world", 12), 0, "data"); + is(fseek(f, 7L, SEEK_SET), 0, "set odd position"); + is(ftell(f), 7L, "check odd position"); + is(fread(buf, 1, 4096, f), 5, "read from odd position (size)"); + is(memcmp(buf, "world", 5), 0, "read from odd position (data)"); + + is(fseek(f, 0L, SEEK_SET), 0, "set start position"); + is(ftell(f), 0L, "check start position"); + + is(fseek(f, 0L, SEEK_END), 0, "set eof position"); + is(ftell(f), 12L, "check eof position"); + is(fread(buf, 1, 4096, f), 0, "read from eof position (size)"); + ok(feof(f), "feof"); + is(fread(buf, 1, 4096, f), 0, "read from eof position (size)"); + ok(feof(f), "feof"); + + is(fseek(f, -1L, SEEK_END), 0, "set -1 position"); + is(ftell(f), 11L, "check -1 position"); + is(fread(buf, 1, 4096, f), 1, "read from -1 
position (size)"); + is(memcmp(buf, "d", 1), 0, "read from -1 position (data)"); + ok(feof(f), "feof"); + + is(fread(buf, 1, 4096, f), 0, "read from eof position (size)"); + is(fseek(f, 0L, SEEK_SET), 0, "set new position"); done = fread(buf + 1, 1, 12, f); is(done, 12, "Hello world is read (%zu bytes)", done); @@ -214,7 +238,21 @@ main(void) is(fclose(f), 0, "fclose"); } - + { + FILE *f = fiob_open("/dev/full", "wd"); + if (f) { + errno = 0; + fputs("test", f); + /* flush buffer && close file */ + int r = fclose(f); + is(errno, ENOSPC, "fwrite failed"); + is(r, EOF, "fwrite failed"); + } else { + /* System doesn't have /dev/full */ + ok(1, "fwrite failed"); + ok(1, "fwrite failed") + } + } if (fork() == 0) execl("/bin/rm", "/bin/rm", "-fr", td, NULL); diff --git a/test/unit/fiob.result b/test/unit/fiob.result index 2cedf473e2d4c16595a053e2d2d5699b28e9e91e..b57d9f9d9cb6c4d611fbf896d55c163c02893fe5 100644 --- a/test/unit/fiob.result +++ b/test/unit/fiob.result @@ -1,4 +1,4 @@ -1..47 +1..67 ok 1 - tempdir is created ok 2 - common open ok 3 - Hello world is written (12 bytes) @@ -7,42 +7,62 @@ ok 5 - set new position ok 6 - current position 0 ok 7 - Hello world is read (12 bytes) ok 8 - data -ok 9 - set new position -ok 10 - Hello world is read (12 bytes) -ok 11 - data -ok 12 - set new position -ok 13 - set new position -ok 14 - data is read -ok 15 - data is read -ok 16 - fclose -ok 17 - reopened file -ok 18 - move pos at finish -ok 19 - file size -ok 20 - fclose -ok 21 - common open: O_EXCL -ok 22 - common open -ok 23 - Hello world is written (12 bytes) -ok 24 - move pos -ok 25 - Hello world is written (12 bytes) -ok 26 - move pos -ok 27 - read 11 bytes -ok 28 - content was read -ok 29 - fclose -ok 30 - open big file -ok 31 - Hello world is written (13 bytes) -ok 32 - all bytes were written -ok 33 - fclose -ok 34 - reopen file for reading -ok 35 - all records were written properly -ok 36 - eof -ok 37 - feof +ok 9 - set odd position +ok 10 - check odd position +ok 11 
- read from odd position (size) +ok 12 - read from odd position (data) +ok 13 - set start position +ok 14 - check start position +ok 15 - set eof position +ok 16 - check eof position +ok 17 - read from eof position (size) +ok 18 - feof +ok 19 - read from eof position (size) +ok 20 - feof +ok 21 - set -1 position +ok 22 - check -1 position +ok 23 - read from -1 position (size) +ok 24 - read from -1 position (data) +ok 25 - feof +ok 26 - read from eof position (size) +ok 27 - set new position +ok 28 - Hello world is read (12 bytes) +ok 29 - data +ok 30 - set new position +ok 31 - set new position +ok 32 - data is read +ok 33 - data is read +ok 34 - fclose +ok 35 - reopened file +ok 36 - move pos at finish +ok 37 - file size ok 38 - fclose -ok 39 - open big file -ok 40 - Hello world is written (13 bytes) -ok 41 - all bytes were written -ok 42 - fclose -ok 43 - reopen file for reading -ok 44 - all records were written properly -ok 45 - eof -ok 46 - feof +ok 39 - common open: O_EXCL +ok 40 - common open +ok 41 - Hello world is written (12 bytes) +ok 42 - move pos +ok 43 - Hello world is written (12 bytes) +ok 44 - move pos +ok 45 - read 11 bytes +ok 46 - content was read ok 47 - fclose +ok 48 - open big file +ok 49 - Hello world is written (13 bytes) +ok 50 - all bytes were written +ok 51 - fclose +ok 52 - reopen file for reading +ok 53 - all records were written properly +ok 54 - eof +ok 55 - feof +ok 56 - fclose +ok 57 - open big file +ok 58 - Hello world is written (13 bytes) +ok 59 - all bytes were written +ok 60 - fclose +ok 61 - reopen file for reading +ok 62 - all records were written properly +ok 63 - eof +ok 64 - feof +ok 65 - fclose +ok 66 - fwrite failed +ok 67 - fwrite failed