diff --git a/include/fiber.h b/include/fiber.h new file mode 100644 index 0000000000000000000000000000000000000000..7c391620a3714473840c40a3f1b86b51b1101336 --- /dev/null +++ b/include/fiber.h @@ -0,0 +1,183 @@ +#ifndef TARANTOOL_FIBER_H_INCLUDED +#define TARANTOOL_FIBER_H_INCLUDED +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+#include "tarantool/config.h"
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <tarantool_ev.h>
+#include <coro.h>
+#include <tarantool/util.h>
+#include "third_party/queue.h"
+
+#if defined(__cplusplus)
+#include "exception.h"
+#endif /* defined(__cplusplus) */
+#include "palloc.h"
+#include <rlist.h>
+
+/** Fiber name length limit; the same as the palloc pool name limit. */
+#define FIBER_NAME_MAXLEN PALLOC_POOL_NAME_MAXLEN
+
+/* Flag bits; presumably kept in fiber::flags -- TODO confirm. */
+#define FIBER_READING_INBOX (1 << 0)
+/** This fiber can be cancelled synchronously. */
+#define FIBER_CANCELLABLE (1 << 1)
+/** Indicates that a fiber has been cancelled. */
+#define FIBER_CANCEL (1 << 2)
+/** This fiber was created via stored procedures API. */
+#define FIBER_USER_MODE (1 << 3)
+/** This fiber was marked as ready for wake up. */
+#define FIBER_READY (1 << 4)
+
+/**
+ * This is thrown by fiber_* API calls when the fiber is
+ * cancelled.
+ */
+
+#if defined(__cplusplus)
+class FiberCancelException: public Exception {
+public:
+	FiberCancelException(const char *file, unsigned line)
+		: Exception(file, line) {
+		/* Nothing */
+	}
+
+	virtual void log() const {
+		say_debug("FiberCancelException");
+	}
+};
+
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+struct fiber {
+#ifdef ENABLE_BACKTRACE
+	void *last_stack_frame;
+#endif
+	int csw;
+	struct tarantool_coro coro;
+	/* A garbage-collected memory pool. */
+	struct palloc_pool *gc_pool;
+	/** Fiber id. */
+	uint32_t fid;
+	/**
+	 * Session id of the session the fiber is running
+	 * on behalf of. The concept of an associated session
+	 * is similar to the concept of controlling tty
+	 * in a UNIX process. When a fiber is created,
+	 * its sid is 0. If it's running a request on behalf
+	 * of a user connection, its sid is changed to module-
+	 * generated identifier of the session.
+	 */
+	uint32_t sid;
+
+	struct rlist link;
+	/** List link used to queue the fiber, e.g. on a mutex. */
+	struct rlist state;
+
+	/* This struct is considered as non-POD when compiling by g++.
+	 * You can safely ignore all offset_of-related warnings.
+	 * See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31488
+	 */
+	void (*f) (va_list);
+	va_list f_data;
+	uint32_t flags;
+	struct fiber *waiter;
+	/** Value stored together with sid by fiber_set_sid(). */
+	uint64_t cookie;
+};
+
+/** Per-thread fiber pointer; presumably the running fiber -- TODO confirm. */
+extern __thread struct fiber *fiber_ptr;
+
+void fiber_init(void);
+void fiber_free(void);
+typedef void(*fiber_func)(va_list);
+struct fiber *fiber_new(const char *name, fiber_func func);
+void fiber_set_name(struct fiber *fiber, const char *name);
+int wait_for_child(pid_t pid);
+
+/**
+ * Return the name of the fiber, which is the name of its
+ * gc_pool, or "(none)" if the fiber has no pool.
+ */
+static inline const char *
+fiber_name(struct fiber *f)
+{
+	return f->gc_pool ? palloc_name(f->gc_pool) : "(none)";
+}
+
+void
+fiber_checkstack(void);
+
+void fiber_yield(void);
+void fiber_yield_to(struct fiber *f);
+
+/**
+ * @brief yield & check for timeout
+ * @return true if timeout exceeded
+ */
+bool fiber_yield_timeout(ev_tstamp delay);
+
+
+void fiber_destroy_all(void);
+
+void fiber_gc(void);
+void fiber_call(struct fiber *callee, ...);
+void fiber_wakeup(struct fiber *f);
+struct fiber *fiber_find(uint32_t fid);
+/** Cancel a fiber. A cancelled fiber will have
+ * FiberCancelException raised in it.
+ *
+ * A fiber can be cancelled only if its
+ * FIBER_CANCELLABLE flag is set.
+ */
+void fiber_cancel(struct fiber *f);
+/** Check if the current fiber has been cancelled. Raises
+ * FiberCancelException.
+ */
+void fiber_testcancel(void);
+/** Make it possible or not possible to cancel the current
+ * fiber.
+ *
+ * @return the previous state.
+ */
+bool fiber_setcancellable(bool enable);
+void fiber_sleep(ev_tstamp s);
+struct tbuf;
+void fiber_info(struct tbuf *out);
+void fiber_schedule(ev_watcher *watcher, int event __attribute__((unused)));
+
+/**
+ * Attach this fiber to a session identified by sid and to a cookie.
+ */
+static inline void
+fiber_set_sid(struct fiber *f, uint32_t sid, uint64_t cookie)
+{
+	f->sid = sid;
+	f->cookie = cookie;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* TARANTOOL_FIBER_H_INCLUDED */
diff --git a/include/mutex.h b/include/mutex.h
new file mode 100644
index 0000000000000000000000000000000000000000..cab56463eea8fdda08c650af5e544eef68db8745
--- /dev/null
+++ b/include/mutex.h
@@ -0,0 +1,140 @@
+#ifndef TARANTOOL_MUTEX_H_INCLUDED
+#define TARANTOOL_MUTEX_H_INCLUDED
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <errno.h>	/* errno, ETIMEDOUT */
+#include <stdbool.h>
+#include <rlist.h>
+/*
+ * The inline functions below use struct fiber, fiber_ptr,
+ * fiber_yield_timeout() and fiber_wakeup(), all declared in
+ * fiber.h; include it so that this header is self-contained.
+ */
+#include "fiber.h"
+
+/** Mutex of cooperative multitasking environment. */
+
+struct mutex
+{
+	/**
+	 * The queue of fibers waiting on a mutex.
+	 * The first fiber owns the mutex.
+	 */
+	struct rlist queue;
+};
+
+/**
+ * Initialize the given mutex.
+ *
+ * @param m mutex to be initialized.
+ */
+static inline void
+mutex_create(struct mutex *m)
+{
+	rlist_create(&m->queue);
+}
+
+/**
+ * Destroy a mutex: unlink every fiber that is still queued on it.
+ * The unlinked fibers are not woken up.
+ *
+ * @param m mutex to be destroyed.
+ */
+static inline void
+mutex_destroy(struct mutex *m)
+{
+	while (!rlist_empty(&m->queue)) {
+		struct fiber *f = rlist_first_entry(&m->queue,
+						    struct fiber, state);
+		rlist_del_entry(f, state);
+	}
+}
+
+/**
+ * Lock a mutex. If the mutex is already locked by another fiber,
+ * waits for timeout.
+ *
+ * The calling fiber is appended to the wait queue and owns the
+ * mutex once it becomes the first entry. Ownership is checked
+ * before the deadline on every iteration, so a fiber that was
+ * handed the mutex right at the deadline still gets it, and a
+ * non-positive timeout behaves like a try-lock.
+ *
+ * @param m mutex to be locked.
+ * @param timeout maximal time to wait, in seconds.
+ *
+ * @retval false success
+ * @retval true timeout, errno is set to ETIMEDOUT and the fiber
+ *              is removed from the wait queue
+ */
+static inline bool
+mutex_lock_timeout(struct mutex *m, ev_tstamp timeout)
+{
+	rlist_add_tail_entry(&m->queue, fiber_ptr, state);
+	/* The moment the wait started; elapsed time is measured from it. */
+	ev_tstamp start = ev_now();
+	for (;;) {
+		struct fiber *f = rlist_first_entry(&m->queue,
+						    struct fiber, state);
+		if (f == fiber_ptr)
+			return false;
+
+		ev_tstamp left = timeout - (ev_now() - start);
+		if (left <= 0) {
+			rlist_del_entry(fiber_ptr, state);
+			errno = ETIMEDOUT;
+			return true;
+		}
+		/*
+		 * NOTE(review): if fiber_yield_timeout() can raise on
+		 * cancellation, the fiber would stay queued -- confirm.
+		 */
+		fiber_yield_timeout(left);
+	}
+}
+
+/**
+ * Lock a mutex (no timeout). Waits indefinitely until
+ * the current fiber can gain access to the mutex.
+ */
+static inline void
+mutex_lock(struct mutex *m)
+{
+	(void) mutex_lock_timeout(m, TIMEOUT_INFINITY);
+}
+
+/**
+ * Try to lock a mutex. Return immediately if the mutex is locked.
+ * @retval false success
+ * @retval true the mutex is locked.
+ */
+static inline bool
+mutex_trylock(struct mutex *m)
+{
+	if (rlist_empty(&m->queue)) {
+		/* Nobody is queued: locking cannot block. */
+		mutex_lock(m);
+		return false;
+	}
+	return true;
+}
+
+/**
+ * Unlock a mutex. The fiber calling this function must
+ * own the mutex.
+ */
+static inline void
+mutex_unlock(struct mutex *m)
+{
+	/* The owner is the first entry of the wait queue. */
+	struct fiber *f = rlist_first_entry(&m->queue, struct fiber, state);
+	assert(f == fiber_ptr);
+	rlist_del_entry(f, state);
+	if (rlist_empty(&m->queue))
+		return;
+	/* Hand the mutex over: wake the fiber that is now first in line. */
+	fiber_wakeup(rlist_first_entry(&m->queue, struct fiber, state));
+}
+
+#endif /* TARANTOOL_MUTEX_H_INCLUDED */
diff --git a/src/admin.cc b/src/admin.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9670a9250b8bd6a49a330899fde18a51156dc4d0
--- /dev/null
+++ b/src/admin.cc
@@ -0,0 +1,2029 @@
+
+#line 1 "src/admin.rl"
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <unistd.h> +#include <stdlib.h> + +#include <fiber.h> +#include <palloc.h> +#include <salloc.h> +#include <say.h> +#include <stat.h> +#include <tarantool.h> +#include "lua/init.h" +#include <recovery.h> +#include <tbuf.h> +#include "tarantool/util.h" +#include <errinj.h> +#include "coio_buf.h" + +extern "C" { +#include <lua.h> +#include <lauxlib.h> +#include <lualib.h> +} + +#include "box/box.h" +#include "lua/init.h" +#include "session.h" +#include "scoped_guard.h" +#include "box/space.h" + +static const char *help = + "available commands:" CRLF + " - help" CRLF + " - exit" CRLF + " - show info" CRLF + " - show fiber" CRLF + " - show configuration" CRLF + " - show index" CRLF + " - show slab" CRLF + " - show palloc" CRLF + " - show stat" CRLF + " - save coredump" CRLF + " - save snapshot" CRLF + " - lua command" CRLF + " - reload configuration" CRLF + " - show injections (debug mode only)" CRLF + " - set injection <name> <state> (debug mode only)" CRLF; + +static const char *unknown_command = "unknown command. try typing help." 
CRLF;
+
+
+#line 84 "src/admin.cc"
+/* State machine constants emitted by Ragel; do not edit by hand. */
+static const int admin_start = 1;
+static const int admin_first_final = 139;
+static const int admin_error = 0;
+
+static const int admin_en_main = 1;
+
+
+#line 83 "src/admin.rl"
+
+
+/** Accumulator passed to salloc_stat_admin_cb() by show_slab(). */
+struct salloc_stat_admin_cb_ctx {
+	/* Sum of bytes_used over all reported slab classes. */
+	int64_t total_used;
+	/* Sum of bytes_used_real over all reported slab classes. */
+	int64_t total_used_real;
+	/* Sum of bytes_alloc_real over all reported slab classes. */
+	int64_t total_alloc_real;
+	/* Buffer the report is printed to. */
+	struct tbuf *out;
+};
+
+/**
+ * Per-class callback for salloc_stat(): print one line describing
+ * the slab class and add its counters to the running totals.
+ *
+ * @param cstat  statistics of one slab class
+ * @param cb_ctx a struct salloc_stat_admin_cb_ctx
+ * @return always 0
+ */
+static int
+salloc_stat_admin_cb(const struct slab_cache_stats *cstat, void *cb_ctx)
+{
+	struct salloc_stat_admin_cb_ctx *ctx = (struct salloc_stat_admin_cb_ctx *) cb_ctx;
+
+	/* The 0.001 term presumably guards against a zero denominator. */
+	tbuf_printf(ctx->out,
+		    " - { item_size: %6i, slabs: %6i, items: %11" PRIi64
+		    ", bytes_used: %12" PRIi64 ", waste: %5.2f%%"
+		    ", bytes_free: %12" PRIi64 " }" CRLF,
+		    (int)cstat->item_size,
+		    (int)cstat->slabs,
+		    cstat->items,
+		    cstat->bytes_used,
+		    (double)(cstat->bytes_alloc_real - cstat->bytes_used_real)*100 /
+		    (cstat->bytes_alloc_real + 0.001),
+		    cstat->bytes_free);
+
+	ctx->total_used += cstat->bytes_used;
+	ctx->total_alloc_real += cstat->bytes_alloc_real;
+	ctx->total_used_real += cstat->bytes_used_real;
+	return 0;
+}
+
+/**
+ * Print slab allocator statistics: one line per slab class
+ * followed by arena-wide usage and waste figures.
+ *
+ * @param out buffer to print to
+ */
+static void
+show_slab(struct tbuf *out)
+{
+	struct salloc_stat_admin_cb_ctx cb_ctx;
+	struct slab_arena_stats astat;
+
+	cb_ctx.total_used = 0;
+	cb_ctx.total_used_real = 0;
+	cb_ctx.total_alloc_real = 0;
+	cb_ctx.out = out;
+
+	tbuf_printf(out, "slab statistics:\n classes:" CRLF);
+
+	salloc_stat(salloc_stat_admin_cb, &astat, &cb_ctx);
+
+	/*
+	 * NOTE(review): unlike the waste figures below, these two
+	 * ratios have no guard for astat.size == 0 -- confirm the
+	 * arena size can never be zero here.
+	 */
+	tbuf_printf(out, " items_used: %.2f%%" CRLF,
+		(double)cb_ctx.total_used / astat.size * 100);
+	tbuf_printf(out, " arena_used: %.2f%%" CRLF,
+		(double)astat.used / astat.size * 100);
+	tbuf_printf(out, " waste: %.2f%%" CRLF,
+		    (double)(cb_ctx.total_alloc_real - cb_ctx.total_used_real) / (cb_ctx.total_alloc_real + 0.001) * 100);
+	tbuf_printf(out, " bytes_waste: %12" PRIi64 CRLF,
+		    (int64_t)((double)cb_ctx.total_used*(cb_ctx.total_alloc_real - cb_ctx.total_used_real) /
+			      (cb_ctx.total_alloc_real + 0.001)));
+}
+
+/** Print the "..." line that closes a reply. */
+static void
+end(struct tbuf *out)
+{
+	tbuf_printf(out, "..."
CRLF);
+}
+
+/** Print the "---" line that opens a reply. */
+static void
+start(struct tbuf *out)
+{
+	tbuf_printf(out, "---" CRLF);
+}
+
+/** Print a complete reply that consists of a single "ok" line. */
+static void
+ok(struct tbuf *out)
+{
+	start(out);
+	tbuf_printf(out, "ok" CRLF);
+	end(out);
+}
+
+/**
+ * Print a complete failure reply carrying the text accumulated
+ * in err.
+ *
+ * @param out buffer to print to
+ * @param err buffer holding the error message; it is printed by
+ *            length and does not have to be NUL-terminated
+ */
+static void
+fail(struct tbuf *out, struct tbuf *err)
+{
+	start(out);
+	/* The precision argument of "%.*s" must be an int. */
+	tbuf_printf(out, "fail:%.*s" CRLF, (int)err->size, (char *)err->data);
+	end(out);
+}
+
+/**
+ * Print per-space and per-index memory usage, followed by the
+ * total over all spaces. The arrays returned by space_stat() are
+ * terminated by an entry with a negative n.
+ *
+ * @param out buffer to print to
+ */
+static void
+index_info(struct tbuf *out)
+{
+	tbuf_printf(out, "index:" CRLF);
+	struct space_stat *stat = space_stat();
+	int sp_i = 0;
+	int64_t total_size = 0;
+	while (stat[sp_i].n >= 0) {
+		tbuf_printf(out, " - space: %" PRIi32 CRLF, stat[sp_i].n);
+		int64_t sp_size = 0;
+		int i;
+		/* First pass: memory taken by all indexes of the space. */
+		for (i = 0; stat[sp_i].index[i].n >= 0; ++i)
+			sp_size += stat[sp_i].index[i].memsize;
+
+		tbuf_printf(out, " memsize: %15" PRIi64 CRLF, sp_size);
+		total_size += sp_size;
+		tbuf_printf(out, " index: " CRLF);
+		/* Second pass: one line per index. */
+		for (i = 0; stat[sp_i].index[i].n >= 0; ++i) {
+			tbuf_printf(out, " - { n: %3d, keys: %15" PRIi64 ", memsize: %15" PRIi64 " }" CRLF,
+				    stat[sp_i].index[i].n, stat[sp_i].index[i].keys, stat[sp_i].index[i].memsize);
+		}
+		++sp_i;
+	}
+	tbuf_printf(out, "memsize: %15" PRIi64 CRLF, total_size);
+}
+
+/** Print general server information (version, uptime, pids, lsn, ...). */
+static void
+tarantool_info(struct tbuf *out)
+{
+	tbuf_printf(out, "info:" CRLF);
+	tbuf_printf(out, " version: \"%s\"" CRLF, tarantool_version());
+	tbuf_printf(out, " uptime: %i" CRLF, (int)tarantool_uptime());
+	tbuf_printf(out, " pid: %i" CRLF, getpid());
+	tbuf_printf(out, " logger_pid: %i" CRLF, logger_pid);
+	tbuf_printf(out, " snapshot_pid: %i" CRLF, snapshot_pid);
+	tbuf_printf(out, " lsn: %" PRIi64 CRLF,
+		    recovery_state->confirmed_lsn);
+	tbuf_printf(out, " recovery_lag: %.3f" CRLF,
+		    recovery_state->remote ?
+		    recovery_state->remote->recovery_lag : 0);
+	tbuf_printf(out, " recovery_last_update: %.3f" CRLF,
+		    recovery_state->remote ?
+ recovery_state->remote->recovery_last_update_tstamp :0); + box_info(out); + const char *path = cfg_filename_fullpath; + if (path == NULL) + path = cfg_filename; + tbuf_printf(out, " config: \"%s\"" CRLF, path); +} + +static int +show_stat_item(const char *name, int rps, int64_t total, void *ctx) +{ + struct tbuf *buf = (struct tbuf *) ctx; + int name_len = strlen(name); + tbuf_printf(buf, + " %s:%*s{ rps: %- 6i, total: %- 12" PRIi64 " }" CRLF, + name, 1 + stat_max_name_len - name_len, " ", rps, total); + return 0; +} + +void +show_stat(struct tbuf *buf) +{ + tbuf_printf(buf, "statistics:" CRLF); + stat_foreach(show_stat_item, buf); +} + +static int +admin_dispatch(struct ev_io *coio, struct iobuf *iobuf, lua_State *L) +{ + struct ibuf *in = &iobuf->in; + struct tbuf *out = tbuf_new(fiber_ptr->gc_pool); + struct tbuf *err = tbuf_new(fiber_ptr->gc_pool); + int cs; + char *p, *pe; + char *strstart, *strend; + bool state; + + while ((pe = (char *) memchr(in->pos, '\n', in->end - in->pos)) == NULL) { + if (coio_bread(coio, in, 1) <= 0) + return -1; + } + + pe++; + p = in->pos; + + +#line 267 "src/admin.cc" + { + cs = admin_start; + } + +#line 272 "src/admin.cc" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 99: goto st2; + case 101: goto st13; + case 104: goto st17; + case 108: goto st21; + case 113: goto st27; + case 114: goto st28; + case 115: goto st48; + } + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + if ( (*p) == 104 ) + goto st3; + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + switch( (*p) ) { + case 32: goto st4; + case 101: goto st10; + } + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 32: goto st4; + case 115: goto st5; + } + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 108 ) + goto st6; + goto st0; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + 
case 10: goto tr13; + case 13: goto tr14; + case 97: goto st8; + } + goto st0; +tr13: +#line 357 "src/admin.rl" + {slab_validate(); ok(out);} + goto st139; +tr20: +#line 344 "src/admin.rl" + {return -1;} + goto st139; +tr25: +#line 269 "src/admin.rl" + { + start(out); + tbuf_append(out, help, strlen(help)); + end(out); + } + goto st139; +tr36: +#line 330 "src/admin.rl" + {strend = p;} +#line 275 "src/admin.rl" + { + strstart[strend-strstart]='\0'; + start(out); + tarantool_lua(L, out, strstart); + end(out); + } + goto st139; +tr43: +#line 282 "src/admin.rl" + { + if (reload_cfg(err)) + fail(out, err); + else + ok(out); + } + goto st139; +tr67: +#line 355 "src/admin.rl" + {coredump(60); ok(out);} + goto st139; +tr76: +#line 289 "src/admin.rl" + { + int ret = snapshot(); + + if (ret == 0) + ok(out); + else { + tbuf_printf(err, " can't save snapshot, errno %d (%s)", + ret, strerror(ret)); + + fail(out, err); + } + } + goto st139; +tr98: +#line 340 "src/admin.rl" + { state = false; } +#line 302 "src/admin.rl" + { + strstart[strend-strstart] = '\0'; + if (errinj_set_byname(strstart, state)) { + tbuf_printf(err, "can't find error injection '%s'", strstart); + fail(out, err); + } else { + ok(out); + } + } + goto st139; +tr101: +#line 339 "src/admin.rl" + { state = true; } +#line 302 "src/admin.rl" + { + strstart[strend-strstart] = '\0'; + if (errinj_set_byname(strstart, state)) { + tbuf_printf(err, "can't find error injection '%s'", strstart); + fail(out, err); + } else { + ok(out); + } + } + goto st139; +tr117: +#line 257 "src/admin.rl" + { + start(out); + show_cfg(out); + end(out); + } + goto st139; +tr131: +#line 348 "src/admin.rl" + {start(out); fiber_info(out); end(out);} + goto st139; +tr139: +#line 347 "src/admin.rl" + {start(out); index_info(out); end(out);} + goto st139; +tr141: +#line 346 "src/admin.rl" + {start(out); tarantool_info(out); end(out);} + goto st139; +tr152: +#line 263 "src/admin.rl" + { + start(out); + errinj_info(out); + end(out); + } + goto 
st139; +tr158: +#line 351 "src/admin.rl" + {start(out); palloc_stat(out); end(out);} + goto st139; +tr166: +#line 350 "src/admin.rl" + {start(out); show_slab(out); end(out);} + goto st139; +tr170: +#line 352 "src/admin.rl" + {start(out); show_stat(out);end(out);} + goto st139; +st139: + if ( ++p == pe ) + goto _test_eof139; +case 139: +#line 461 "src/admin.cc" + goto st0; +tr14: +#line 357 "src/admin.rl" + {slab_validate(); ok(out);} + goto st7; +tr21: +#line 344 "src/admin.rl" + {return -1;} + goto st7; +tr26: +#line 269 "src/admin.rl" + { + start(out); + tbuf_append(out, help, strlen(help)); + end(out); + } + goto st7; +tr37: +#line 330 "src/admin.rl" + {strend = p;} +#line 275 "src/admin.rl" + { + strstart[strend-strstart]='\0'; + start(out); + tarantool_lua(L, out, strstart); + end(out); + } + goto st7; +tr44: +#line 282 "src/admin.rl" + { + if (reload_cfg(err)) + fail(out, err); + else + ok(out); + } + goto st7; +tr68: +#line 355 "src/admin.rl" + {coredump(60); ok(out);} + goto st7; +tr77: +#line 289 "src/admin.rl" + { + int ret = snapshot(); + + if (ret == 0) + ok(out); + else { + tbuf_printf(err, " can't save snapshot, errno %d (%s)", + ret, strerror(ret)); + + fail(out, err); + } + } + goto st7; +tr99: +#line 340 "src/admin.rl" + { state = false; } +#line 302 "src/admin.rl" + { + strstart[strend-strstart] = '\0'; + if (errinj_set_byname(strstart, state)) { + tbuf_printf(err, "can't find error injection '%s'", strstart); + fail(out, err); + } else { + ok(out); + } + } + goto st7; +tr102: +#line 339 "src/admin.rl" + { state = true; } +#line 302 "src/admin.rl" + { + strstart[strend-strstart] = '\0'; + if (errinj_set_byname(strstart, state)) { + tbuf_printf(err, "can't find error injection '%s'", strstart); + fail(out, err); + } else { + ok(out); + } + } + goto st7; +tr118: +#line 257 "src/admin.rl" + { + start(out); + show_cfg(out); + end(out); + } + goto st7; +tr132: +#line 348 "src/admin.rl" + {start(out); fiber_info(out); end(out);} + goto st7; +tr140: 
+#line 347 "src/admin.rl" + {start(out); index_info(out); end(out);} + goto st7; +tr142: +#line 346 "src/admin.rl" + {start(out); tarantool_info(out); end(out);} + goto st7; +tr153: +#line 263 "src/admin.rl" + { + start(out); + errinj_info(out); + end(out); + } + goto st7; +tr159: +#line 351 "src/admin.rl" + {start(out); palloc_stat(out); end(out);} + goto st7; +tr167: +#line 350 "src/admin.rl" + {start(out); show_slab(out); end(out);} + goto st7; +tr171: +#line 352 "src/admin.rl" + {start(out); show_stat(out);end(out);} + goto st7; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: +#line 590 "src/admin.cc" + if ( (*p) == 10 ) + goto st139; + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 10: goto tr13; + case 13: goto tr14; + case 98: goto st9; + } + goto st0; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + switch( (*p) ) { + case 10: goto tr13; + case 13: goto tr14; + } + goto st0; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: + switch( (*p) ) { + case 32: goto st4; + case 99: goto st11; + } + goto st0; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: + switch( (*p) ) { + case 32: goto st4; + case 107: goto st12; + } + goto st0; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: + if ( (*p) == 32 ) + goto st4; + goto st0; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: + switch( (*p) ) { + case 10: goto tr20; + case 13: goto tr21; + case 120: goto st14; + } + goto st0; +st14: + if ( ++p == pe ) + goto _test_eof14; +case 14: + switch( (*p) ) { + case 10: goto tr20; + case 13: goto tr21; + case 105: goto st15; + } + goto st0; +st15: + if ( ++p == pe ) + goto _test_eof15; +case 15: + switch( (*p) ) { + case 10: goto tr20; + case 13: goto tr21; + case 116: goto st16; + } + goto st0; +st16: + if ( ++p == pe ) + goto _test_eof16; +case 16: + switch( (*p) ) { + case 10: goto tr20; + case 13: goto tr21; + } + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + switch( (*p) ) 
{ + case 10: goto tr25; + case 13: goto tr26; + case 101: goto st18; + } + goto st0; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + switch( (*p) ) { + case 10: goto tr25; + case 13: goto tr26; + case 108: goto st19; + } + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + switch( (*p) ) { + case 10: goto tr25; + case 13: goto tr26; + case 112: goto st20; + } + goto st0; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + switch( (*p) ) { + case 10: goto tr25; + case 13: goto tr26; + } + goto st0; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: + if ( (*p) == 117 ) + goto st22; + goto st0; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: + switch( (*p) ) { + case 32: goto st23; + case 97: goto st26; + } + goto st0; +st23: + if ( ++p == pe ) + goto _test_eof23; +case 23: + switch( (*p) ) { + case 10: goto st0; + case 13: goto st0; + case 32: goto tr34; + } + goto tr33; +tr33: +#line 330 "src/admin.rl" + {strstart = p;} + goto st24; +st24: + if ( ++p == pe ) + goto _test_eof24; +case 24: +#line 750 "src/admin.cc" + switch( (*p) ) { + case 10: goto tr36; + case 13: goto tr37; + } + goto st24; +tr34: +#line 330 "src/admin.rl" + {strstart = p;} + goto st25; +st25: + if ( ++p == pe ) + goto _test_eof25; +case 25: +#line 764 "src/admin.cc" + switch( (*p) ) { + case 10: goto tr36; + case 13: goto tr37; + case 32: goto tr34; + } + goto tr33; +st26: + if ( ++p == pe ) + goto _test_eof26; +case 26: + if ( (*p) == 32 ) + goto st23; + goto st0; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: + switch( (*p) ) { + case 10: goto tr20; + case 13: goto tr21; + case 117: goto st14; + } + goto st0; +st28: + if ( ++p == pe ) + goto _test_eof28; +case 28: + if ( (*p) == 101 ) + goto st29; + goto st0; +st29: + if ( ++p == pe ) + goto _test_eof29; +case 29: + switch( (*p) ) { + case 32: goto st30; + case 108: goto st44; + } + goto st0; +st30: + if ( ++p == pe ) + goto _test_eof30; +case 30: + switch( (*p) ) { + case 32: goto st30; + 
case 99: goto st31; + } + goto st0; +st31: + if ( ++p == pe ) + goto _test_eof31; +case 31: + if ( (*p) == 111 ) + goto st32; + goto st0; +st32: + if ( ++p == pe ) + goto _test_eof32; +case 32: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 110: goto st33; + } + goto st0; +st33: + if ( ++p == pe ) + goto _test_eof33; +case 33: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 102: goto st34; + } + goto st0; +st34: + if ( ++p == pe ) + goto _test_eof34; +case 34: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 105: goto st35; + } + goto st0; +st35: + if ( ++p == pe ) + goto _test_eof35; +case 35: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 103: goto st36; + } + goto st0; +st36: + if ( ++p == pe ) + goto _test_eof36; +case 36: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 117: goto st37; + } + goto st0; +st37: + if ( ++p == pe ) + goto _test_eof37; +case 37: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 114: goto st38; + } + goto st0; +st38: + if ( ++p == pe ) + goto _test_eof38; +case 38: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 97: goto st39; + } + goto st0; +st39: + if ( ++p == pe ) + goto _test_eof39; +case 39: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 116: goto st40; + } + goto st0; +st40: + if ( ++p == pe ) + goto _test_eof40; +case 40: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 105: goto st41; + } + goto st0; +st41: + if ( ++p == pe ) + goto _test_eof41; +case 41: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 111: goto st42; + } + goto st0; +st42: + if ( ++p == pe ) + goto _test_eof42; +case 42: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + case 110: goto st43; + } + goto st0; +st43: + if ( ++p == pe ) + goto _test_eof43; +case 43: + switch( (*p) ) { + case 10: goto tr43; + case 13: goto tr44; + } + 
goto st0; +st44: + if ( ++p == pe ) + goto _test_eof44; +case 44: + switch( (*p) ) { + case 32: goto st30; + case 111: goto st45; + } + goto st0; +st45: + if ( ++p == pe ) + goto _test_eof45; +case 45: + switch( (*p) ) { + case 32: goto st30; + case 97: goto st46; + } + goto st0; +st46: + if ( ++p == pe ) + goto _test_eof46; +case 46: + switch( (*p) ) { + case 32: goto st30; + case 100: goto st47; + } + goto st0; +st47: + if ( ++p == pe ) + goto _test_eof47; +case 47: + if ( (*p) == 32 ) + goto st30; + goto st0; +st48: + if ( ++p == pe ) + goto _test_eof48; +case 48: + switch( (*p) ) { + case 97: goto st49; + case 101: goto st69; + case 104: goto st88; + } + goto st0; +st49: + if ( ++p == pe ) + goto _test_eof49; +case 49: + switch( (*p) ) { + case 32: goto st50; + case 118: goto st67; + } + goto st0; +st50: + if ( ++p == pe ) + goto _test_eof50; +case 50: + switch( (*p) ) { + case 32: goto st50; + case 99: goto st51; + case 115: goto st59; + } + goto st0; +st51: + if ( ++p == pe ) + goto _test_eof51; +case 51: + if ( (*p) == 111 ) + goto st52; + goto st0; +st52: + if ( ++p == pe ) + goto _test_eof52; +case 52: + switch( (*p) ) { + case 10: goto tr67; + case 13: goto tr68; + case 114: goto st53; + } + goto st0; +st53: + if ( ++p == pe ) + goto _test_eof53; +case 53: + switch( (*p) ) { + case 10: goto tr67; + case 13: goto tr68; + case 101: goto st54; + } + goto st0; +st54: + if ( ++p == pe ) + goto _test_eof54; +case 54: + switch( (*p) ) { + case 10: goto tr67; + case 13: goto tr68; + case 100: goto st55; + } + goto st0; +st55: + if ( ++p == pe ) + goto _test_eof55; +case 55: + switch( (*p) ) { + case 10: goto tr67; + case 13: goto tr68; + case 117: goto st56; + } + goto st0; +st56: + if ( ++p == pe ) + goto _test_eof56; +case 56: + switch( (*p) ) { + case 10: goto tr67; + case 13: goto tr68; + case 109: goto st57; + } + goto st0; +st57: + if ( ++p == pe ) + goto _test_eof57; +case 57: + switch( (*p) ) { + case 10: goto tr67; + case 13: goto tr68; + case 112: goto 
st58; + } + goto st0; +st58: + if ( ++p == pe ) + goto _test_eof58; +case 58: + switch( (*p) ) { + case 10: goto tr67; + case 13: goto tr68; + } + goto st0; +st59: + if ( ++p == pe ) + goto _test_eof59; +case 59: + if ( (*p) == 110 ) + goto st60; + goto st0; +st60: + if ( ++p == pe ) + goto _test_eof60; +case 60: + switch( (*p) ) { + case 10: goto tr76; + case 13: goto tr77; + case 97: goto st61; + } + goto st0; +st61: + if ( ++p == pe ) + goto _test_eof61; +case 61: + switch( (*p) ) { + case 10: goto tr76; + case 13: goto tr77; + case 112: goto st62; + } + goto st0; +st62: + if ( ++p == pe ) + goto _test_eof62; +case 62: + switch( (*p) ) { + case 10: goto tr76; + case 13: goto tr77; + case 115: goto st63; + } + goto st0; +st63: + if ( ++p == pe ) + goto _test_eof63; +case 63: + switch( (*p) ) { + case 10: goto tr76; + case 13: goto tr77; + case 104: goto st64; + } + goto st0; +st64: + if ( ++p == pe ) + goto _test_eof64; +case 64: + switch( (*p) ) { + case 10: goto tr76; + case 13: goto tr77; + case 111: goto st65; + } + goto st0; +st65: + if ( ++p == pe ) + goto _test_eof65; +case 65: + switch( (*p) ) { + case 10: goto tr76; + case 13: goto tr77; + case 116: goto st66; + } + goto st0; +st66: + if ( ++p == pe ) + goto _test_eof66; +case 66: + switch( (*p) ) { + case 10: goto tr76; + case 13: goto tr77; + } + goto st0; +st67: + if ( ++p == pe ) + goto _test_eof67; +case 67: + switch( (*p) ) { + case 32: goto st50; + case 101: goto st68; + } + goto st0; +st68: + if ( ++p == pe ) + goto _test_eof68; +case 68: + if ( (*p) == 32 ) + goto st50; + goto st0; +st69: + if ( ++p == pe ) + goto _test_eof69; +case 69: + switch( (*p) ) { + case 32: goto st70; + case 116: goto st87; + } + goto st0; +st70: + if ( ++p == pe ) + goto _test_eof70; +case 70: + switch( (*p) ) { + case 32: goto st70; + case 105: goto st71; + } + goto st0; +st71: + if ( ++p == pe ) + goto _test_eof71; +case 71: + if ( (*p) == 110 ) + goto st72; + goto st0; +st72: + if ( ++p == pe ) + goto _test_eof72; 
+case 72: + switch( (*p) ) { + case 32: goto st73; + case 106: goto st80; + } + goto st0; +st73: + if ( ++p == pe ) + goto _test_eof73; +case 73: + if ( (*p) == 32 ) + goto st73; + if ( 33 <= (*p) && (*p) <= 126 ) + goto tr91; + goto st0; +tr91: +#line 338 "src/admin.rl" + { strstart = p; } + goto st74; +st74: + if ( ++p == pe ) + goto _test_eof74; +case 74: +#line 1221 "src/admin.cc" + if ( (*p) == 32 ) + goto tr92; + if ( 33 <= (*p) && (*p) <= 126 ) + goto st74; + goto st0; +tr92: +#line 338 "src/admin.rl" + { strend = p; } + goto st75; +st75: + if ( ++p == pe ) + goto _test_eof75; +case 75: +#line 1235 "src/admin.cc" + switch( (*p) ) { + case 32: goto st75; + case 111: goto st76; + } + goto st0; +st76: + if ( ++p == pe ) + goto _test_eof76; +case 76: + switch( (*p) ) { + case 102: goto st77; + case 110: goto st79; + } + goto st0; +st77: + if ( ++p == pe ) + goto _test_eof77; +case 77: + switch( (*p) ) { + case 10: goto tr98; + case 13: goto tr99; + case 102: goto st78; + } + goto st0; +st78: + if ( ++p == pe ) + goto _test_eof78; +case 78: + switch( (*p) ) { + case 10: goto tr98; + case 13: goto tr99; + } + goto st0; +st79: + if ( ++p == pe ) + goto _test_eof79; +case 79: + switch( (*p) ) { + case 10: goto tr101; + case 13: goto tr102; + } + goto st0; +st80: + if ( ++p == pe ) + goto _test_eof80; +case 80: + switch( (*p) ) { + case 32: goto st73; + case 101: goto st81; + } + goto st0; +st81: + if ( ++p == pe ) + goto _test_eof81; +case 81: + switch( (*p) ) { + case 32: goto st73; + case 99: goto st82; + } + goto st0; +st82: + if ( ++p == pe ) + goto _test_eof82; +case 82: + switch( (*p) ) { + case 32: goto st73; + case 116: goto st83; + } + goto st0; +st83: + if ( ++p == pe ) + goto _test_eof83; +case 83: + switch( (*p) ) { + case 32: goto st73; + case 105: goto st84; + } + goto st0; +st84: + if ( ++p == pe ) + goto _test_eof84; +case 84: + switch( (*p) ) { + case 32: goto st73; + case 111: goto st85; + } + goto st0; +st85: + if ( ++p == pe ) + goto _test_eof85; 
+case 85: + switch( (*p) ) { + case 32: goto st73; + case 110: goto st86; + } + goto st0; +st86: + if ( ++p == pe ) + goto _test_eof86; +case 86: + if ( (*p) == 32 ) + goto st73; + goto st0; +st87: + if ( ++p == pe ) + goto _test_eof87; +case 87: + if ( (*p) == 32 ) + goto st70; + goto st0; +st88: + if ( ++p == pe ) + goto _test_eof88; +case 88: + switch( (*p) ) { + case 32: goto st89; + case 111: goto st137; + } + goto st0; +st89: + if ( ++p == pe ) + goto _test_eof89; +case 89: + switch( (*p) ) { + case 32: goto st89; + case 99: goto st90; + case 102: goto st103; + case 105: goto st108; + case 112: goto st124; + case 115: goto st130; + } + goto st0; +st90: + if ( ++p == pe ) + goto _test_eof90; +case 90: + if ( (*p) == 111 ) + goto st91; + goto st0; +st91: + if ( ++p == pe ) + goto _test_eof91; +case 91: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 110: goto st92; + } + goto st0; +st92: + if ( ++p == pe ) + goto _test_eof92; +case 92: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 102: goto st93; + } + goto st0; +st93: + if ( ++p == pe ) + goto _test_eof93; +case 93: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 105: goto st94; + } + goto st0; +st94: + if ( ++p == pe ) + goto _test_eof94; +case 94: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 103: goto st95; + } + goto st0; +st95: + if ( ++p == pe ) + goto _test_eof95; +case 95: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 117: goto st96; + } + goto st0; +st96: + if ( ++p == pe ) + goto _test_eof96; +case 96: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 114: goto st97; + } + goto st0; +st97: + if ( ++p == pe ) + goto _test_eof97; +case 97: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 97: goto st98; + } + goto st0; +st98: + if ( ++p == pe ) + goto _test_eof98; +case 98: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto 
tr118; + case 116: goto st99; + } + goto st0; +st99: + if ( ++p == pe ) + goto _test_eof99; +case 99: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 105: goto st100; + } + goto st0; +st100: + if ( ++p == pe ) + goto _test_eof100; +case 100: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 111: goto st101; + } + goto st0; +st101: + if ( ++p == pe ) + goto _test_eof101; +case 101: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + case 110: goto st102; + } + goto st0; +st102: + if ( ++p == pe ) + goto _test_eof102; +case 102: + switch( (*p) ) { + case 10: goto tr117; + case 13: goto tr118; + } + goto st0; +st103: + if ( ++p == pe ) + goto _test_eof103; +case 103: + if ( (*p) == 105 ) + goto st104; + goto st0; +st104: + if ( ++p == pe ) + goto _test_eof104; +case 104: + switch( (*p) ) { + case 10: goto tr131; + case 13: goto tr132; + case 98: goto st105; + } + goto st0; +st105: + if ( ++p == pe ) + goto _test_eof105; +case 105: + switch( (*p) ) { + case 10: goto tr131; + case 13: goto tr132; + case 101: goto st106; + } + goto st0; +st106: + if ( ++p == pe ) + goto _test_eof106; +case 106: + switch( (*p) ) { + case 10: goto tr131; + case 13: goto tr132; + case 114: goto st107; + } + goto st0; +st107: + if ( ++p == pe ) + goto _test_eof107; +case 107: + switch( (*p) ) { + case 10: goto tr131; + case 13: goto tr132; + } + goto st0; +st108: + if ( ++p == pe ) + goto _test_eof108; +case 108: + switch( (*p) ) { + case 100: goto st109; + case 110: goto st111; + } + goto st0; +st109: + if ( ++p == pe ) + goto _test_eof109; +case 109: + if ( (*p) == 120 ) + goto st110; + goto st0; +st110: + if ( ++p == pe ) + goto _test_eof110; +case 110: + switch( (*p) ) { + case 10: goto tr139; + case 13: goto tr140; + } + goto st0; +st111: + if ( ++p == pe ) + goto _test_eof111; +case 111: + switch( (*p) ) { + case 10: goto tr141; + case 13: goto tr142; + case 100: goto st112; + case 102: goto st114; + case 106: goto st116; + 
case 115: goto st119; + } + goto st0; +st112: + if ( ++p == pe ) + goto _test_eof112; +case 112: + switch( (*p) ) { + case 10: goto tr139; + case 13: goto tr140; + case 101: goto st113; + } + goto st0; +st113: + if ( ++p == pe ) + goto _test_eof113; +case 113: + switch( (*p) ) { + case 10: goto tr139; + case 13: goto tr140; + case 120: goto st110; + } + goto st0; +st114: + if ( ++p == pe ) + goto _test_eof114; +case 114: + switch( (*p) ) { + case 10: goto tr141; + case 13: goto tr142; + case 111: goto st115; + } + goto st0; +st115: + if ( ++p == pe ) + goto _test_eof115; +case 115: + switch( (*p) ) { + case 10: goto tr141; + case 13: goto tr142; + } + goto st0; +st116: + if ( ++p == pe ) + goto _test_eof116; +case 116: + switch( (*p) ) { + case 101: goto st117; + case 115: goto st119; + } + goto st0; +st117: + if ( ++p == pe ) + goto _test_eof117; +case 117: + switch( (*p) ) { + case 99: goto st118; + case 115: goto st119; + } + goto st0; +st118: + if ( ++p == pe ) + goto _test_eof118; +case 118: + switch( (*p) ) { + case 115: goto st119; + case 116: goto st120; + } + goto st0; +st119: + if ( ++p == pe ) + goto _test_eof119; +case 119: + switch( (*p) ) { + case 10: goto tr152; + case 13: goto tr153; + } + goto st0; +st120: + if ( ++p == pe ) + goto _test_eof120; +case 120: + switch( (*p) ) { + case 105: goto st121; + case 115: goto st119; + } + goto st0; +st121: + if ( ++p == pe ) + goto _test_eof121; +case 121: + switch( (*p) ) { + case 111: goto st122; + case 115: goto st119; + } + goto st0; +st122: + if ( ++p == pe ) + goto _test_eof122; +case 122: + switch( (*p) ) { + case 110: goto st123; + case 115: goto st119; + } + goto st0; +st123: + if ( ++p == pe ) + goto _test_eof123; +case 123: + if ( (*p) == 115 ) + goto st119; + goto st0; +st124: + if ( ++p == pe ) + goto _test_eof124; +case 124: + if ( (*p) == 97 ) + goto st125; + goto st0; +st125: + if ( ++p == pe ) + goto _test_eof125; +case 125: + switch( (*p) ) { + case 10: goto tr158; + case 13: goto tr159; + 
case 108: goto st126; + } + goto st0; +st126: + if ( ++p == pe ) + goto _test_eof126; +case 126: + switch( (*p) ) { + case 10: goto tr158; + case 13: goto tr159; + case 108: goto st127; + } + goto st0; +st127: + if ( ++p == pe ) + goto _test_eof127; +case 127: + switch( (*p) ) { + case 10: goto tr158; + case 13: goto tr159; + case 111: goto st128; + } + goto st0; +st128: + if ( ++p == pe ) + goto _test_eof128; +case 128: + switch( (*p) ) { + case 10: goto tr158; + case 13: goto tr159; + case 99: goto st129; + } + goto st0; +st129: + if ( ++p == pe ) + goto _test_eof129; +case 129: + switch( (*p) ) { + case 10: goto tr158; + case 13: goto tr159; + } + goto st0; +st130: + if ( ++p == pe ) + goto _test_eof130; +case 130: + switch( (*p) ) { + case 108: goto st131; + case 116: goto st134; + } + goto st0; +st131: + if ( ++p == pe ) + goto _test_eof131; +case 131: + switch( (*p) ) { + case 10: goto tr166; + case 13: goto tr167; + case 97: goto st132; + } + goto st0; +st132: + if ( ++p == pe ) + goto _test_eof132; +case 132: + switch( (*p) ) { + case 10: goto tr166; + case 13: goto tr167; + case 98: goto st133; + } + goto st0; +st133: + if ( ++p == pe ) + goto _test_eof133; +case 133: + switch( (*p) ) { + case 10: goto tr166; + case 13: goto tr167; + } + goto st0; +st134: + if ( ++p == pe ) + goto _test_eof134; +case 134: + switch( (*p) ) { + case 10: goto tr170; + case 13: goto tr171; + case 97: goto st135; + } + goto st0; +st135: + if ( ++p == pe ) + goto _test_eof135; +case 135: + switch( (*p) ) { + case 10: goto tr170; + case 13: goto tr171; + case 116: goto st136; + } + goto st0; +st136: + if ( ++p == pe ) + goto _test_eof136; +case 136: + switch( (*p) ) { + case 10: goto tr170; + case 13: goto tr171; + } + goto st0; +st137: + if ( ++p == pe ) + goto _test_eof137; +case 137: + switch( (*p) ) { + case 32: goto st89; + case 119: goto st138; + } + goto st0; +st138: + if ( ++p == pe ) + goto _test_eof138; +case 138: + if ( (*p) == 32 ) + goto st89; + goto st0; + } + 
_test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof139: cs = 139; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; + _test_eof15: cs = 15; goto _test_eof; + _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; + _test_eof23: cs = 23; goto _test_eof; + _test_eof24: cs = 24; goto _test_eof; + _test_eof25: cs = 25; goto _test_eof; + _test_eof26: cs = 26; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; + _test_eof28: cs = 28; goto _test_eof; + _test_eof29: cs = 29; goto _test_eof; + _test_eof30: cs = 30; goto _test_eof; + _test_eof31: cs = 31; goto _test_eof; + _test_eof32: cs = 32; goto _test_eof; + _test_eof33: cs = 33; goto _test_eof; + _test_eof34: cs = 34; goto _test_eof; + _test_eof35: cs = 35; goto _test_eof; + _test_eof36: cs = 36; goto _test_eof; + _test_eof37: cs = 37; goto _test_eof; + _test_eof38: cs = 38; goto _test_eof; + _test_eof39: cs = 39; goto _test_eof; + _test_eof40: cs = 40; goto _test_eof; + _test_eof41: cs = 41; goto _test_eof; + _test_eof42: cs = 42; goto _test_eof; + _test_eof43: cs = 43; goto _test_eof; + _test_eof44: cs = 44; goto _test_eof; + _test_eof45: cs = 45; goto _test_eof; + _test_eof46: cs = 46; goto _test_eof; + _test_eof47: cs = 47; goto _test_eof; + _test_eof48: cs = 48; goto _test_eof; + _test_eof49: cs = 49; goto _test_eof; + _test_eof50: cs = 50; goto _test_eof; + _test_eof51: 
cs = 51; goto _test_eof; + _test_eof52: cs = 52; goto _test_eof; + _test_eof53: cs = 53; goto _test_eof; + _test_eof54: cs = 54; goto _test_eof; + _test_eof55: cs = 55; goto _test_eof; + _test_eof56: cs = 56; goto _test_eof; + _test_eof57: cs = 57; goto _test_eof; + _test_eof58: cs = 58; goto _test_eof; + _test_eof59: cs = 59; goto _test_eof; + _test_eof60: cs = 60; goto _test_eof; + _test_eof61: cs = 61; goto _test_eof; + _test_eof62: cs = 62; goto _test_eof; + _test_eof63: cs = 63; goto _test_eof; + _test_eof64: cs = 64; goto _test_eof; + _test_eof65: cs = 65; goto _test_eof; + _test_eof66: cs = 66; goto _test_eof; + _test_eof67: cs = 67; goto _test_eof; + _test_eof68: cs = 68; goto _test_eof; + _test_eof69: cs = 69; goto _test_eof; + _test_eof70: cs = 70; goto _test_eof; + _test_eof71: cs = 71; goto _test_eof; + _test_eof72: cs = 72; goto _test_eof; + _test_eof73: cs = 73; goto _test_eof; + _test_eof74: cs = 74; goto _test_eof; + _test_eof75: cs = 75; goto _test_eof; + _test_eof76: cs = 76; goto _test_eof; + _test_eof77: cs = 77; goto _test_eof; + _test_eof78: cs = 78; goto _test_eof; + _test_eof79: cs = 79; goto _test_eof; + _test_eof80: cs = 80; goto _test_eof; + _test_eof81: cs = 81; goto _test_eof; + _test_eof82: cs = 82; goto _test_eof; + _test_eof83: cs = 83; goto _test_eof; + _test_eof84: cs = 84; goto _test_eof; + _test_eof85: cs = 85; goto _test_eof; + _test_eof86: cs = 86; goto _test_eof; + _test_eof87: cs = 87; goto _test_eof; + _test_eof88: cs = 88; goto _test_eof; + _test_eof89: cs = 89; goto _test_eof; + _test_eof90: cs = 90; goto _test_eof; + _test_eof91: cs = 91; goto _test_eof; + _test_eof92: cs = 92; goto _test_eof; + _test_eof93: cs = 93; goto _test_eof; + _test_eof94: cs = 94; goto _test_eof; + _test_eof95: cs = 95; goto _test_eof; + _test_eof96: cs = 96; goto _test_eof; + _test_eof97: cs = 97; goto _test_eof; + _test_eof98: cs = 98; goto _test_eof; + _test_eof99: cs = 99; goto _test_eof; + _test_eof100: cs = 100; goto _test_eof; + 
_test_eof101: cs = 101; goto _test_eof; + _test_eof102: cs = 102; goto _test_eof; + _test_eof103: cs = 103; goto _test_eof; + _test_eof104: cs = 104; goto _test_eof; + _test_eof105: cs = 105; goto _test_eof; + _test_eof106: cs = 106; goto _test_eof; + _test_eof107: cs = 107; goto _test_eof; + _test_eof108: cs = 108; goto _test_eof; + _test_eof109: cs = 109; goto _test_eof; + _test_eof110: cs = 110; goto _test_eof; + _test_eof111: cs = 111; goto _test_eof; + _test_eof112: cs = 112; goto _test_eof; + _test_eof113: cs = 113; goto _test_eof; + _test_eof114: cs = 114; goto _test_eof; + _test_eof115: cs = 115; goto _test_eof; + _test_eof116: cs = 116; goto _test_eof; + _test_eof117: cs = 117; goto _test_eof; + _test_eof118: cs = 118; goto _test_eof; + _test_eof119: cs = 119; goto _test_eof; + _test_eof120: cs = 120; goto _test_eof; + _test_eof121: cs = 121; goto _test_eof; + _test_eof122: cs = 122; goto _test_eof; + _test_eof123: cs = 123; goto _test_eof; + _test_eof124: cs = 124; goto _test_eof; + _test_eof125: cs = 125; goto _test_eof; + _test_eof126: cs = 126; goto _test_eof; + _test_eof127: cs = 127; goto _test_eof; + _test_eof128: cs = 128; goto _test_eof; + _test_eof129: cs = 129; goto _test_eof; + _test_eof130: cs = 130; goto _test_eof; + _test_eof131: cs = 131; goto _test_eof; + _test_eof132: cs = 132; goto _test_eof; + _test_eof133: cs = 133; goto _test_eof; + _test_eof134: cs = 134; goto _test_eof; + _test_eof135: cs = 135; goto _test_eof; + _test_eof136: cs = 136; goto _test_eof; + _test_eof137: cs = 137; goto _test_eof; + _test_eof138: cs = 138; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 363 "src/admin.rl" + + + in->pos = pe; + + if (p != pe) { + start(out); + tbuf_append(out, unknown_command, strlen(unknown_command)); + end(out); + } + + coio_write(coio, out->data, out->size); + return 0; +} + +static void +admin_handler(va_list ap) +{ + struct ev_io coio = va_arg(ap, struct ev_io); + struct sockaddr_in *addr = va_arg(ap, struct sockaddr_in 
*); + struct iobuf *iobuf = va_arg(ap, struct iobuf *); + lua_State *L = lua_newthread(tarantool_L); + int coro_ref = luaL_ref(tarantool_L, LUA_REGISTRYINDEX); + + auto scoped_guard = make_scoped_guard([&] { + luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref); + evio_close(&coio); + iobuf_delete(iobuf); + session_destroy(fiber_ptr->sid); + }); + + /* + * Admin and iproto connections must have a + * session object, representing the state of + * a remote client: it's used in Lua + * stored procedures. + */ + session_create(coio.fd, *(uint64_t *) addr); + for (;;) { + if (admin_dispatch(&coio, iobuf, L) < 0) + return; + iobuf_gc(iobuf); + fiber_gc(); + } +} + +void +admin_init(const char *bind_ipaddr, int admin_port) +{ + static struct coio_service admin; + coio_service_init(&admin, "admin", bind_ipaddr, + admin_port, admin_handler, NULL); + evio_service_start(&admin.evio_service); +} + +/* + * Local Variables: + * mode: c + * End: + * vim: syntax=objc + */ diff --git a/src/admin.rl b/src/admin.rl new file mode 100644 index 0000000000000000000000000000000000000000..c84cf6d2182ada7ba29ab374d8244aa2c262f122 --- /dev/null +++ b/src/admin.rl @@ -0,0 +1,422 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <unistd.h> +#include <stdlib.h> + +#include <fiber.h> +#include <palloc.h> +#include <salloc.h> +#include <say.h> +#include <stat.h> +#include <tarantool.h> +#include "lua/init.h" +#include <recovery.h> +#include <tbuf.h> +#include "tarantool/util.h" +#include <errinj.h> +#include "coio_buf.h" + +extern "C" { +#include <lua.h> +#include <lauxlib.h> +#include <lualib.h> +} + +#include "box/box.h" +#include "lua/init.h" +#include "session.h" +#include "scoped_guard.h" +#include "box/space.h" + +static const char *help = + "available commands:" CRLF + " - help" CRLF + " - exit" CRLF + " - show info" CRLF + " - show fiber" CRLF + " - show configuration" CRLF + " - show index" CRLF + " - show slab" CRLF + " - show palloc" CRLF + " - show stat" CRLF + " - save coredump" CRLF + " - save snapshot" CRLF + " - lua command" CRLF + " - reload configuration" CRLF + " - show injections (debug mode only)" CRLF + " - set injection <name> <state> (debug mode only)" CRLF; + +static const char *unknown_command = "unknown command. try typing help." 
CRLF; + +%%{ + machine admin; + write data; +}%% + +struct salloc_stat_admin_cb_ctx { + int64_t total_used; + int64_t total_used_real; + int64_t total_alloc_real; + struct tbuf *out; +}; + +static int +salloc_stat_admin_cb(const struct slab_cache_stats *cstat, void *cb_ctx) +{ + struct salloc_stat_admin_cb_ctx *ctx = (struct salloc_stat_admin_cb_ctx *) cb_ctx; + + tbuf_printf(ctx->out, + " - { item_size: %6i, slabs: %6i, items: %11" PRIi64 + ", bytes_used: %12" PRIi64 ", waste: %5.2f%%" + ", bytes_free: %12" PRIi64 " }" CRLF, + (int)cstat->item_size, + (int)cstat->slabs, + cstat->items, + cstat->bytes_used, + (double)(cstat->bytes_alloc_real - cstat->bytes_used_real)*100 / + (cstat->bytes_alloc_real + 0.001), + cstat->bytes_free); + + ctx->total_used += cstat->bytes_used; + ctx->total_alloc_real += cstat->bytes_alloc_real; + ctx->total_used_real += cstat->bytes_used_real; + return 0; +} + +static void +show_slab(struct tbuf *out) +{ + struct salloc_stat_admin_cb_ctx cb_ctx; + struct slab_arena_stats astat; + + cb_ctx.total_used = 0; + cb_ctx.total_used_real = 0; + cb_ctx.total_alloc_real = 0; + cb_ctx.out = out; + + tbuf_printf(out, "slab statistics:\n classes:" CRLF); + + salloc_stat(salloc_stat_admin_cb, &astat, &cb_ctx); + + tbuf_printf(out, " items_used: %.2f%%" CRLF, + (double)cb_ctx.total_used / astat.size * 100); + tbuf_printf(out, " arena_used: %.2f%%" CRLF, + (double)astat.used / astat.size * 100); + tbuf_printf(out, " waste: %.2f%%" CRLF, + (double)(cb_ctx.total_alloc_real - cb_ctx.total_used_real) / (cb_ctx.total_alloc_real + 0.001) * 100); + tbuf_printf(out, " bytes_waste: %12" PRIi64 CRLF, + (int64_t)((double)cb_ctx.total_used*(cb_ctx.total_alloc_real - cb_ctx.total_used_real) / + (cb_ctx.total_alloc_real + 0.001))); +} + +static void +end(struct tbuf *out) +{ + tbuf_printf(out, "..." 
CRLF); +} + +static void +start(struct tbuf *out) +{ + tbuf_printf(out, "---" CRLF); +} + +static void +ok(struct tbuf *out) +{ + start(out); + tbuf_printf(out, "ok" CRLF); + end(out); +} + +static void +fail(struct tbuf *out, struct tbuf *err) +{ + start(out); + tbuf_printf(out, "fail:%.*s" CRLF, err->size, (char *)err->data); + end(out); +} + +static void +index_info(struct tbuf *out) +{ + tbuf_printf(out, "index:" CRLF); + struct space_stat *stat = space_stat(); + int sp_i = 0; + int64_t total_size = 0; + while (stat[sp_i].n >= 0) { + tbuf_printf(out, " - space: %" PRIi32 CRLF, stat[sp_i].n); + int64_t sp_size = 0; + int i; + for (i = 0; stat[sp_i].index[i].n >= 0; ++i) + sp_size += stat[sp_i].index[i].memsize; + + tbuf_printf(out, " memsize: %15" PRIi64 CRLF, sp_size); + total_size += sp_size; + tbuf_printf(out, " index: " CRLF); + for (i = 0; stat[sp_i].index[i].n >= 0; ++i) { + tbuf_printf(out, " - { n: %3d, keys: %15" PRIi64 ", memsize: %15" PRIi64 " }" CRLF, + stat[sp_i].index[i].n, stat[sp_i].index[i].keys, stat[sp_i].index[i].memsize); + } + ++sp_i; + } + tbuf_printf(out, "memsize: %15" PRIi64 CRLF, total_size); +} + +static void +tarantool_info(struct tbuf *out) +{ + tbuf_printf(out, "info:" CRLF); + tbuf_printf(out, " version: \"%s\"" CRLF, tarantool_version()); + tbuf_printf(out, " uptime: %i" CRLF, (int)tarantool_uptime()); + tbuf_printf(out, " pid: %i" CRLF, getpid()); + tbuf_printf(out, " logger_pid: %i" CRLF, logger_pid); + tbuf_printf(out, " snapshot_pid: %i" CRLF, snapshot_pid); + tbuf_printf(out, " lsn: %" PRIi64 CRLF, + recovery_state->confirmed_lsn); + tbuf_printf(out, " recovery_lag: %.3f" CRLF, + recovery_state->remote ? + recovery_state->remote->recovery_lag : 0); + tbuf_printf(out, " recovery_last_update: %.3f" CRLF, + recovery_state->remote ? 
+ recovery_state->remote->recovery_last_update_tstamp :0); + box_info(out); + const char *path = cfg_filename_fullpath; + if (path == NULL) + path = cfg_filename; + tbuf_printf(out, " config: \"%s\"" CRLF, path); +} + +static int +show_stat_item(const char *name, int rps, int64_t total, void *ctx) +{ + struct tbuf *buf = (struct tbuf *) ctx; + int name_len = strlen(name); + tbuf_printf(buf, + " %s:%*s{ rps: %- 6i, total: %- 12" PRIi64 " }" CRLF, + name, 1 + stat_max_name_len - name_len, " ", rps, total); + return 0; +} + +void +show_stat(struct tbuf *buf) +{ + tbuf_printf(buf, "statistics:" CRLF); + stat_foreach(show_stat_item, buf); +} + +static int +admin_dispatch(struct ev_io *coio, struct iobuf *iobuf, lua_State *L) +{ + struct ibuf *in = &iobuf->in; + struct tbuf *out = tbuf_new(fiber_ptr->gc_pool); + struct tbuf *err = tbuf_new(fiber_ptr->gc_pool); + int cs; + char *p, *pe; + char *strstart, *strend; + bool state; + + while ((pe = (char *) memchr(in->pos, '\n', in->end - in->pos)) == NULL) { + if (coio_bread(coio, in, 1) <= 0) + return -1; + } + + pe++; + p = in->pos; + + %%{ + action show_configuration { + start(out); + show_cfg(out); + end(out); + } + + action show_injections { + start(out); + errinj_info(out); + end(out); + } + + action help { + start(out); + tbuf_append(out, help, strlen(help)); + end(out); + } + + action lua { + strstart[strend-strstart]='\0'; + start(out); + tarantool_lua(L, out, strstart); + end(out); + } + + action reload_configuration { + if (reload_cfg(err)) + fail(out, err); + else + ok(out); + } + + action save_snapshot { + int ret = snapshot(); + + if (ret == 0) + ok(out); + else { + tbuf_printf(err, " can't save snapshot, errno %d (%s)", + ret, strerror(ret)); + + fail(out, err); + } + } + + action set_injection { + strstart[strend-strstart] = '\0'; + if (errinj_set_byname(strstart, state)) { + tbuf_printf(err, "can't find error injection '%s'", strstart); + fail(out, err); + } else { + ok(out); + } + } + + eol = "\n" | "\r\n"; 
+ show = "sh"("o"("w")?)?; + info = "in"("f"("o")?)?; + index = ("ind"("e"("x")?)? | "idx"); + check = "ch"("e"("c"("k")?)?)?; + configuration = "co"("n"("f"("i"("g"("u"("r"("a"("t"("i"("o"("n")?)?)?)?)?)?)?)?)?)?)?; + fiber = "fi"("b"("e"("r")?)?)?; + slab = "sl"("a"("b")?)?; + mod = "mo"("d")?; + palloc = "pa"("l"("l"("o"("c")?)?)?)?; + stat = "st"("a"("t")?)?; + plugins = "plugins"; + + help = "h"("e"("l"("p")?)?)?; + exit = "e"("x"("i"("t")?)?)? | "q"("u"("i"("t")?)?)?; + save = "sa"("v"("e")?)?; + coredump = "co"("r"("e"("d"("u"("m"("p")?)?)?)?)?)?; + snapshot = "sn"("a"("p"("s"("h"("o"("t")?)?)?)?)?)?; + string = [^\r\n]+ >{strstart = p;} %{strend = p;}; + reload = "re"("l"("o"("a"("d")?)?)?)?; + lua = "lu"("a")?; + + set = "se"("t")?; + injection = "in"("j"("e"("c"("t"("i"("o"("n")?)?)?)?)?)?)?; + injections = injection"s"; + namech = alnum | punct; + name = namech+ >{ strstart = p; } %{ strend = p; }; + state_on = "on" %{ state = true; }; + state_off = "of"("f")? %{ state = false; }; + state = state_on | state_off; + + commands = (help %help | + exit %{return -1;} | + lua " "+ string %lua | + show " "+ info %{start(out); tarantool_info(out); end(out);} | + show " "+ index %{start(out); index_info(out); end(out);} | + show " "+ fiber %{start(out); fiber_info(out); end(out);} | + show " "+ configuration %show_configuration | + show " "+ slab %{start(out); show_slab(out); end(out);} | + show " "+ palloc %{start(out); palloc_stat(out); end(out);} | + show " "+ stat %{start(out); show_stat(out);end(out);} | + show " "+ injections %show_injections | + set " "+ injection " "+ name " "+ state %set_injection | + save " "+ coredump %{coredump(60); ok(out);} | + save " "+ snapshot %save_snapshot | + check " "+ slab %{slab_validate(); ok(out);} | + reload " "+ configuration %reload_configuration); + + main := commands eol; + write init; + write exec; + }%% + + in->pos = pe; + + if (p != pe) { + start(out); + tbuf_append(out, unknown_command, strlen(unknown_command)); + 
end(out); + } + + coio_write(coio, out->data, out->size); + return 0; +} + +static void +admin_handler(va_list ap) +{ + struct ev_io coio = va_arg(ap, struct ev_io); + struct sockaddr_in *addr = va_arg(ap, struct sockaddr_in *); + struct iobuf *iobuf = va_arg(ap, struct iobuf *); + lua_State *L = lua_newthread(tarantool_L); + int coro_ref = luaL_ref(tarantool_L, LUA_REGISTRYINDEX); + + auto scoped_guard = make_scoped_guard([&] { + luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref); + evio_close(&coio); + iobuf_delete(iobuf); + session_destroy(fiber_ptr->sid); + }); + + /* + * Admin and iproto connections must have a + * session object, representing the state of + * a remote client: it's used in Lua + * stored procedures. + */ + session_create(coio.fd, *(uint64_t *) addr); + for (;;) { + if (admin_dispatch(&coio, iobuf, L) < 0) + return; + iobuf_gc(iobuf); + fiber_gc(); + } +} + +void +admin_init(const char *bind_ipaddr, int admin_port) +{ + static struct coio_service admin; + coio_service_init(&admin, "admin", bind_ipaddr, + admin_port, admin_handler, NULL); + evio_service_start(&admin.evio_service); +} + +/* + * Local Variables: + * mode: c + * End: + * vim: syntax=objc + */ diff --git a/src/box/box_lua.cc b/src/box/box_lua.cc new file mode 100644 index 0000000000000000000000000000000000000000..9e7b2a182248c27bcb956f2a5442809d699b89ad --- /dev/null +++ b/src/box/box_lua.cc @@ -0,0 +1,1943 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "box_lua.h" +#include "lua/init.h" +#include <fiber.h> +#include "box/box.h" +#include "request.h" +#include "txn.h" +#include "tuple_update.h" + +extern "C" { +#include <lua.h> +#include <lauxlib.h> +#include <lualib.h> +#include <lj_obj.h> +#include <lj_ctype.h> +#include <lj_cdata.h> +#include <lj_cconv.h> +#include <arpa/inet.h> +#include <lib/bit/bit.h> +} /* extern "C" */ + +#include "pickle.h" +#include "tuple.h" +#include "space.h" +#include "port.h" +#include "tbuf.h" +#include "scoped_guard.h" + +/* contents of box.lua, misc.lua, box.net.lua respectively */ +extern char box_lua[], + box_net_lua[], + misc_lua[]; +static const char *lua_sources[] = { + box_lua, + box_net_lua, + misc_lua, + NULL +}; + +/** + * All box connections share the same Lua state. We use + * Lua coroutines (lua_newthread()) to have multiple + * procedures running at the same time. + */ +lua_State *root_L; + +/* + * Functions, exported in box_lua.h should have prefix + * "box_lua_"; functions, available in Lua "box" + * should start with "lbox_". 
+ */ + +/** {{{ box.tuple Lua library + * + * To avoid extra copying between Lua memory and garbage-collected + * tuple memory, provide a Lua userdata object 'box.tuple'. This + * object refers to a tuple instance in the slab allocator, and + * allows accessing it using Lua primitives (array subscription, + * iteration, etc.). When Lua object is garbage-collected, + * tuple reference counter in the slab allocator is decreased, + * allowing the tuple to be eventually garbage collected in + * the slab allocator. + */ + +static const char *tuplelib_name = "box.tuple"; +static const char *tuple_iteratorlib_name = "box.tuple.iterator"; + +static void +lbox_pushtuple(struct lua_State *L, struct tuple *tuple); + +static struct tuple * +lua_totuple(struct lua_State *L, int index); + +static inline struct tuple * +lua_checktuple(struct lua_State *L, int narg) +{ + struct tuple *t = *(struct tuple **) luaL_checkudata(L, narg, tuplelib_name); + assert(t->refs); + return t; +} + +struct tuple * +lua_istuple(struct lua_State *L, int narg) +{ + if (lua_getmetatable(L, narg) == 0) + return NULL; + luaL_getmetatable(L, tuplelib_name); + struct tuple *tuple = 0; + if (lua_equal(L, -1, -2)) + tuple = *(struct tuple **) lua_touserdata(L, narg); + lua_pop(L, 2); + return tuple; +} + +static int +lbox_tuple_new(lua_State *L) +{ + int argc = lua_gettop(L); + if (argc < 1) + luaL_error(L, "tuple.new(): bad arguments"); + struct tuple *tuple = lua_totuple(L, 1); + lbox_pushtuple(L, tuple); + return 1; +} + +static int +lbox_tuple_gc(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + tuple_ref(tuple, -1); + return 0; +} + +static int +lbox_tuple_len(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + lua_pushnumber(L, tuple->field_count); + return 1; +} + +static int +lbox_tuple_slice(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + int argc = lua_gettop(L) - 1; + uint32_t start, end; + int offset; + + /* + * Prepare the 
range. The second argument is optional. + * If the end is beyond tuple size, adjust it. + * If no arguments, or start > end, return an error. + */ + if (argc == 0 || argc > 2) + luaL_error(L, "tuple.slice(): bad arguments"); + + offset = lua_tointeger(L, 2); + if (offset >= 0 && offset < tuple->field_count) { + start = offset; + } else if (offset < 0 && -offset <= tuple->field_count) { + start = offset + tuple->field_count; + } else { + return luaL_error(L, "tuple.slice(): start >= field count"); + } + + if (argc == 2) { + offset = lua_tointeger(L, 3); + if (offset > 0 && offset <= tuple->field_count) { + end = offset; + } else if (offset < 0 && -offset < tuple->field_count) { + end = offset + tuple->field_count; + } else { + return luaL_error(L, "tuple.slice(): end > field count"); + } + } else { + end = tuple->field_count; + } + if (end <= start) + return luaL_error(L, "tuple.slice(): start must be less than end"); + + struct tuple_iterator it; + tuple_rewind(&it, tuple); + const char *field; + uint32_t len; + + assert(start < tuple->field_count); + uint32_t field_no = start; + field = tuple_seek(&it, start, &len); + while (field && field_no < end) { + lua_pushlstring(L, field, len); + ++field_no; + field = tuple_next(&it, &len); + } + assert(field_no == end); + return end - start; +} + +/** A single value on the Lua stack. */ +struct lua_field { + const char *data; + uint32_t len; + union { + uint32_t u32; + uint64_t u64; + }; + enum field_type type; +}; + +/** + * Convert a value on the lua stack to a Tarantool data type. 
+ */ +static void +lua_tofield(lua_State *L, int i, struct lua_field *field) +{ + double num; + size_t size; + switch (lua_type(L, i)) { + case LUA_TNUMBER: + num = lua_tonumber(L, i); + if (num <= UINT32_MAX && num >= INT32_MIN) { + field->u32 = (uint32_t) num; + field->data = (const char *) &field->u32; + field->len = sizeof(uint32_t); + field->type = NUM; + return; + } else { + field->u64 = (uint64_t) num; + field->data = (const char *) &field->u64; + field->len = sizeof(uint64_t); + field->type = NUM64; + return; + } + case LUA_TCDATA: + field->u64 = tarantool_lua_tointeger64(L, i); + field->data = (const char *) &field->u64; + field->len = sizeof(uint64_t); + field->type = NUM64; + return; + case LUA_TBOOLEAN: + if (lua_toboolean(L, i)) { + field->data = "true"; + field->len = 4; + } else { + field->data = "false"; + field->len = 5; + } + field->type = STRING; + return; + case LUA_TNIL: + field->data = "nil"; + field->len = 3; + field->type = STRING; + return; + case LUA_TSTRING: + field->data = lua_tolstring(L, i, &size); + field->len = (uint32_t) size; + field->type = STRING; + return; + default: + field->data = NULL; + field->len = 0; + field->type = UNKNOWN; + return; + } +} + +/** + * @brief A wrapper for lua_tofield that raises an error if Lua type can not + * be converted to lua_field structure + * @param L stack + * @param index stack index + * @param field conversion result + * @sa lua_tofield() + */ +static inline void +lua_checkfield(lua_State *L, int i, struct lua_field *field) +{ + lua_tofield(L, i, field); + if (unlikely(field->type == UNKNOWN)) + luaL_error(L, "unsupported Lua type '%s'", + lua_typename(L, lua_type(L, i))); +} + +/** + * Tuple transforming function. + * + * Remove the fields designated by 'offset' and 'len' from an tuple, + * and replace them with the elements of supplied data fields, + * if any. + * + * Function returns newly allocated tuple. + * It does not change any parent tuple data. 
+ */ +static int +lbox_tuple_transform(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + int argc = lua_gettop(L); + if (argc < 3) + luaL_error(L, "tuple.transform(): bad arguments"); + lua_Integer offset = lua_tointeger(L, 2); /* Can be negative and can be > INT_MAX */ + lua_Integer field_count = lua_tointeger(L, 3); + + /* validate offset and len */ + if (offset < 0) { + if (-offset > tuple->field_count) + luaL_error(L, "tuple.transform(): offset is out of bound"); + offset += tuple->field_count; + } else if (offset > tuple->field_count) { + offset = tuple->field_count; + } + if (field_count < 0) + luaL_error(L, "tuple.transform(): len is negative"); + if (field_count > tuple->field_count - offset) + field_count = tuple->field_count - offset; + + assert(offset + field_count <= tuple->field_count); + + /* + * Calculate the number of operations and length of UPDATE expression + */ + uint32_t op_cnt = 0; + if (offset < tuple->field_count && field_count > 0) + op_cnt++; + if (argc > 3) + op_cnt += argc - 3; + + if (op_cnt == 0) { + /* tuple_update() does not accept an empty operation list. 
*/ + lbox_pushtuple(L, tuple); + return 1; + } + + PallocGuard palloc_guard(fiber_ptr->gc_pool); + + /* + * Prepare UPDATE expression + */ + struct tbuf *b = tbuf_new(fiber_ptr->gc_pool); + tbuf_append(b, (char *) &op_cnt, sizeof(op_cnt)); + if (field_count > 0) { + tbuf_ensure(b, 2 * sizeof(uint32_t) + 1 + 5); + + /* offset */ + char *data = pack_u32(b->data + b->size, offset); + + /* operation */ + *(data++) = UPDATE_OP_DELETE; + + /* field: count */ + data = pack_varint32(data, sizeof(uint32_t)); + data = pack_u32(data, field_count); + + assert(data <= b->data + b->capacity); + b->size = data - b->data; + } + + for (int i = argc ; i > 3; i--) { + struct lua_field field; + lua_checkfield(L, i, &field); + tbuf_ensure(b, sizeof(uint32_t) + 1 + 5 + field.len); + + /* offset */ + char *data = pack_u32(b->data + b->size, offset); + + /* operation */ + *data++ = UPDATE_OP_INSERT; + + /* field */ + data = pack_lstr(data, field.data, field.len); + + assert(data <= b->data + b->capacity); + b->size = data - b->data; + } + + /* Execute tuple_update */ + struct tuple *new_tuple = tuple_update(tuple_format_ber, + palloc_region_alloc, + fiber_ptr->gc_pool, + tuple, tbuf_str(b), tbuf_end(b)); + lbox_pushtuple(L, new_tuple); + return 1; +} + +/* + * Tuple find function. + * + * Find each or one tuple field according to the specified key. + * + * Function returns indexes of the tuple fields that match + * key criteria. 
+ * + */ +static int +tuple_find(struct lua_State *L, struct tuple *tuple, size_t offset, + const char *key, size_t key_size, + bool all) +{ + int top = lua_gettop(L); + int idx = offset; + + struct tuple_iterator it; + tuple_rewind(&it, tuple); + uint32_t len; + const char *field = tuple_seek(&it, idx, &len); + for (; field; field = tuple_next(&it, &len)) { + if (len == key_size && (memcmp(field, key, len) == 0)) { + lua_pushinteger(L, idx); + if (!all) + break; + } + idx++; + } + return lua_gettop(L) - top; +} + +static int +lbox_tuple_find_do(struct lua_State *L, bool all) +{ + struct tuple *tuple = lua_checktuple(L, 1); + int argc = lua_gettop(L); + size_t offset = 0; + switch (argc - 1) { + case 1: break; + case 2: + offset = lua_tointeger(L, 2); + break; + default: + luaL_error(L, "tuple.find(): bad arguments"); + } + + struct lua_field field; + lua_checkfield(L, argc, &field); + + return tuple_find(L, tuple, offset, field.data, field.len, all); +} + +static int +lbox_tuple_find(struct lua_State *L) +{ + return lbox_tuple_find_do(L, false); +} + +static int +lbox_tuple_findall(struct lua_State *L) +{ + return lbox_tuple_find_do(L, true); +} + +static int +lbox_tuple_unpack(struct lua_State *L) +{ + int argc = lua_gettop(L); + (void) argc; + struct tuple *tuple = lua_checktuple(L, 1); + + struct tuple_iterator it; + tuple_rewind(&it, tuple); + const char *field; + + uint32_t len; + while ((field = tuple_next(&it, &len))) + lua_pushlstring(L, field, len); + + assert(lua_gettop(L) == argc + tuple->field_count); + return tuple->field_count; +} + +static int +lbox_tuple_totable(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + lua_newtable(L); + int index = 1; + + struct tuple_iterator it; + tuple_rewind(&it, tuple); + const char *field; + uint32_t len; + while ((field = tuple_next(&it, &len))) { + lua_pushnumber(L, index++); + lua_pushlstring(L, field, len); + lua_rawset(L, -3); + } + return 1; +} + +/** + * Implementation of tuple __index 
metamethod. + * + * Provides operator [] access to individual fields for integer + * indexes, as well as searches and invokes metatable methods + * for strings. + */ +static int +lbox_tuple_index(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + /* For integer indexes, implement [] operator */ + if (lua_isnumber(L, 2)) { + int i = luaL_checkint(L, 2); + if (i >= tuple->field_count) + luaL_error(L, "%s: index %d is out of bounds (0..%d)", + tuplelib_name, i, tuple->field_count-1); + uint32_t len = 0; + const char *field = tuple_field(tuple, i, &len); + lua_pushlstring(L, field, len); + return 1; + } + /* If we got a string, try to find a method for it. */ + const char *sz = luaL_checkstring(L, 2); + lua_getmetatable(L, 1); + lua_getfield(L, -1, sz); + return 1; +} + +static int +lbox_tuple_tostring(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + /* @todo: print the tuple */ + size_t allocated = palloc_allocated(fiber_ptr->gc_pool); + struct tbuf *tbuf = tbuf_new(fiber_ptr->gc_pool); + tuple_print(tbuf, tuple); + lua_pushlstring(L, tbuf->data, tbuf->size); + ptruncate(fiber_ptr->gc_pool, allocated); + return 1; +} + +static void +lbox_pushtuple(struct lua_State *L, struct tuple *tuple) +{ + if (tuple) { + struct tuple **ptr = (struct tuple **) + lua_newuserdata(L, sizeof(*ptr)); + luaL_getmetatable(L, tuplelib_name); + lua_setmetatable(L, -2); + *ptr = tuple; + tuple_ref(tuple, 1); + } else { + lua_pushnil(L); + } +} + +/** + * Sequential access to tuple fields. Since tuple is a list-like + * structure, iterating over tuple fields is faster + * than accessing fields using an index. 
+ */ +static int +lbox_tuple_next(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + int argc = lua_gettop(L) - 1; + + struct tuple_iterator *it = NULL; + if (argc == 0 || (argc == 1 && lua_type(L, 2) == LUA_TNIL)) { + it = (struct tuple_iterator *) lua_newuserdata(L, sizeof(*it)); + assert (it != NULL); + luaL_getmetatable(L, tuple_iteratorlib_name); + lua_setmetatable(L, -2); + tuple_rewind(it, tuple); + } else if (argc == 1 && lua_type(L, 2) == LUA_TUSERDATA) { + it = (struct tuple_iterator *) + luaL_checkudata(L, 2, tuple_iteratorlib_name); + assert (it != NULL); + lua_pushvalue(L, 2); + } else { + return luaL_error(L, "tuple.next(): bad arguments"); + } + + uint32_t len; + const char *field = tuple_next(it, &len); + if (field == NULL) { + lua_pop(L, 1); + lua_pushnil(L); + return 1; + } + + lua_pushlstring(L, field, len); + return 2; +} + +/** Iterator over tuple fields. Adapt lbox_tuple_next + * to Lua iteration conventions. + */ +static int +lbox_tuple_pairs(struct lua_State *L) +{ + lua_pushcfunction(L, lbox_tuple_next); + lua_pushvalue(L, -2); /* tuple */ + lua_pushnil(L); + return 3; +} + + +/** tuple:bsize() + * + */ +static int +lbox_tuple_bsize(struct lua_State *L) +{ + struct tuple *tuple = lua_checktuple(L, 1); + lua_pushnumber(L, tuple->bsize); + return 1; +} + +static const struct luaL_reg lbox_tuple_meta[] = { + {"__gc", lbox_tuple_gc}, + {"__len", lbox_tuple_len}, + {"__index", lbox_tuple_index}, + {"__tostring", lbox_tuple_tostring}, + {"next", lbox_tuple_next}, + {"pairs", lbox_tuple_pairs}, + {"slice", lbox_tuple_slice}, + {"transform", lbox_tuple_transform}, + {"find", lbox_tuple_find}, + {"findall", lbox_tuple_findall}, + {"unpack", lbox_tuple_unpack}, + {"totable", lbox_tuple_totable}, + {"bsize", lbox_tuple_bsize}, + {NULL, NULL} +}; + +static const struct luaL_reg lbox_tuplelib[] = { + {"new", lbox_tuple_new}, + {NULL, NULL} +}; + +static const struct luaL_reg lbox_tuple_iterator_meta[] = { + {NULL, NULL} +}; + +/* 
}}} */ + +/** {{{ box.index Lua library: access to spaces and indexes + */ + +static const char *indexlib_name = "box.index"; +static const char *iteratorlib_name = "box.index.iterator"; + +static struct iterator * +lbox_checkiterator(struct lua_State *L, int i) +{ + struct iterator **it = (struct iterator **) + luaL_checkudata(L, i, iteratorlib_name); + assert(it != NULL); + return *it; +} + +static void +lbox_pushiterator(struct lua_State *L, Index *index, + struct iterator *it, enum iterator_type type, + const char *key, size_t size, int part_count) +{ + struct lbox_iterator_udata { + struct iterator *it; + char key[]; + }; + + struct lbox_iterator_udata *udata = (struct lbox_iterator_udata *) + lua_newuserdata(L, sizeof(*udata) + size); + luaL_getmetatable(L, iteratorlib_name); + lua_setmetatable(L, -2); + + udata->it = it; + if (key) { + memcpy(udata->key, key, size); + key = udata->key; + } + key_validate(index->key_def, type, key, part_count); + index->initIterator(it, type, key, part_count); +} + +static int +lbox_iterator_gc(struct lua_State *L) +{ + struct iterator *it = lbox_checkiterator(L, -1); + it->free(it); + return 0; +} + +static Index * +lua_checkindex(struct lua_State *L, int i) +{ + Index **index = (Index **) luaL_checkudata(L, i, indexlib_name); + assert(index != NULL); + return *index; +} + +static int +lbox_index_new(struct lua_State *L) +{ + int n = luaL_checkint(L, 1); /* get space id */ + int idx = luaL_checkint(L, 2); /* get index id in */ + /* locate the appropriate index */ + struct space *sp = space_find(n); + Index *index = index_find(sp, idx); + + /* create a userdata object */ + void **ptr = (void **) lua_newuserdata(L, sizeof(void *)); + *ptr = index; + /* set userdata object metatable to indexlib */ + luaL_getmetatable(L, indexlib_name); + lua_setmetatable(L, -2); + + return 1; +} + +static int +lbox_index_tostring(struct lua_State *L) +{ + Index *index = lua_checkindex(L, 1); + lua_pushfstring(L, "index %d in space %d", + 
index_n(index), space_n(index->space)); + return 1; +} + +static int +lbox_index_len(struct lua_State *L) +{ + Index *index = lua_checkindex(L, 1); + lua_pushinteger(L, index->size()); + return 1; +} + +static int +lbox_index_part_count(struct lua_State *L) +{ + Index *index = lua_checkindex(L, 1); + lua_pushinteger(L, index->key_def->part_count); + return 1; +} + +static int +lbox_index_min(struct lua_State *L) +{ + Index *index = lua_checkindex(L, 1); + lbox_pushtuple(L, index->min()); + return 1; +} + +static int +lbox_index_max(struct lua_State *L) +{ + Index *index = lua_checkindex(L, 1); + lbox_pushtuple(L, index->max()); + return 1; +} + +static int +lbox_index_random(struct lua_State *L) +{ + if (lua_gettop(L) != 2 || lua_isnil(L, 2)) + luaL_error(L, "Usage: index:random((uint32) rnd)"); + + Index *index = lua_checkindex(L, 1); + uint32_t rnd = lua_tointeger(L, 2); + lbox_pushtuple(L, index->random(rnd)); + return 1; +} + + +/* + * Lua iterator over a Taratnool/Box index. + * + * (iteration_state, tuple) = index.next(index, [params]) + * + * When [params] are absent or nil + * returns a pointer to a new ALL iterator and + * to the first tuple (or nil, if the index is + * empty). + * + * When [params] is a userdata, + * i.e. we're inside an iteration loop, retrieves + * the next tuple from the iterator. + * + * Otherwise, [params] can be used to seed + * a new iterator with iterator type and + * type-specific arguments. For exaple, + * for GE iterator, a list of Lua scalars + * cann follow the box.index.GE: this will + * start iteration from the offset specified by + * the given (multipart) key. + * + * @return Returns an iterator object, either created + * or taken from Lua stack. + */ + +static inline struct iterator * +lbox_create_iterator(struct lua_State *L) +{ + Index *index = lua_checkindex(L, 1); + int argc = lua_gettop(L); + + /* Create a new iterator. 
*/ + PallocGuard palloc_guard(fiber_ptr->gc_pool); + enum iterator_type type = ITER_ALL; + uint32_t key_part_count = 0; + const char *key = NULL; + size_t key_size = 0; + if (argc == 1 || (argc == 2 && lua_type(L, 2) == LUA_TNIL)) { + /* + * Nothing or nil on top of the stack, + * iteration over entire range from the + * beginning (ITER_ALL). + */ + } else { + type = (enum iterator_type) luaL_checkint(L, 2); + if (type < ITER_ALL || type >= iterator_type_MAX) + luaL_error(L, "unknown iterator type: %d", type); + /* What else do we have on the stack? */ + if (argc == 2 || (argc == 3 && lua_type(L, 3) == LUA_TNIL)) { + /* Nothing */ + } else if (argc == 3 && lua_type(L, 3) == LUA_TUSERDATA) { + /* Tuple. */ + struct tbuf *b = tbuf_new(fiber_ptr->gc_pool); + struct tuple *tuple = lua_checktuple(L, 2); + key_part_count = tuple->field_count; + tuple_to_tbuf(tuple, b); + key = b->data; + key_size = b->size; + } else { + /* Single or multi- part key. */ + struct tbuf *b = tbuf_new(fiber_ptr->gc_pool); + key_part_count = argc - 2; + struct lua_field field; + for (uint32_t i = 0; i < key_part_count; i++) { + lua_checkfield(L, i + 3, &field); + tbuf_ensure(b, field.len + 5); + char *data = pack_lstr(b->data + b->size, + field.data, field.len); + b->size = data - b->data; + } + key = b->data; + key_size = b->size; + } + /* + * We allow partially specified keys for TREE + * indexes. HASH indexes can only use single-part + * keys. + */ + if (key_part_count > index->key_def->part_count) + luaL_error(L, "Key part count %d" + " is greater than index part count %d", + key_part_count, index->key_def->part_count); + if (key_size == 0) + key = NULL; + } + struct iterator *it = index->allocIterator(); + lbox_pushiterator(L, index, it, type, key, key_size, + key_part_count); + return it; +} + +/** + * Lua-style next() function, for use in pairs(). 
+ * @example: + * for k, v in box.space[0].index[0].idx.next, box.space[0].index[0].idx, nil do + * print(v) + * end + */ +static int +lbox_index_next(struct lua_State *L) +{ + int argc = lua_gettop(L); + struct iterator *it = NULL; + if (argc == 2 && lua_type(L, 2) == LUA_TUSERDATA) { + /* + * Apart from the index itself, we have only one + * other argument, and it's a userdata: must be + * iteration state created before. + */ + it = lbox_checkiterator(L, 2); + } else { + it = lbox_create_iterator(L); + } + struct tuple *tuple = it->next(it); + /* If tuple is NULL, pushes nil as end indicator. */ + lbox_pushtuple(L, tuple); + return tuple ? 2 : 1; +} + +/** iterator() closure function. */ +static int +lbox_index_iterator_closure(struct lua_State *L) +{ + /* Extract closure arguments. */ + struct iterator *it = lbox_checkiterator(L, lua_upvalueindex(1)); + + struct tuple *tuple = it->next(it); + + /* If tuple is NULL, push nil as end indicator. */ + lbox_pushtuple(L, tuple); + return 1; +} + +/** + * @brief Create iterator closure over a Taratnool/Box index. + * @example lua it = box.space[0].index[0]:iterator(box.index.GE, 1); + * print(it(), it()). + * @param L lua stack + * @see http://www.lua.org/pil/7.1.html + * @return number of return values put on the stack + */ +static int +lbox_index_iterator(struct lua_State *L) +{ + /* Create iterator and push it onto the stack. */ + (void) lbox_create_iterator(L); + lua_pushcclosure(L, &lbox_index_iterator_closure, 1); + return 1; +} + + +/** + * Lua index subtree count function. + * Iterate over an index, count the number of tuples which equal the + * provided search criteria. The argument can either point to a + * tuple, a key, or one or more key parts. Returns the number of matched + * tuples. 
+ */ +static int +lbox_index_count(struct lua_State *L) +{ + Index *index = lua_checkindex(L, 1); + int argc = lua_gettop(L) - 1; + if (argc == 0) + luaL_error(L, "index.count(): one or more arguments expected"); + + /* preparing single or multi-part key */ + PallocGuard palloc_guard(fiber_ptr->gc_pool); + uint32_t key_part_count = 0; + const char *key = NULL; + if (argc == 1 && lua_type(L, 2) == LUA_TUSERDATA) { + /* Searching by tuple. */ + struct tuple *tuple = lua_checktuple(L, 2); + struct tbuf *b = tbuf_new(fiber_ptr->gc_pool); + tuple_to_tbuf(tuple, b); + key_part_count = tuple->field_count; + key = b->data; + } else { + /* Single or multi- part key. */ + struct tbuf *b = tbuf_new(fiber_ptr->gc_pool); + key_part_count = argc; + struct lua_field field; + for (uint32_t i = 0; i < key_part_count; i++) { + lua_checkfield(L, i + 2, &field); + tbuf_ensure(b, field.len + 5); + char *data = pack_lstr(b->data + b->size, + field.data, field.len); + b->size = data - b->data; + } + key = b->data; + } + if (key_part_count == 0) + key = NULL; + uint32_t count = 0; + + key_validate(index->key_def, ITER_EQ, key, key_part_count); + /* Prepare index iterator */ + struct iterator *it = index->position(); + index->initIterator(it, ITER_EQ, key, key_part_count); + /* Iterate over the index and count tuples. 
*/ + struct tuple *tuple; + while ((tuple = it->next(it)) != NULL) + count++; + + /* Return subtree size */ + lua_pushnumber(L, count); + return 1; +} + +static const struct luaL_reg lbox_index_meta[] = { + {"__tostring", lbox_index_tostring}, + {"__len", lbox_index_len}, + {"part_count", lbox_index_part_count}, + {"min", lbox_index_min}, + {"max", lbox_index_max}, + {"random", lbox_index_random}, + {"next", lbox_index_next}, + {"iterator", lbox_index_iterator}, + {"count", lbox_index_count}, + {NULL, NULL} +}; + +static const struct luaL_reg indexlib [] = { + {"new", lbox_index_new}, + {NULL, NULL} +}; + +static const struct luaL_reg lbox_iterator_meta[] = { + {"__gc", lbox_iterator_gc}, + {NULL, NULL} +}; + +/* }}} */ + +/** {{{ Lua I/O: facilities to intercept box output + * and push into Lua stack. + */ + +struct port_lua +{ + struct port_vtab *vtab; + struct lua_State *L; +}; + +static inline struct port_lua * +port_lua(struct port *port) { return (struct port_lua *) port; } + +/* + * For addU32/dupU32 do nothing -- the only uint32_t Box can give + * us is tuple count, and we don't need it, since we intercept + * everything into Lua stack first. + * @sa port_add_lua_multret + */ + +static void +port_lua_add_tuple(struct port *port, struct tuple *tuple, + uint32_t flags __attribute__((unused))) +{ + lua_State *L = port_lua(port)->L; + try { + lbox_pushtuple(L, tuple); + } catch (...) { + tnt_raise(ClientError, ER_PROC_LUA, lua_tostring(L, -1)); + } +} + +struct port_vtab port_lua_vtab = { + port_lua_add_tuple, + null_port_eof, +}; + +static struct port * +port_lua_create(struct lua_State *L) +{ + struct port_lua *port = (struct port_lua *) + palloc(fiber_ptr->gc_pool, sizeof(struct port_lua)); + port->vtab = &port_lua_vtab; + port->L = L; + return (struct port *) port; +} + +/** + * Convert a Lua table to a tuple with as little + * overhead as possible. 
+ */ +static struct tuple * +lua_table_to_tuple(struct lua_State *L, int index) +{ + uint32_t field_count = 0; + uint32_t tuple_len = 0; + struct lua_field field; + + /** First go: calculate tuple length. */ + lua_pushnil(L); /* first key */ + while (lua_next(L, index) != 0) { + ++field_count; + + lua_tofield(L, -1, &field); + if (field.type == UNKNOWN) { + tnt_raise(ClientError, ER_PROC_RET, + lua_typename(L, lua_type(L, -1))); + } + tuple_len += field.len + varint32_sizeof(field.len); + lua_pop(L, 1); + } + struct tuple *tuple = tuple_alloc(tuple_format_ber, tuple_len); + /* + * Important: from here and on if there is an exception, + * the tuple is leaked. + */ + tuple->field_count = field_count; + char *pos = tuple->data; + + /* Second go: store data in the tuple. */ + + lua_pushnil(L); /* first key */ + while (lua_next(L, index) != 0) { + lua_tofield(L, -1, &field); + pos = pack_lstr(pos, field.data, field.len); + lua_pop(L, 1); + } + return tuple; +} + +static struct tuple* +lua_totuple(struct lua_State *L, int index) +{ + int type = lua_type(L, index); + struct tuple *tuple; + struct lua_field field; + lua_tofield(L, index, &field); + if (field.type != UNKNOWN) { + tuple = tuple_alloc(tuple_format_ber, + field.len + varint32_sizeof(field.len)); + tuple->field_count = 1; + pack_lstr(tuple->data, field.data, field.len); + return tuple; + } + switch (type) { + case LUA_TTABLE: + { + return lua_table_to_tuple(L, index); + } + case LUA_TUSERDATA: + { + tuple = lua_istuple(L, index); + if (tuple) + return tuple; + } + default: + /* + * LUA_TNONE, LUA_TTABLE, LUA_THREAD, LUA_TFUNCTION + */ + tnt_raise(ClientError, ER_PROC_RET, lua_typename(L, type)); + break; + } +} + +static void +port_add_lua_ret(struct port *port, struct lua_State *L, int index) +{ + struct tuple *tuple = lua_totuple(L, index); + auto scoped_guard = make_scoped_guard([=] { + if (tuple->refs == 0) + tuple_free(tuple); + }); + port_add_tuple(port, tuple, BOX_RETURN_TUPLE); +} + +/** + * Add all 
elements from Lua stack to fiber iov. + * + * To allow clients to understand a complex return from + * a procedure, we are compatible with SELECT protocol, + * and return the number of return values first, and + * then each return value as a tuple. + * + * If a Lua stack contains at least one scalar, each + * value on the stack is converted to a tuple. A Lua + * is converted to a tuple with multiple fields. + * + * If the stack is a Lua table, each member of which is + * not scalar, each member of the table is converted to + * a tuple. This way very large lists of return values can + * be used, since Lua stack size is limited by 8000 elements, + * while Lua table size is pretty much unlimited. + */ +static void +port_add_lua_multret(struct port *port, struct lua_State *L) +{ + int nargs = lua_gettop(L); + /** Check if we deal with a table of tables. */ + if (nargs == 1 && lua_istable(L, 1)) { + /* + * The table is not empty and consists of tables + * or tuples. Treat each table element as a tuple, + * and push it. + */ + lua_pushnil(L); + int has_keys = lua_next(L, 1); + if (has_keys && + (lua_istable(L, -1) || lua_isuserdata(L, -1))) { + + do { + port_add_lua_ret(port, L, lua_gettop(L)); + lua_pop(L, 1); + } while (lua_next(L, 1)); + return; + } else if (has_keys) { + lua_pop(L, 1); + } + } + for (int i = 1; i <= nargs; ++i) { + port_add_lua_ret(port, L, i); + } +} + +/* }}} */ + +/** + * The main extension provided to Lua by Tarantool/Box -- + * ability to call INSERT/UPDATE/SELECT/DELETE from within + * a Lua procedure. + * + * This is a low-level API, and it expects + * all arguments to be packed in accordance + * with the binary protocol format (iproto + * header excluded). + * + * Signature: + * box.process(op_code, request) + */ +static int +lbox_process(lua_State *L) +{ + uint32_t op = lua_tointeger(L, 1); /* Get the first arg. */ + size_t sz; + const char *req = luaL_checklstring(L, 2, &sz); /* Second arg. 
*/ + if (op == CALL) { + /* + * We should not be doing a CALL from within a CALL. + * To invoke one stored procedure from another, one must + * do it in Lua directly. This deals with + * infinite recursion, stack overflow and such. + */ + return luaL_error(L, "box.process(CALL, ...) is not allowed"); + } + int top = lua_gettop(L); /* to know how much is added by rw_callback */ + + size_t allocated_size = palloc_allocated(fiber_ptr->gc_pool); + struct port *port_lua = port_lua_create(L); + try { + box_process(port_lua, op, req, sz); + + /* + * This only works as long as port_lua doesn't + * use fiber->cleanup and fiber_ptr->gc_pool. + */ + ptruncate(fiber_ptr->gc_pool, allocated_size); + } catch (const Exception& e) { + ptruncate(fiber_ptr->gc_pool, allocated_size); + throw; + } + return lua_gettop(L) - top; +} + +static int +lbox_raise(lua_State *L) +{ + if (lua_gettop(L) != 2) + luaL_error(L, "box.raise(): bad arguments"); + uint32_t code = lua_tointeger(L, 1); + if (!code) + luaL_error(L, "box.raise(): unknown error code"); + const char *str = lua_tostring(L, 2); + tnt_raise(ClientError, str, code); + return 0; +} + +/** + * A helper to find a Lua function by name and put it + * on top of the stack. + */ +static int +box_lua_find(lua_State *L, const char *name, const char *name_end) +{ + int index = LUA_GLOBALSINDEX; + int objstack = 0; + const char *start = name, *end; + + while ((end = (const char *) memchr(start, '.', name_end - start))) { + lua_checkstack(L, 3); + lua_pushlstring(L, start, end - start); + lua_gettable(L, index); + if (! lua_istable(L, -1)) + tnt_raise(ClientError, ER_NO_SUCH_PROC, + name_end - name, name); + start = end + 1; /* next piece of a.b.c */ + index = lua_gettop(L); /* top of the stack */ + } + + /* box.something:method */ + if ((end = (const char *) memchr(start, ':', name_end - start))) { + lua_checkstack(L, 3); + lua_pushlstring(L, start, end - start); + lua_gettable(L, index); + if (! 
(lua_istable(L, -1) || + lua_islightuserdata(L, -1) || lua_isuserdata(L, -1) )) + tnt_raise(ClientError, ER_NO_SUCH_PROC, + name_end - name, name); + start = end + 1; /* next piece of a.b.c */ + index = lua_gettop(L); /* top of the stack */ + objstack = index; + } + + + lua_pushlstring(L, start, name_end - start); + lua_gettable(L, index); + if (! lua_isfunction(L, -1)) { + /* lua_call or lua_gettable would raise a type error + * for us, but our own message is more verbose. */ + tnt_raise(ClientError, ER_NO_SUCH_PROC, + name_end - name, name); + } + /* setting stack that it would contain only + * the function pointer. */ + if (index != LUA_GLOBALSINDEX) { + if (objstack == 0) { /* no object, only a function */ + lua_replace(L, 1); + } else if (objstack == 1) { /* just two values, swap them */ + lua_insert(L, -2); + } else { /* long path */ + lua_insert(L, 1); + lua_insert(L, 2); + objstack = 1; + } + lua_settop(L, 1 + objstack); + } + return 1 + objstack; +} + + +/** + * A helper to find lua stored procedures for box.call. + * box.call iteslf is pure Lua, to avoid issues + * with infinite call recursion smashing C + * thread stack. + */ + +static int +lbox_call_loadproc(struct lua_State *L) +{ + const char *name; + size_t name_len; + name = lua_tolstring(L, 1, &name_len); + box_lua_find(L, name, name + name_len); + return 1; +} + +/** + * Invoke a Lua stored procedure from the binary protocol + * (implementation of 'CALL' command code). + */ +void +box_lua_execute(struct request *request, struct port *port) +{ + const char **reqpos = &request->data; + const char *reqend = request->data + request->len; + lua_State *L = lua_newthread(root_L); + int coro_ref = luaL_ref(root_L, LUA_REGISTRYINDEX); + /* Request flags: not used. */ + (void) (pick_u32(reqpos, reqend)); + + try { + auto scoped_guard = make_scoped_guard([=] { + /* + * Allow the used coro to be garbage collected. + * @todo: cache and reuse it instead. 
+ */ + luaL_unref(root_L, LUA_REGISTRYINDEX, coro_ref); + }); + + uint32_t field_len; + /* proc name */ + const char *field = pick_field_str(reqpos, reqend, &field_len); + box_lua_find(L, field, field + field_len); + /* Push the rest of args (a tuple). */ + uint32_t nargs = pick_u32(reqpos, reqend); + luaL_checkstack(L, nargs, "call: out of stack"); + for (int i = 0; i < nargs; i++) { + field = pick_field_str(reqpos, reqend, &field_len); + lua_pushlstring(L, field, field_len); + } + lua_call(L, nargs, LUA_MULTRET); + /* Send results of the called procedure to the client. */ + port_add_lua_multret(port, L); + } catch (const Exception& e) { + throw; + } catch (...) { + tnt_raise(ClientError, ER_PROC_LUA, lua_tostring(L, -1)); + } +} + +static void +box_index_init_iterator_types(struct lua_State *L, int idx) +{ + for (int i = 0; i < iterator_type_MAX; i++) { + assert(strncmp(iterator_type_strs[i], "ITER_", 5) == 0); + lua_pushnumber(L, i); + /* cut ITER_ prefix from enum name */ + lua_setfield(L, idx, iterator_type_strs[i] + 5); + } +} + +/** + * Convert box.pack() format specifier to Tarantool + * binary protocol UPDATE opcode + */ +static char format_to_opcode(char format) +{ + switch (format) { + case '=': return 0; + case '+': return 1; + case '&': return 2; + case '^': return 3; + case '|': return 4; + case ':': return 5; + case '#': return 6; + case '!': return 7; + case '-': return 8; + default: return format; + } +} + +/** + * Counterpart to @a format_to_opcode + */ +static char opcode_to_format(char opcode) +{ + switch (opcode) { + case 0: return '='; + case 1: return '+'; + case 2: return '&'; + case 3: return '^'; + case 4: return '|'; + case 5: return ':'; + case 6: return '#'; + case 7: return '!'; + case 8: return '-'; + default: return opcode; + } +} + +enum { BOX_PACK_MAXNESTING = 16 }; /* Max nesting levels. */ + +/* + * This function is recursive. \a level param is needed to control + * the maximum recursion level. 
+ */
+static int
+luaL_packsize_r(struct lua_State *L, int index, int level)
+{
+	/*
+	 * Returns 1 for a number, cdata or string, the tuple's
+	 * field_count for a tuple userdata, and the sum over all
+	 * values for a table. Tables at BOX_PACK_MAXNESTING or
+	 * deeper contribute 0. Any other type raises a Lua error.
+	 */
+	switch (lua_type(L, index)) {
+	case LUA_TNUMBER:
+	case LUA_TCDATA:
+	case LUA_TSTRING:
+		return 1;
+	case LUA_TUSERDATA:
+	{
+		struct tuple *t = lua_istuple(L, index);
+		if (t == NULL)
+			luaL_error(L, "box.pack: unsupported type");
+		return t->field_count;
+	}
+	case LUA_TTABLE:
+	{
+		if (level >= BOX_PACK_MAXNESTING)
+			return 0; /* Limit nested tables */
+
+		int size = 0;
+		lua_pushnil(L);
+		while (lua_next(L, index) != 0) {
+			/*
+			 * Sic: use absolute index.
+			 * Every value of this table is exactly one
+			 * level deeper than the table itself, so pass
+			 * level + 1 and leave @a level untouched: this
+			 * keeps the depth accounting identical to
+			 * luaL_packvalue_r().
+			 */
+			size += luaL_packsize_r(L, lua_gettop(L), level + 1);
+			lua_pop(L, 1);
+		}
+		return size;
+	}
+	default:
+		luaL_error(L, "box.pack: unsupported type");
+	}
+	return 0;
+}
+
+/** Non-recursive entry point: luaL_packsize_r() starting at level 0. */
+static int
+luaL_packsize(struct lua_State *L, int index)
+{
+	return luaL_packsize_r(L, index, 0);
+}
+
+
+/*
+ * Append the value at stack slot \a index to \a b.
+ *
+ * Values recognized by lua_tofield() are written with pack_lstr(),
+ * a tuple userdata is written with tuple_to_tbuf(), and the values
+ * of a table are packed one by one. Any other type raises a Lua
+ * error.
+ *
+ * This function is recursive. \a level param is needed to control
+ * the maximum recursion level: tables at BOX_PACK_MAXNESTING or
+ * deeper are silently skipped.
+ */
+static void
+luaL_packvalue_r(struct lua_State *L, struct tbuf *b, int index, int level)
+{
+	struct lua_field field;
+	lua_tofield(L, index, &field);
+	if (field.type != UNKNOWN) {
+		/* Reserve room for the payload plus 5 extra bytes. */
+		tbuf_ensure(b, field.len + 5);
+		char *data = pack_lstr(b->data + b->size,
+				       field.data, field.len);
+		b->size = data - b->data;
+		return;
+	}
+
+	switch (lua_type(L, index)) {
+	case LUA_TUSERDATA:
+	{
+		struct tuple *tuple = lua_istuple(L, index);
+		if (tuple == NULL)
+			luaL_error(L, "box.pack: unsupported type");
+		tuple_to_tbuf(tuple, b);
+		return;
+	}
+	case LUA_TTABLE:
+	{
+		if (level >= BOX_PACK_MAXNESTING)
+			return; /* Limit nested tables */
+		lua_pushnil(L);
+		while (lua_next(L, index) != 0) {
+			/* Sic: use absolute index. */
+			luaL_packvalue_r(L, b, lua_gettop(L), level + 1);
+			lua_pop(L, 1);
+		}
+		return;
+	}
+	default:
+		luaL_error(L, "box.pack: unsupported type");
+		return;
+	}
+}
+
+/** Non-recursive entry point: luaL_packvalue_r() starting at level 0. */
+static void
+luaL_packvalue(struct lua_State *L, struct tbuf *b, int index)
+{
+	return luaL_packvalue_r(L, b, index, 0);
+}
+
+/**
+ * Pack stack slots [first, last] into \a b: first the total
+ * luaL_packsize() of the slots as a raw int, then every slot
+ * with luaL_packvalue().
+ */
+static void
+luaL_packstack(struct lua_State *L, struct tbuf *b, int first, int last)
+{
+	int size = 0;
+	/* sic: if arg_count is 0, first > last */
+	for (int i = first; i <= last; ++i)
+		size += luaL_packsize(L, i);
+
+	tbuf_ensure(b, size + sizeof(size));
+	tbuf_append(b, (char *) &size, sizeof(size));
+	for (int i = first; i <= last; ++i)
+		luaL_packvalue(L, b, i);
+}
+
+
+/**
+ * To use Tarantool/Box binary protocol primitives from Lua, we
+ * need a way to pack Lua variables into a binary representation.
+ * We do it by exporting a helper function
+ *
+ * box.pack(format, args...)
+ *
+ * which takes the format, which is very similar to Perl 'pack'
+ * format, and a list of arguments, and returns a binary string
+ * which has the arguments packed according to the format.
+ * + * For example, a typical SELECT packet packs in Lua like this: + * + * pkt = box.pack("iiiiiip", -- pack format + * 0, -- space id + * 0, -- index id + * 0, -- offset + * 2^32, -- limit + * 1, -- number of SELECT arguments + * 1, -- tuple cardinality + * key); -- the key to use for SELECT + * + * @sa doc/box-protocol.txt, binary protocol description + * @todo: implement box.unpack(format, str), for testing purposes + */ +static int +lbox_pack(struct lua_State *L) +{ + const char *format = luaL_checkstring(L, 1); + /* first arg comes second */ + int i = 2; + int nargs = lua_gettop(L); + size_t size; + const char *str; + + PallocGuard palloc_guard(fiber_ptr->gc_pool); + struct tbuf *b = tbuf_new(fiber_ptr->gc_pool); + + struct lua_field field; + double dbl; + float flt; + char *data; + while (*format) { + if (i > nargs) + luaL_error(L, "box.pack: argument count does not match " + "the format"); + lua_tofield(L, i, &field); + switch (*format) { + case 'B': + case 'b': + /* signed and unsigned 8-bit integers */ + if (field.type != NUM || field.u32 > UINT8_MAX) + luaL_error(L, "box.pack: expected 8-bit int"); + tbuf_append(b, (char *) &field.u32, sizeof(uint8_t)); + break; + case 'S': + case 's': + /* signed and unsigned 16-bit integers */ + if (field.type != NUM || field.u32 > UINT16_MAX) + luaL_error(L, "box.pack: expected 16-bit int"); + tbuf_append(b, (char *) &field.u32, sizeof(uint16_t)); + break; + case 'n': + /* signed and unsigned 16-bit big endian integers */ + if (field.type != NUM || field.u32 > UINT16_MAX) + luaL_error(L, "box.pack: expected 16-bit int"); + field.u32 = (uint16_t) htons((uint16_t) field.u32); + tbuf_append(b, (char *) &field.u32, sizeof(uint16_t)); + break; + case 'I': + case 'i': + /* signed and unsigned 32-bit integers */ + if (field.type != NUM) + luaL_error(L, "box.pack: expected 32-bit int"); + tbuf_append(b, (char *) &field.u32, sizeof(uint32_t)); + break; + case 'N': + /* signed and unsigned 32-bit big endian integers */ + if 
(field.type != NUM) + luaL_error(L, "box.pack: expected 32-bit int"); + field.u32 = htonl(field.u32); + tbuf_append(b, (char *) &field.u32, sizeof(uint32_t)); + break; + case 'L': + case 'l': + /* signed and unsigned 64-bit integers */ + if (field.type == NUM) { + /* extend 32-bit value to 64-bit */ + field.u64 = field.u32; + } else if (field.type != NUM64) { + luaL_error(L, "box.pack: expected 64-bit int"); + } + tbuf_append(b, (char *) &field.u64, sizeof(uint64_t)); + break; + case 'Q': + case 'q': + /* signed and unsigned 64-bit integers */ + if (field.type == NUM) { + /* extend 32-bit value to 64-bit */ + field.u64 = field.u32; + } else if (field.type != NUM64){ + luaL_error(L, "box.pack: expected 64-bit int"); + } + field.u64 = bswap_u64(field.u64); + tbuf_append(b, (char *) &field.u64, sizeof(uint64_t)); + break; + case 'd': + dbl = (double) lua_tonumber(L, i); + tbuf_append(b, (char *) &dbl, sizeof(double)); + break; + case 'f': + flt = (float) lua_tonumber(L, i); + tbuf_append(b, (char *) &flt, sizeof(float)); + break; + case 'w': + /* Perl 'pack' BER-encoded integer */ + if (field.type != NUM) + luaL_error(L, "box.pack: expected 32-bit int"); + + tbuf_ensure(b, 5); + data = pack_varint32(b->data + b->size, field.u32); + assert(data <= b->data + b->capacity); + b->size = data - b->data; + break; + case 'A': + case 'a': + /* A sequence of bytes */ + str = luaL_checklstring(L, i, &size); + tbuf_append(b, str, size); + break; + case 'P': + case 'p': + luaL_packvalue(L, b, i); + break; + case 'V': + { + int arg_count = luaL_checkint(L, i); + if (i + arg_count > nargs) + luaL_error(L, "box.pack: argument count does not match " + "the format"); + luaL_packstack(L, b, i + 1, i + arg_count); + i += arg_count; + break; + } + case '=': + /* update tuple set foo = bar */ + case '+': + /* set field += val */ + case '-': + /* set field -= val */ + case '&': + /* set field & =val */ + case '|': + /* set field |= val */ + case '^': + /* set field ^= val */ + case ':': + 
/* splice */ + case '#': + /* delete field */ + case '!': + /* insert field */ + /* field no */ + tbuf_ensure(b, sizeof(uint32_t) + 1); + data = b->data + b->size; + + data = pack_u32(data, lua_tointeger(L, i)); + *data++ = format_to_opcode(*format); + + assert(data <= b->data + b->capacity); + b->size = data - b->data; + break; + default: + luaL_error(L, "box.pack: unsupported pack " + "format specifier '%c'", *format); + } + i++; + format++; + } + + lua_pushlstring(L, tbuf_str(b), b->size); + + return 1; +} + +const char * +box_unpack_response(struct lua_State *L, const char *s, const char *end) +{ + uint32_t tuple_count = pick_u32(&s, end); + + /* Unpack and push tuples. */ + while (tuple_count--) { + uint32_t bsize = pick_u32(&s, end); + uint32_t field_count = pick_u32(&s, end); + const char *tend = s + bsize; + if (tend > end) + tnt_raise(IllegalParams, "incorrect packet length"); + + struct tuple *tuple = tuple_new(tuple_format_ber, + field_count, &s, tend); + lbox_pushtuple(L, tuple); + } + return s; +} + + +static int +lbox_unpack(struct lua_State *L) +{ + size_t format_size = 0; + const char *format = luaL_checklstring(L, 1, &format_size); + const char *f = format; + + size_t str_size = 0; + const char *str = luaL_checklstring(L, 2, &str_size); + const char *end = str + str_size; + const char *s = str; + + int save_stacksize = lua_gettop(L); + + char charbuf; + uint8_t u8buf; + uint16_t u16buf; + uint32_t u32buf; + double dbl; + float flt; + +#define CHECK_SIZE(cur) if (unlikely((cur) >= end)) { \ + luaL_error(L, "box.unpack('%c'): got %d bytes (expected: %d+)", \ + *f, (int) (end - str), (int) 1 + ((cur) - str)); \ +} + while (*f) { + switch (*f) { + case 'b': + CHECK_SIZE(s); + u8buf = *(uint8_t *) s; + lua_pushnumber(L, u8buf); + s++; + break; + case 's': + CHECK_SIZE(s + 1); + u16buf = *(uint16_t *) s; + lua_pushnumber(L, u16buf); + s += 2; + break; + case 'n': + CHECK_SIZE(s + 1); + u16buf = ntohs(*(uint16_t *) s); + lua_pushnumber(L, u16buf); + s += 
2; + break; + case 'i': + CHECK_SIZE(s + 3); + u32buf = *(uint32_t *) s; + lua_pushnumber(L, u32buf); + s += 4; + break; + case 'N': + CHECK_SIZE(s + 3); + u32buf = ntohl(*(uint32_t *) s); + lua_pushnumber(L, u32buf); + s += 4; + break; + case 'l': + CHECK_SIZE(s + 7); + luaL_pushnumber64(L, *(uint64_t*) s); + s += 8; + break; + case 'q': + CHECK_SIZE(s + 7); + luaL_pushnumber64(L, bswap_u64(*(uint64_t*) s)); + s += 8; + break; + case 'd': + CHECK_SIZE(s + 7); + dbl = *(double *) s; + lua_pushnumber(L, dbl); + s += 8; + break; + case 'f': + CHECK_SIZE(s + 3); + flt = *(float *) s; + lua_pushnumber(L, flt); + s += 4; + break; + case 'w': + /* pick_varint32 throws exception on error. */ + u32buf = pick_varint32(&s, end); + lua_pushnumber(L, u32buf); + break; + + case 'a': + case 'A': /* The rest of the data is a Lua string. */ + lua_pushlstring(L, s, end - s); + s = end; + break; + case 'P': + case 'p': + /* pick_varint32 throws exception on error. */ + u32buf = pick_varint32(&s, end); + CHECK_SIZE(s + u32buf - 1); + lua_pushlstring(L, s, u32buf); + s += u32buf; + break; + case '=': + /* update tuple set foo = bar */ + case '+': + /* set field += val */ + case '-': + /* set field -= val */ + case '&': + /* set field & =val */ + case '|': + /* set field |= val */ + case '^': + /* set field ^= val */ + case ':': + /* splice */ + case '#': + /* delete field */ + case '!': + /* insert field */ + CHECK_SIZE(s + 4); + + /* field no */ + u32buf = *(uint32_t *) s; + + /* opcode */ + charbuf = *(s + 4); + charbuf = opcode_to_format(charbuf); + if (charbuf != *f) { + luaL_error(L, "box.unpack('%s'): " + "unexpected opcode: " + "offset %d, expected '%c'," + "found '%c'", + format, s - str, *f, charbuf); + } + + lua_pushnumber(L, u32buf); + s += 5; + break; + + case 'R': /* Unpack server response, IPROTO format. 
*/ + { + s = box_unpack_response(L, s, end); + break; + } + default: + luaL_error(L, "box.unpack: unsupported " + "format specifier '%c'", *f); + } + f++; + } + + assert(s <= end); + + if (s != end) { + luaL_error(L, "box.unpack('%s'): too many bytes: " + "unpacked %d, total %d", + format, s - str, str_size); + } + + return lua_gettop(L) - save_stacksize; + +#undef CHECK_SIZE +} + +static const struct luaL_reg boxlib[] = { + {"process", lbox_process}, + {"call_loadproc", lbox_call_loadproc}, + {"raise", lbox_raise}, + {"pack", lbox_pack}, + {"unpack", lbox_unpack}, + {NULL, NULL} +}; + +void +mod_lua_init(struct lua_State *L) +{ + /* box, box.tuple */ + tarantool_lua_register_type(L, tuplelib_name, lbox_tuple_meta); + luaL_register(L, tuplelib_name, lbox_tuplelib); + lua_pop(L, 1); + tarantool_lua_register_type(L, tuple_iteratorlib_name, + lbox_tuple_iterator_meta); + luaL_register(L, "box", boxlib); + lua_pop(L, 1); + /* box.index */ + tarantool_lua_register_type(L, indexlib_name, lbox_index_meta); + luaL_register(L, "box.index", indexlib); + box_index_init_iterator_types(L, -2); + lua_pop(L, 1); + tarantool_lua_register_type(L, iteratorlib_name, lbox_iterator_meta); + + /* Load Lua extension */ + for (const char **s = lua_sources; *s; s++) { + if (luaL_dostring(L, *s)) + panic("Error loading Lua source %.160s...: %s", + *s, lua_tostring(L, -1)); + } + + assert(lua_gettop(L) == 0); + + root_L = L; +} diff --git a/src/iproto.cc b/src/iproto.cc new file mode 100644 index 0000000000000000000000000000000000000000..81eec8464139c6c650935b7b619c4cda23627763 --- /dev/null +++ b/src/iproto.cc @@ -0,0 +1,794 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "iproto.h" +#include <string.h> +#include <stdint.h> +#include <stdarg.h> +#include <stdio.h> + +#include "iproto_port.h" +#include "tarantool.h" +#include "exception.h" +#include "errcode.h" +#include "fiber.h" +#include "say.h" +#include "evio.h" +#include "session.h" +#include "scoped_guard.h" + +static struct iproto_header dummy_header = { 0, 0, 0 }; +const uint32_t msg_ping = 0xff00; + +/* {{{ iproto_queue */ + +struct iproto_request; + +/** + * Implementation of an input queue of the box request processor. + * + * Socket event handlers read data, determine request boundaries + * and enqueue requests. Once all input/output events are + * processed, an own event handler is invoked to deal with the + * requests in the queue: it's important that each request is + * processed in a fiber environment. 
+ *
+ * @sa iproto_queue_schedule, iproto_queue_handler
+ */
+
+struct iproto_queue
+{
+	/**
+	 * Fixed-size array of request slots, allocated once
+	 * when the queue is initialized.
+	 */
+	struct iproto_request *queue;
+	/**
+	 * Entry point of the worker fibers which drain
+	 * this queue.
+	 */
+	void (*handler)(va_list);
+	/**
+	 * Parked worker fibers, ready to be reused for
+	 * the next batch of requests.
+	 */
+	struct rlist fiber_cache;
+	/**
+	 * Event used to kick off request processing once
+	 * the queue has something in it.
+	 */
+	struct ev_async watcher;
+	/* Index of the next slot to dequeue. */
+	int begin;
+	/* Index of the next free slot. */
+	int end;
+	/* Number of slots in the array. */
+	int size;
+};
+
+enum {
+	IPROTO_REQUEST_QUEUE_SIZE = 2048,
+};
+
+struct iproto_session;
+
+typedef void (*iproto_request_f)(struct iproto_request *);
+
+/**
+ * One queued unit of work. Requests of all clients share
+ * a single queue and are taken from it in FIFO order.
+ */
+struct iproto_request
+{
+	/* Connection the request belongs to. */
+	struct iproto_session *session;
+	/* I/O buffer pair the request was read into. */
+	struct iobuf *iobuf;
+	/* Position of the request in the input buffer. */
+	struct iproto_header *header;
+	/* Callback which executes the request. */
+	iproto_request_f process;
+};
+
+/**
+ * A single global queue for all requests in all connections. All
+ * requests are processed concurrently.
+ * Is also used as a queue for just established connections and to
+ * execute disconnect triggers. A few notes about these triggers:
+ * - they need to be run in a fiber
+ * - unlike an ordinary request failure, on_connect trigger
+ *   failure must lead to connection shutdown.
+ * - as long as on_connect trigger can be used for client
+ *   authentication, it must be processed before any other request
+ *   on this connection.
+ */
+static struct iproto_queue request_queue;
+
+/** True if there are no requests waiting in \a i_queue. */
+static inline bool
+iproto_queue_is_empty(struct iproto_queue *i_queue)
+{
+	return i_queue->begin == i_queue->end;
+}
+
+/**
+ * Append a request to \a i_queue.
+ *
+ * If all slots are taken, the queue's watcher callback is invoked
+ * synchronously first to work the backlog off. If the queue was
+ * empty before the call, an event is fed to the queue's watcher
+ * so that processing gets scheduled.
+ */
+static inline void
+iproto_enqueue_request(struct iproto_queue *i_queue,
+		       struct iproto_session *session,
+		       struct iobuf *iobuf,
+		       struct iproto_header *header,
+		       iproto_request_f process)
+{
+	/* If the queue is full, invoke the handler to work it off. */
+	if (i_queue->end == i_queue->size)
+		ev_invoke(&i_queue->watcher, EV_CUSTOM);
+	assert(i_queue->end < i_queue->size);
+	bool was_empty = iproto_queue_is_empty(i_queue);
+	struct iproto_request *request = i_queue->queue + i_queue->end++;
+
+	request->session = session;
+	request->iobuf = iobuf;
+	request->header = header;
+	request->process = process;
+	/*
+	 * The queue has just become non-empty: schedule its
+	 * processing. Use the watcher of the queue which was
+	 * passed in, not the one of the global queue.
+	 */
+	if (was_empty)
+		ev_feed_event(&i_queue->watcher, EV_CUSTOM);
+}
+
+/**
+ * Copy the oldest request of \a i_queue into \a out.
+ * Once the last request is taken, both positions are rewound
+ * to the start of the array.
+ *
+ * @return false if the queue is empty, true otherwise
+ */
+static inline bool
+iproto_dequeue_request(struct iproto_queue *i_queue,
+		       struct iproto_request *out)
+{
+	if (i_queue->begin == i_queue->end)
+		return false;
+	struct iproto_request *request = i_queue->queue + i_queue->begin++;
+	if (i_queue->begin == i_queue->end)
+		i_queue->begin = i_queue->end = 0;
+	*out = *request;
+	return true;
+}
+
+/** Put the current fiber into a queue fiber cache. */
+static inline void
+iproto_cache_fiber(struct iproto_queue *i_queue)
+{
+	fiber_gc();
+	rlist_add_entry(&i_queue->fiber_cache, fiber_ptr, state);
+	fiber_yield();
+}
+
+/** Create fibers to handle all outstanding tasks. */
+static void
+iproto_queue_schedule(struct ev_async *watcher,
+		      int events __attribute__((unused)))
+{
+	struct iproto_queue *i_queue = (struct iproto_queue *) watcher->data;
+	while (! iproto_queue_is_empty(i_queue)) {
+
+		/* Prefer a cached fiber, create a new one otherwise. */
+		struct fiber *f;
+		if (! rlist_empty(&i_queue->fiber_cache))
+			f = rlist_shift_entry(&i_queue->fiber_cache,
+					      struct fiber, state);
+		else
+			f = fiber_new("iproto", i_queue->handler);
+		fiber_call(f, i_queue);
+	}
+}
+
+/**
+ * Initialize \a i_queue with \a size request slots and
+ * \a handler as the main function of its worker fibers.
+ */
+static inline void
+iproto_queue_init(struct iproto_queue *i_queue,
+		  int size, void (*handler)(va_list))
+{
+	i_queue->size = size;
+	i_queue->begin = i_queue->end = 0;
+	i_queue->queue = (struct iproto_request *) palloc(eter_pool, size *
+					sizeof (struct iproto_request));
+	/**
+	 * Initialize an ev_async event which would start
+	 * workers for all outstanding tasks.
+	 */
+	ev_async_init(&i_queue->watcher, iproto_queue_schedule);
+	i_queue->watcher.data = i_queue;
+	i_queue->handler = handler;
+	rlist_create(&i_queue->fiber_cache);
+}
+
+static inline uint32_t
+iproto_session_id(struct iproto_session *session);
+
+static inline uint64_t
+iproto_session_cookie(struct iproto_session *session);
+
+/**
+ * A handler to process all queued requests.
+ *
+ * Never returns: once the queue is drained the fiber parks
+ * itself in the fiber cache of its own queue and starts over
+ * when it is called again.
+ */
+static void
+iproto_queue_handler(va_list ap)
+{
+	struct iproto_queue *i_queue = va_arg(ap, struct iproto_queue *);
+	struct iproto_request request;
+restart:
+	while (iproto_dequeue_request(i_queue, &request)) {
+
+		fiber_set_sid(fiber_ptr, iproto_session_id(request.session),
+			      iproto_session_cookie(request.session));
+		request.process(&request);
+	}
+	/* Park in the queue we were started for. */
+	iproto_cache_fiber(i_queue);
+	goto restart;
+}
+
+/* }}} */
+
+/* {{{ iproto_session */
+
+/** Context of a single client connection. */
+struct iproto_session
+{
+	/* Cache of iproto_session objects. */
+	SLIST_ENTRY(iproto_session) next_in_cache;
+	/**
+	 * Two rotating buffers for I/O. Input is always read into
+	 * iobuf[0]. As soon as iobuf[0] input buffer becomes full,
+	 * iobuf[0] is moved to iobuf[1], for flushing. As soon as
+	 * all output in iobuf[1].out is sent to the client, iobuf[1]
+	 * and iobuf[0] are moved around again.
+	 */
+	struct iobuf *iobuf[2];
+	/*
+	 * Size of readahead which is not parsed yet, i.e.
+	 * size of a piece of request which is not fully read.
+	 * Is always relative to iobuf[0]->in.end. In other words,
+	 * iobuf[0]->in.end - parse_size gives the start of the
+	 * unparsed request. A size rather than a pointer is used
+	 * to be safe in case in->buf is reallocated. Being
+	 * relative to in->end, rather than to in->pos is helpful to
+	 * make sure ibuf_reserve() or iobuf rotation don't make
+	 * the value meaningless.
+	 */
+	ssize_t parse_size;
+	/** Current write position in the output buffer */
+	struct obuf_svp write_pos;
+	/**
+	 * Function of the request processor to handle
+	 * a single request.
+	 */
+	box_process_func *handler;
+	struct ev_io input;
+	struct ev_io output;
+	/** Session id. */
+	uint32_t sid;
+	uint64_t cookie;
+};
+
+SLIST_HEAD(, iproto_session) iproto_session_cache =
+	SLIST_HEAD_INITIALIZER(iproto_session_cache);
+
+/**
+ * A session is idle when the client is gone
+ * and there are no outstanding requests in the request queue.
+ * An idle session can be safely garbage collected.
+ * Note: a session only becomes idle after iproto_session_shutdown(),
+ * which closes the fd. This is why here the check is for
+ * evio_is_active() (false if fd is closed), not ev_is_active()
+ * (false if event is not started).
+ */
+static inline bool
+iproto_session_is_idle(struct iproto_session *session)
+{
+	return !evio_is_active(&session->input) &&
+		ibuf_size(&session->iobuf[0]->in) == 0 &&
+		ibuf_size(&session->iobuf[1]->in) == 0;
+}
+
+/** Session id accessor. */
+static inline uint32_t
+iproto_session_id(struct iproto_session *session)
+{
+	return session->sid;
+}
+
+/** Session cookie accessor. */
+static inline uint64_t
+iproto_session_cookie(struct iproto_session *session)
+{
+	return session->cookie;
+}
+
+static void
+iproto_session_on_input(struct ev_io *watcher,
+			int revents __attribute__((unused)));
+static void
+iproto_session_on_output(struct ev_io *watcher,
+			 int revents __attribute__((unused)));
+
+static void
+iproto_process_request(struct iproto_request *request);
+
+static void
+iproto_process_connect(struct iproto_request *request);
+
+static void
+iproto_process_disconnect(struct iproto_request *request);
+
+/**
+ * Get a session object, from the session cache if it is not
+ * empty or from eter_pool otherwise, and set it up for \a fd.
+ *
+ * @param name   name given to the two new iobufs
+ * @param fd     socket watched by the input and output events
+ * @param addr   peer address; its first 8 bytes become the cookie
+ * @param param  request processor stored as the session handler
+ * @return the session, with sid 0 and nothing parsed yet
+ */
+static struct iproto_session *
+iproto_session_create(const char *name, int fd, struct sockaddr_in *addr,
+		      box_process_func *param)
+{
+	struct iproto_session *session;
+	if (SLIST_EMPTY(&iproto_session_cache)) {
+		session = (struct iproto_session *) palloc(eter_pool, sizeof(*session));
+		session->input.data = session->output.data = session;
+	} else {
+		session = SLIST_FIRST(&iproto_session_cache);
+		SLIST_REMOVE_HEAD(&iproto_session_cache, next_in_cache);
+		assert(session->input.fd == -1);
+		assert(session->output.fd == -1);
+	}
+	session->handler = param;
+	ev_io_init(&session->input, iproto_session_on_input, fd, EV_READ);
+	ev_io_init(&session->output, iproto_session_on_output, fd, EV_WRITE);
+	session->iobuf[0] = iobuf_new(name);
+	session->iobuf[1] = iobuf_new(name);
+	session->parse_size = 0;
+	session->write_pos = obuf_create_svp(&session->iobuf[0]->out);
+	session->sid = 0;
+	/*
+	 * Copy the bytes instead of reading the address through
+	 * a uint64_t pointer: sockaddr_in is neither a uint64_t
+	 * nor guaranteed to be aligned as one.
+	 */
+	memcpy(&session->cookie, addr, sizeof(session->cookie));
+	return session;
+}
+
+/** Recycle a session. Never throws.
*/
+static inline void
+iproto_session_destroy(struct iproto_session *session)
+{
+	assert(iproto_session_is_idle(session));
+	assert(!evio_is_active(&session->output));
+	/* Never throws. No-op if sid is 0. */
+	session_destroy(session->sid);
+	/* Release both buffers, then park the object in the cache. */
+	for (int i = 0; i < 2; i++)
+		iobuf_delete(session->iobuf[i]);
+	SLIST_INSERT_HEAD(&iproto_session_cache, session, next_in_cache);
+}
+
+/**
+ * Stop both watchers, close the socket and mark the fd as
+ * closed. An idle session gets a disconnect request queued for
+ * it; a busy one is left to the requests still in flight.
+ */
+static inline void
+iproto_session_shutdown(struct iproto_session *session)
+{
+	struct ibuf *in = &session->iobuf[0]->in;
+
+	ev_io_stop(&session->input);
+	ev_io_stop(&session->output);
+	close(session->input.fd);
+	session->output.fd = -1;
+	session->input.fd = -1;
+	/*
+	 * Drop the not yet parsed tail of the input, so that
+	 * the session can be recycled right after the parsed
+	 * part is processed.
+	 */
+	in->end -= session->parse_size;
+	/*
+	 * A session with requests in flight is destroyed when
+	 * the last of them is handled, nothing to do here.
+	 * Sic: this check is what prevents a session from being
+	 * destroyed twice.
+	 */
+	if (!iproto_session_is_idle(session))
+		return;
+	/*
+	 * Idle session: queue a separate request which runs the
+	 * on_disconnect() trigger and destroys the session.
+	 */
+	iproto_enqueue_request(&request_queue, session, session->iobuf[0],
+			       &dummy_header, iproto_process_disconnect);
+}
+
+/** Raise IllegalParams if the request body exceeds the limit. */
+static inline void
+iproto_validate_header(struct iproto_header *header, int fd)
+{
+	(void) fd;
+	if (header->len <= IPROTO_BODY_LEN_MAX)
+		return;
+	/*
+	 * The package is too big, just close connection for now to
+	 * avoid DoS.
+	 */
+	tnt_raise(IllegalParams, "received package is too big");
+}
+
+/**
+ * If there is no space for reading input, we can do one of the
+ * following:
+ * - try to get a new iobuf, so that it can fit the request.
+ *   Always getting a new input buffer when there is no space
+ *   makes the server susceptible to input-flood attacks.
+ *   Therefore, at most 2 iobufs are used in a single session,
+ *   one is "open", receiving input, and the other is closed,
+ *   flushing output.
+ * - stop input and wait until the client reads piled up output,
+ *   so the input buffer can be reused.
This complements + * the previous strategy. It is only safe to stop input if it + * is known that there is output. In this case input event + * flow will be resumed when all replies to previous requests + * are sent, in iproto_session_gc_iobuf(). Since there are two + * buffers, the input is only stopped when both of them + * are fully used up. + * + * To make this strategy work, each iobuf in use must fit at + * least one request. Otherwise, iobuf[1] may end + * up having no data to flush, while iobuf[0] is too small to + * fit a big incoming request. + */ +static struct iobuf * +iproto_session_input_iobuf(struct iproto_session *session) +{ + struct iobuf *oldbuf = session->iobuf[0]; + + ssize_t to_read = sizeof(struct iproto_header) + + (session->parse_size >= sizeof(struct iproto_header) ? + iproto(oldbuf->in.end - session->parse_size)->len : 0) - + session->parse_size; + + if (ibuf_unused(&oldbuf->in) >= to_read) + return oldbuf; + + /** All requests are processed, reuse the buffer. */ + if (ibuf_size(&oldbuf->in) == session->parse_size) { + ibuf_reserve(&oldbuf->in, to_read); + return oldbuf; + } + + if (! iobuf_is_idle(session->iobuf[1])) { + /* + * Wait until the second buffer is flushed + * and becomes available for reuse. + */ + return NULL; + } + struct iobuf *newbuf = session->iobuf[1]; + + ibuf_reserve(&newbuf->in, to_read + session->parse_size); + /* + * Discard unparsed data in the old buffer, otherwise it + * won't be recycled when all parsed requests are processed. + */ + oldbuf->in.end -= session->parse_size; + /* Move the cached request prefix to the new buffer. */ + memcpy(newbuf->in.pos, oldbuf->in.end, session->parse_size); + newbuf->in.end += session->parse_size; + /* + * Rotate buffers. Not strictly necessary, but + * helps preserve response order. + */ + session->iobuf[1] = oldbuf; + session->iobuf[0] = newbuf; + return newbuf; +} + +/** Enqueue all requests which were read up. 
*/
+static inline void
+iproto_enqueue_batch(struct iproto_session *session, struct ibuf *in, int fd)
+{
+	/* Go on for as long as a complete header is available. */
+	while (! (session->parse_size < sizeof(struct iproto_header))) {
+
+		struct iproto_header *header =
+			iproto(in->end - session->parse_size);
+		/* Throws if the announced body is too big. */
+		iproto_validate_header(header, fd);
+
+		size_t request_len = sizeof(*header) + header->len;
+		/* The body is not fully read yet. */
+		if (session->parse_size < request_len)
+			break;
+
+		iproto_enqueue_request(&request_queue, session,
+				       session->iobuf[0], header,
+				       iproto_process_request);
+		session->parse_size -= request_len;
+	}
+}
+
+/**
+ * Input event callback: read what the socket has, queue every
+ * complete request and keep the input event flowing. EOF or an
+ * exception shuts the session down.
+ */
+static void
+iproto_session_on_input(struct ev_io *watcher,
+			int revents __attribute__((unused)))
+{
+	struct iproto_session *session = (struct iproto_session *) watcher->data;
+	int fd = session->input.fd;
+	assert(fd >= 0);
+
+	try {
+		/* Find a buffer with room for the next read. */
+		struct iobuf *target = iproto_session_input_iobuf(session);
+		if (target == NULL) {
+			/* No buffer available: pause input. */
+			ev_io_stop(&session->input);
+			return;
+		}
+
+		struct ibuf *in = &target->in;
+		int nread = sio_read(fd, in->end, ibuf_unused(in));
+		if (nread < 0) {
+			/* Socket is not ready. */
+			ev_io_start(&session->input);
+			return;
+		}
+		if (nread == 0) {
+			/* EOF */
+			iproto_session_shutdown(session);
+			return;
+		}
+		/* Account for the new bytes. */
+		in->end += nread;
+		session->parse_size += nread;
+		/* Queue whatever is complete by now. */
+		iproto_enqueue_batch(session, in, fd);
+		/*
+		 * The socket may have more data: come back for
+		 * it even if the watcher is not started.
+		 */
+		if (!ev_is_active(&session->input))
+			ev_feed_event(&session->input, EV_READ);
+	} catch (const Exception& e) {
+		e.log();
+		iproto_session_shutdown(session);
+	}
+}
+
+/** Get the iobuf which is currently being flushed.
*/
+static inline struct iobuf *
+iproto_session_output_iobuf(struct iproto_session *session)
+{
+	struct iobuf *older = session->iobuf[1];
+	struct iobuf *newer = session->iobuf[0];
+
+	if (obuf_size(&older->out))
+		return older;
+	/*
+	 * The newer buffer may only be flushed when the older
+	 * one has no input left: after a partial write of the
+	 * newer buffer the client could otherwise receive a mix
+	 * of reply fragments from both buffers.
+	 */
+	if (ibuf_size(&older->in) == 0 && obuf_size(&newer->out))
+		return newer;
+	return NULL;
+}
+
+/**
+ * writev() to the socket and handle the output.
+ *
+ * @return 0 if the whole output buffer has been written out,
+ *         -1 otherwise
+ */
+static int
+iproto_flush(struct iobuf *iobuf, int fd, struct obuf_svp *svp)
+{
+	/* Resume from the position saved by the previous call. */
+	struct iovec *iov = iobuf->out.iov + svp->pos;
+	int iovcnt = obuf_iovcnt(&iobuf->out) - svp->pos;
+	assert(iovcnt);
+	ssize_t written;
+	try {
+		/* Adjust the first vector by the saved offset. */
+		sio_add_to_iov(iov, -svp->iov_len);
+		written = sio_writev(fd, iov, iovcnt);
+
+		/* Undo the adjustment. */
+		sio_add_to_iov(iov, svp->iov_len);
+	} catch (const Exception&) {
+		/* Undo the adjustment on failure as well. */
+		sio_add_to_iov(iov, svp->iov_len);
+		throw;
+	}
+
+	if (! (written > 0))
+		return -1;
+
+	if (svp->size + written == obuf_size(&iobuf->out)) {
+		/* Everything is out: reset the buffer and the position. */
+		iobuf_gc(iobuf);
+		*svp = obuf_create_svp(&iobuf->out);
+		return 0;
+	}
+	/* Partial write: remember how far we got. */
+	svp->size += written;
+	svp->pos += sio_move_iov(iov, written, &svp->iov_len);
+	return -1;
+}
+
+/**
+ * Output event callback: flush buffers for as long as there is
+ * output, re-arming the output event when a buffer is not
+ * flushed completely. An exception shuts the session down.
+ */
+static void
+iproto_session_on_output(struct ev_io *watcher,
+			 int revent __attribute__((unused)))
+{
+	struct iproto_session *session = (struct iproto_session *) watcher->data;
+	int fd = session->output.fd;
+	struct obuf_svp *svp = &session->write_pos;
+
+	try {
+		for (;;) {
+			struct iobuf *pending =
+				iproto_session_output_iobuf(session);
+			if (! pending)
+				break;
+			if (iproto_flush(pending, fd, svp) < 0) {
+				/* Wait for the socket to become writable. */
+				ev_io_start(&session->output);
+				return;
+			}
+			/* A buffer has been freed up: let input continue. */
+			if (! ev_is_active(&session->input))
+				ev_feed_event(&session->input, EV_READ);
+		}
+		/* Nothing left to write. */
+		if (ev_is_active(&session->output))
+			ev_io_stop(&session->output);
+	} catch (const Exception& e) {
+		e.log();
+		iproto_session_shutdown(session);
+	}
+}
+
+/* }}} */
+
+/* {{{ iproto_process_* functions */
+
+/** Stack reply to 'ping' packet: the request header with len 0. */
+static inline void
+iproto_reply_ping(struct obuf *out, struct iproto_header *req)
+{
+	struct iproto_header pong = *req;
+	pong.len = 0;
+	obuf_dup(out, &pong, sizeof(pong));
+}
+
+/**
+ * Send an error packet back: the request header, the error
+ * code and the error message with its terminating zero.
+ */
+static inline void
+iproto_reply_error(struct obuf *out, struct iproto_header *req,
+		   const ClientError& e)
+{
+	struct iproto_header reply = *req;
+	/* Message length, terminating zero included. */
+	int errmsg_len = strlen(e.errmsg()) + 1;
+	uint32_t ret_code = tnt_errcode_val(e.errcode());
+	reply.len = sizeof(ret_code) + errmsg_len;
+	obuf_dup(out, &reply, sizeof(reply));
+	obuf_dup(out, &ret_code, sizeof(ret_code));
+	obuf_dup(out, e.errmsg(), errmsg_len);
+}
+
+/** Stack a reply to a single request to the fiber's io vector.
*/
+static inline void
+iproto_reply(struct iproto_port *port, box_process_func callback,
+	     struct obuf *out, struct iproto_header *header)
+{
+	if (header->msg_code == msg_ping)
+		return iproto_reply_ping(out, header); /* ping is answered here, the callback is not invoked */
+
+	/* Make request body point to iproto data: it starts right after the header. */
+	char *body = (char *) &header[1];
+	iproto_port_init(port, out, header);
+	try {
+		callback((struct port *) port, header->msg_code,
+			 body, header->len);
+	} catch (const ClientError& e) { /* client errors become an error reply */
+		if (port->reply.found) /* presumably: a reply was already started -- drop it */
+			obuf_rollback_to_svp(out, &port->svp);
+		iproto_reply_error(out, header, e);
+	}
+}
+/**
+ * Process a single queued request of a session.
+ *
+ * A scope guard (presumably run when scope_guard goes out of
+ * scope, i.e. on every return below) advances the input buffer
+ * position past this request (header plus header->len bytes) and
+ * destroys the session if iproto_session_is_idle() says so.
+ *
+ * The handler is not run if evio_is_active() reports the session
+ * output as inactive; the same check is repeated after the
+ * handler has run. Finally an EV_WRITE event is fed to the
+ * output watcher unless ev_is_active() reports it active.
+ */
+static void
+iproto_process_request(struct iproto_request *request)
+{
+	struct iproto_session *session = request->session;
+	struct iproto_header *header = request->header;
+	struct iobuf *iobuf = request->iobuf;
+	struct iproto_port port;
+
+	auto scope_guard = make_scoped_guard([=]{
+		iobuf->in.pos += sizeof(*header) + header->len;
+		if (iproto_session_is_idle(session))
+			iproto_session_destroy(session);
+	});
+
+	if (unlikely(! evio_is_active(&session->output)))
+		return;
+
+	iproto_reply(&port, *session->handler, &iobuf->out, header);
+
+	if (unlikely(! evio_is_active(&session->output)))
+		return;
+
+	if (! ev_is_active(&session->output))
+		ev_feed_event(&session->output, EV_WRITE);
+}
+
+/**
+ * Handshake a connection: invoke the on-connect trigger
+ * and possibly authenticate. Try to send the client an error
+ * upon a failure.
+ *
+ * If session_create() throws a ClientError, the error is written
+ * to the output buffer and flushed to the socket (a failure of
+ * the flush itself is only logged), then the session is shut
+ * down. Any other Exception is logged and the session is shut
+ * down without a reply. On success an EV_READ event is fed to
+ * the input watcher.
+ */
+static void
+iproto_process_connect(struct iproto_request *request)
+{
+	struct iproto_session *session = request->session;
+	struct iobuf *iobuf = request->iobuf;
+	int fd = session->input.fd;
+	try { /* connect. */
+		session->sid = session_create(fd, session->cookie);
+	} catch (const ClientError& e) {
+		iproto_reply_error(&iobuf->out, request->header, e);
+		try {
+			iproto_flush(iobuf, fd, &session->write_pos);
+		} catch (const Exception& e) {
+			e.log();
+		}
+		iproto_session_shutdown(session);
+		return;
+	} catch (const Exception& e) {
+		e.log();
+		assert(session->sid == 0);
+		iproto_session_shutdown(session);
+		return;
+	}
+	/*
+	 * Connect is synchronous, so no one could have been
+	 * messing up with the session while it was in
+	 * progress.
+	 */
+	assert(evio_is_active(&session->input));
+	/* Handshake OK, start reading input. */
+	ev_feed_event(&session->input, EV_READ);
+}
+/**
+ * Handle a disconnect request: give the current fiber the sid
+ * and cookie of the session being closed, then destroy the
+ * session.
+ */
+static void
+iproto_process_disconnect(struct iproto_request *request)
+{
+	fiber_set_sid(fiber_ptr, request->session->sid, request->session->cookie);
+	/* Runs the trigger, which may yield. */
+	iproto_session_destroy(request->session);
+}
+
+/** }}} */
+
+/**
+ * Create a session context and start input.
+ *
+ * The session is named "iobuf/<peer address>" and is created
+ * with service->on_accept_param as its processing function. A
+ * request carrying dummy_header is then put on request_queue, to
+ * be handled by iproto_process_connect().
+ *
+ * @param service  accepting service; on_accept_param must point
+ *                 to a box_process_func.
+ * @param fd       accepted socket.
+ * @param addr     peer address.
+ */
+static void
+iproto_on_accept(struct evio_service *service, int fd,
+		 struct sockaddr_in *addr)
+{
+	char name[SERVICE_NAME_MAXLEN];
+	snprintf(name, sizeof(name), "%s/%s", "iobuf", sio_strfaddr(addr));
+
+	struct iproto_session *session;
+
+	box_process_func *process_fun =
+		(box_process_func*) service->on_accept_param;
+	session = iproto_session_create(name, fd, addr, process_fun);
+	iproto_enqueue_request(&request_queue, session,
+			       session->iobuf[0], &dummy_header,
+			       iproto_process_connect);
+}
+
+/**
+ * Initialize read-write and read-only ports
+ * with binary protocol handlers.
+ *
+ * @param bind_ipaddr     address passed to both services.
+ * @param primary_port    port of the "primary" service, which
+ *                        uses box_process; 0 disables it.
+ * @param secondary_port  port of the "secondary" service, which
+ *                        uses box_process_ro; 0 disables it.
+ *
+ * The request queue is initialized last, with
+ * IPROTO_REQUEST_QUEUE_SIZE and iproto_queue_handler.
+ */
+void
+iproto_init(const char *bind_ipaddr, int primary_port,
+	    int secondary_port)
+{
+	/* Run a primary server. */
+	if (primary_port != 0) {
+		static struct evio_service primary;
+		evio_service_init(&primary, "primary",
+				  bind_ipaddr, primary_port,
+				  iproto_on_accept, &box_process);
+		evio_service_on_bind(&primary,
+				     box_leave_local_standby_mode, NULL);
+		evio_service_start(&primary);
+	}
+
+	/* Run a secondary server. */
+	if (secondary_port != 0) {
+		static struct evio_service secondary;
+		evio_service_init(&secondary, "secondary",
+				  bind_ipaddr, secondary_port,
+				  iproto_on_accept, &box_process_ro);
+		evio_service_start(&secondary);
+	}
+	iproto_queue_init(&request_queue, IPROTO_REQUEST_QUEUE_SIZE,
+			  iproto_queue_handler);
+}
+
diff --git a/src/log_io.cc b/src/log_io.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5935af0759d2f35e5a16488dcaf176845735bdb5
--- /dev/null
+++ b/src/log_io.cc
@@ -0,0 +1,660 @@
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "log_io.h" +#include <dirent.h> +#include <fcntl.h> + +#include "palloc.h" +#include "fiber.h" +#include "crc32.h" +#include "fio.h" +#include "tarantool_eio.h" +#include "fiob.h" + +const uint32_t default_version = 11; +const log_magic_t row_marker_v11 = 0xba0babed; +const log_magic_t eof_marker_v11 = 0x10adab1e; +const char inprogress_suffix[] = ".inprogress"; +const char v11[] = "0.11\n"; + +void +header_v11_sign(struct header_v11 *header) +{ + header->data_crc32c = crc32_calc(0, (const unsigned char *) header + sizeof(struct + header_v11), header->len); + header->header_crc32c = crc32_calc(0, (const unsigned char *) &header->lsn, + sizeof(struct header_v11) - + sizeof(header->header_crc32c)); +} + +void +row_v11_fill(struct row_v11 *row, int64_t lsn, uint16_t tag, uint64_t cookie, + const char *metadata, size_t metadata_len, const char + *data, size_t data_len) +{ + row->marker = row_marker_v11; + row->tag = tag; + row->cookie = cookie; + memcpy(row->data, metadata, metadata_len); + memcpy(row->data + metadata_len, data, data_len); + header_v11_fill(&row->header, lsn, metadata_len + data_len + + sizeof(row->tag) + sizeof(row->cookie)); +} + +struct log_dir snap_dir = { + /* .panic_if_error = */ false, + /* .sync_is_async = */ false, + /* .open_wflags = */ "wxd", + /* .filetype = */ "SNAP\n", + /* .filename_ext = */ ".snap", + /* .dirname = */ NULL +}; + +struct log_dir wal_dir = { + /* .panic_if_error = */ 
false, + /* .sync_is_async = */ true, + /* .open_wflags = */ "wx", + /* .filetype = */ "XLOG\n", + /* .filename_ext = */ ".xlog", + /* .dirname = */ NULL +}; + +static int +cmp_i64(const void *_a, const void *_b) +{ + const int64_t *a = (const int64_t *) _a, *b = (const int64_t *) _b; + if (*a == *b) + return 0; + return (*a > *b) ? 1 : -1; +} + +static ssize_t +scan_dir(struct log_dir *dir, int64_t **ret_lsn) +{ + ssize_t result = -1; + size_t i = 0, size = 1024; + ssize_t ext_len = strlen(dir->filename_ext); + int64_t *lsn = (int64_t *) palloc(fiber_ptr->gc_pool, sizeof(int64_t) * size); + DIR *dh = opendir(dir->dirname); + + if (lsn == NULL || dh == NULL) + goto out; + + errno = 0; + struct dirent *dent; + while ((dent = readdir(dh)) != NULL) { + + char *ext = strchr(dent->d_name, '.'); + if (ext == NULL) + continue; + + const char *suffix = strchr(ext + 1, '.'); + /* + * A valid ending is either .xlog or + * .xlog.inprogress, given dir->filename_ext == + * 'xlog'. + */ + bool ext_is_ok; + if (suffix == NULL) + ext_is_ok = strcmp(ext, dir->filename_ext) == 0; + else + ext_is_ok = (strncmp(ext, dir->filename_ext, + ext_len) == 0 && + strcmp(suffix, inprogress_suffix) == 0); + if (!ext_is_ok) + continue; + + lsn[i] = strtoll(dent->d_name, &ext, 10); + if (strncmp(ext, dir->filename_ext, ext_len) != 0) { + /* d_name doesn't parse entirely, ignore it */ + say_warn("can't parse `%s', skipping", dent->d_name); + continue; + } + + if (lsn[i] == LLONG_MAX || lsn[i] == LLONG_MIN) { + say_warn("can't parse `%s', skipping", dent->d_name); + continue; + } + + i++; + if (i == size) { + int64_t *n = (int64_t *) palloc(fiber_ptr->gc_pool, sizeof(int64_t) * size * 2); + if (n == NULL) + goto out; + memcpy(n, lsn, sizeof(int64_t) * size); + lsn = n; + size = size * 2; + } + } + + qsort(lsn, i, sizeof(int64_t), cmp_i64); + + *ret_lsn = lsn; + result = i; +out: + if (errno != 0) + say_syserror("error reading directory `%s'", dir->dirname); + + if (dh != NULL) + closedir(dh); + 
return result; +} + +int64_t +greatest_lsn(struct log_dir *dir) +{ + int64_t *lsn; + ssize_t count = scan_dir(dir, &lsn); + + if (count <= 0) + return count; + + return lsn[count - 1]; +} + +int64_t +find_including_file(struct log_dir *dir, int64_t target_lsn) +{ + int64_t *lsn; + ssize_t count = scan_dir(dir, &lsn); + + if (count <= 0) + return count; + + while (count > 1) { + if (*lsn <= target_lsn && target_lsn < *(lsn + 1)) { + goto out; + return *lsn; + } + lsn++; + count--; + } + + /* + * we can't check here for sure will or will not last file + * contain record with desired lsn since number of rows in file + * is not known beforehand. so, we simply return the last one. + */ + + out: + return *lsn; +} + +char * +format_filename(struct log_dir *dir, int64_t lsn, enum log_suffix suffix) +{ + static __thread char filename[PATH_MAX + 1]; + const char *suffix_str = suffix == INPROGRESS ? inprogress_suffix : ""; + snprintf(filename, PATH_MAX, "%s/%020lld%s%s", + dir->dirname, (long long)lsn, dir->filename_ext, suffix_str); + return filename; +} + +/* }}} */ + +/* {{{ struct log_io_cursor */ + +static const char ROW_EOF[] = ""; + +const char * +row_reader_v11(FILE *f, uint32_t *rowlen) +{ + struct header_v11 m; + + uint32_t header_crc, data_crc; + + if (fread(&m, sizeof(m), 1, f) != 1) + return ROW_EOF; + + /* header crc32c calculated on <lsn, tm, len, data_crc32c> */ + header_crc = crc32_calc(0, (unsigned char *) &m + offsetof(struct header_v11, lsn), + sizeof(m) - offsetof(struct header_v11, lsn)); + + if (m.header_crc32c != header_crc) { + say_error("header crc32c mismatch"); + return NULL; + } + char *row = (char *) palloc(fiber_ptr->gc_pool, sizeof(m) + m.len); + memcpy(row, &m, sizeof(m)); + + if (fread(row + sizeof(m), m.len, 1, f) != 1) + return ROW_EOF; + + data_crc = crc32_calc(0, (unsigned char *) row + sizeof(m), m.len); + if (m.data_crc32c != data_crc) { + say_error("data crc32c mismatch"); + return NULL; + } + + say_debug("read row v11 success 
lsn:%lld", (long long) m.lsn); + *rowlen = m.len + sizeof(m); + return row; +} + +void +log_io_cursor_open(struct log_io_cursor *i, struct log_io *l) +{ + i->log = l; + i->row_count = 0; + i->good_offset = ftello(l->f); + i->eof_read = false; +} + +void +log_io_cursor_close(struct log_io_cursor *i) +{ + struct log_io *l = i->log; + l->rows += i->row_count; + /* + * Since we don't close log_io + * we must rewind log_io to last known + * good position if there was an error. + * Seek back to last known good offset. + */ + fseeko(l->f, i->good_offset, SEEK_SET); + prelease(fiber_ptr->gc_pool); +} + +/** + * Read logfile contents using designated format, panic if + * the log is corrupted/unreadable. + * + * @param i iterator object, encapsulating log specifics. + * + */ +const char * +log_io_cursor_next(struct log_io_cursor *i, uint32_t *rowlen) +{ + struct log_io *l = i->log; + const char *row; + log_magic_t magic; + off_t marker_offset = 0; + + assert(i->eof_read == false); + + say_debug("log_io_cursor_next: marker:0x%016X/%zu", + row_marker_v11, sizeof(row_marker_v11)); + + /* + * Don't let gc pool grow too much. Yet to + * it before reading the next row, to make + * sure it's not freed along here. 
+ */ + prelease_after(fiber_ptr->gc_pool, 128 * 1024); + +restart: + if (marker_offset > 0) + fseeko(l->f, marker_offset + 1, SEEK_SET); + + if (fread(&magic, sizeof(magic), 1, l->f) != 1) + goto eof; + + while (magic != row_marker_v11) { + int c = fgetc(l->f); + if (c == EOF) { + say_debug("eof while looking for magic"); + goto eof; + } + magic = magic >> 8 | + ((log_magic_t) c & 0xff) << (sizeof(magic)*8 - 8); + } + marker_offset = ftello(l->f) - sizeof(row_marker_v11); + if (i->good_offset != marker_offset) + say_warn("skipped %jd bytes after 0x%08jx offset", + (intmax_t)(marker_offset - i->good_offset), + (uintmax_t)i->good_offset); + say_debug("magic found at 0x%08jx", (uintmax_t)marker_offset); + + row = row_reader_v11(l->f, rowlen); + if (row == ROW_EOF) + goto eof; + + if (row == NULL) { + if (l->dir->panic_if_error) + panic("failed to read row"); + say_warn("failed to read row"); + goto restart; + } + + i->good_offset = ftello(l->f); + i->row_count++; + + if (i->row_count % 100000 == 0) + say_info("%.1fM rows processed", i->row_count / 1000000.); + + return row; +eof: + /* + * The only two cases of fully read file: + * 1. sizeof(eof_marker) > 0 and it is the last record in file + * 2. sizeof(eof_marker) == 0 and there is no unread data in file + */ + if (ftello(l->f) == i->good_offset + sizeof(eof_marker_v11)) { + fseeko(l->f, i->good_offset, SEEK_SET); + if (fread(&magic, sizeof(magic), 1, l->f) != 1) { + + say_error("can't read eof marker"); + } else if (magic == eof_marker_v11) { + i->good_offset = ftello(l->f); + i->eof_read = true; + } else if (magic != row_marker_v11) { + say_error("eof marker is corrupt: %lu", + (unsigned long) magic); + } else { + /* + * Row marker at the end of a file: a sign + * of a corrupt log file in case of + * recovery, but OK in case we're in local + * hot standby or replication relay mode + * (i.e. data is being written to the + * file. Don't pollute the log, the + * condition is taken care of up the + * stack. 
+ */ + } + } + /* No more rows. */ + return NULL; +} + +/* }}} */ + +int +inprogress_log_rename(struct log_io *l) +{ + char *filename = l->filename; + char new_filename[PATH_MAX]; + char *suffix = strrchr(filename, '.'); + + assert(l->is_inprogress); + assert(suffix); + assert(strcmp(suffix, inprogress_suffix) == 0); + + /* Create a new filename without '.inprogress' suffix. */ + memcpy(new_filename, filename, suffix - filename); + new_filename[suffix - filename] = '\0'; + + if (rename(filename, new_filename) != 0) { + say_syserror("can't rename %s to %s", filename, new_filename); + + return -1; + } + l->is_inprogress = false; + return 0; +} + +int +inprogress_log_unlink(char *filename) +{ +#ifndef NDEBUG + char *suffix = strrchr(filename, '.'); + assert(suffix); + assert(strcmp(suffix, inprogress_suffix) == 0); +#endif + if (unlink(filename) != 0) { + /* Don't panic if there is no such file. */ + if (errno == ENOENT) + return 0; + + say_syserror("can't unlink %s", filename); + + return -1; + } + + return 0; +} + +/* {{{ struct log_io */ + +int +log_io_close(struct log_io **lptr) +{ + struct log_io *l = *lptr; + int r; + + if (l->mode == LOG_WRITE) { + fwrite(&eof_marker_v11, 1, sizeof(log_magic_t), l->f); + /* + * Sync the file before closing, since + * otherwise we can end up with a partially + * written file in case of a crash. + * Do not sync if the file is opened with O_SYNC. + */ + if (! strchr(l->dir->open_wflags, 's')) + log_io_sync(l); + if (l->is_inprogress && inprogress_log_rename(l) != 0) + panic("can't rename 'inprogress' WAL"); + } + + r = fclose(l->f); + if (r < 0) + say_syserror("can't close"); + free(l); + *lptr = NULL; + return r; +} + +/** Free log_io memory and destroy it cleanly, without side + * effects (for use in the atfork handler). + */ +void +log_io_atfork(struct log_io **lptr) +{ + struct log_io *l = *lptr; + if (l) { + /* + * Close the file descriptor STDIO buffer does not + * make its way into the respective file in + * fclose(). 
+ */ + close(fileno(l->f)); + fclose(l->f); + free(l); + *lptr = NULL; + } +} + +static int +sync_cb(eio_req *req) +{ + if (req->result) + say_error("%s: fsync failed, errno: %d", + __func__, (int) req->result); + + int fd = (intptr_t) req->data; + close(fd); + return 0; +} + +int +log_io_sync(struct log_io *l) +{ + if (l->dir->sync_is_async) { + int fd = dup(fileno(l->f)); + if (fd == -1) { + say_syserror("%s: dup() failed", __func__); + return -1; + } + eio_fsync(fd, 0, sync_cb, (void *) (intptr_t) fd); + } else if (fsync(fileno(l->f)) < 0) { + say_syserror("%s: fsync failed", l->filename); + return -1; + } + return 0; +} + +static int +log_io_write_header(struct log_io *l) +{ + int ret = fprintf(l->f, "%s%s\n", l->dir->filetype, v11); + + return ret < 0 ? -1 : 0; +} + +/** + * Verify that file is of the given format. + * + * @param l log_io object, denoting the file to check. + * @param[out] errmsg set if error + * + * @return 0 if success, -1 on error. + */ +static int +log_io_verify_meta(struct log_io *l, const char **errmsg) +{ + char filetype[32], version[32], buf[256]; + struct log_dir *dir = l->dir; + FILE *stream = l->f; + + if (fgets(filetype, sizeof(filetype), stream) == NULL || + fgets(version, sizeof(version), stream) == NULL) { + *errmsg = "failed to read log file header"; + goto error; + } + if (strcmp(dir->filetype, filetype) != 0) { + *errmsg = "unknown filetype"; + goto error; + } + + if (strcmp(v11, version) != 0) { + *errmsg = "unknown version"; + goto error; + } + for (;;) { + if (fgets(buf, sizeof(buf), stream) == NULL) { + *errmsg = "failed to read log file header"; + goto error; + } + if (strcmp(buf, "\n") == 0 || strcmp(buf, "\r\n") == 0) + break; + } + return 0; +error: + return -1; +} + +struct log_io * +log_io_open(struct log_dir *dir, enum log_mode mode, + const char *filename, enum log_suffix suffix, FILE *file) +{ + struct log_io *l = NULL; + int save_errno; + const char *errmsg = NULL; + /* + * Check fopen() result the caller first 
thing, to + * preserve the errno. + */ + if (file == NULL) { + errmsg = strerror(errno); + goto error; + } + l = (struct log_io *) calloc(1, sizeof(*l)); + if (l == NULL) { + errmsg = strerror(errno); + goto error; + } + l->f = file; + strncpy(l->filename, filename, PATH_MAX); + l->mode = mode; + l->dir = dir; + l->is_inprogress = suffix == INPROGRESS; + if (mode == LOG_READ) { + if (log_io_verify_meta(l, &errmsg) != 0) { + errmsg = strerror(errno); + goto error; + } + } else { /* LOG_WRITE */ + setvbuf(l->f, NULL, _IONBF, 0); + if (log_io_write_header(l) != 0) { + errmsg = strerror(errno); + goto error; + } + } + return l; +error: + save_errno = errno; + say_error("%s: failed to open %s: %s", __func__, filename, errmsg); + if (file) + fclose(file); + if (l) + free(l); + errno = save_errno; + return NULL; +} + +struct log_io * +log_io_open_for_read(struct log_dir *dir, int64_t lsn, enum log_suffix suffix) +{ + assert(lsn != 0); + + const char *filename = format_filename(dir, lsn, suffix); + FILE *f = fopen(filename, "r"); + return log_io_open(dir, LOG_READ, filename, suffix, f); +} + +/** + * In case of error, writes a message to the server log + * and sets errno. + */ +struct log_io * +log_io_open_for_write(struct log_dir *dir, int64_t lsn, enum log_suffix suffix) +{ + char *filename; + FILE *f; + assert(lsn != 0); + + if (suffix == INPROGRESS) { + /* + * Check whether a file with this name already exists. + * We don't overwrite existing files. + */ + filename = format_filename(dir, lsn, NONE); + if (access(filename, F_OK) == 0) { + errno = EEXIST; + goto error; + } + } + filename = format_filename(dir, lsn, suffix); + /* + * Open the <lsn>.<suffix>.inprogress file. If it exists, + * open will fail. 
+ */ + f = fiob_open(filename, dir->open_wflags); + + if (!f) + goto error; + say_info("creating `%s'", filename); + return log_io_open(dir, LOG_WRITE, filename, suffix, f); +error: + say_syserror("%s: failed to open `%s'", __func__, filename); + return NULL; +} + +/* }}} */ + diff --git a/src/lua/session.cc b/src/lua/session.cc new file mode 100644 index 0000000000000000000000000000000000000000..8d14d3c0071527b5346d109e1b2bccd9bef5594b --- /dev/null +++ b/src/lua/session.cc @@ -0,0 +1,214 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+#include "lua/session.h"
+#include "lua/init.h"
+
+extern "C" {
+#include <lua.h>
+#include <lauxlib.h>
+#include <lualib.h>
+}
+
+#include "fiber.h"
+#include "session.h"
+#include "sio.h"
+
+static const char *sessionlib_name = "box.session"; /* name the library is registered under */
+extern lua_State *root_L;
+
+/**
+ * Return a unique monotonic session
+ * identifier. The identifier can be used
+ * to check whether or not a session is alive.
+ * 0 means there is no session (e.g.
+ * a procedure is running in a detached
+ * fiber).
+ *
+ * Pushes the sid of the current fiber as a Lua number.
+ */
+static int
+lbox_session_id(struct lua_State *L)
+{
+	lua_pushnumber(L, fiber_ptr->sid);
+	return 1;
+}
+
+/**
+ * Check whether or not a session exists.
+ *
+ * Expects exactly one argument, the session id. Pushes the
+ * result of session_exists() as a Lua number.
+ */
+static int
+lbox_session_exists(struct lua_State *L)
+{
+	if (lua_gettop(L) != 1)
+		luaL_error(L, "session.exists(sid): bad arguments");
+
+	uint32_t sid = luaL_checkint(L, -1);
+	lua_pushnumber(L, session_exists(sid));
+	return 1;
+}
+
+/**
+ * Pretty print peer name.
+ *
+ * Takes an optional session id; without it, the sid of the
+ * current fiber is used. Pushes the peer address of the session's
+ * socket, formatted by sio_strfaddr(), as a string.
+ *
+ * NOTE(review): the results of session_fd() and
+ * sio_getpeername() are not checked here -- confirm how they
+ * behave for a sid without a session.
+ */
+static int
+lbox_session_peer(struct lua_State *L)
+{
+	if (lua_gettop(L) > 1)
+		luaL_error(L, "session.peer(sid): bad arguments");
+
+	uint32_t sid = lua_gettop(L) == 1 ?
+		luaL_checkint(L, -1) : fiber_ptr->sid;
+
+	int fd = session_fd(sid);
+	struct sockaddr_in addr;
+	sio_getpeername(fd, &addr);
+
+	lua_pushstring(L, sio_strfaddr(&addr));
+	return 1;
+}
+/**
+ * Ties a session trigger slot to the Lua callable installed
+ * in it.
+ */
+struct lbox_session_trigger
+{
+	struct session_trigger *trigger; /* the C trigger slot */
+	int ref; /* registry reference of the Lua callable, or LUA_NOREF */
+};
+/** Bindings for both session triggers; no Lua callable is set initially. */
+static struct lbox_session_trigger on_connect =
+	{ &session_on_connect, LUA_NOREF};
+static struct lbox_session_trigger on_disconnect =
+	{ &session_on_disconnect, LUA_NOREF};
+/**
+ * C-side trigger function: call the Lua callable referenced by
+ * a struct lbox_session_trigger, without arguments or results,
+ * in a newly created Lua thread.
+ *
+ * The new thread is anchored in the registry for the duration
+ * of the call and unreferenced on every exit path. An Exception
+ * is re-thrown as is; anything else thrown by the call is
+ * converted to a ClientError (ER_PROC_LUA) carrying the value at
+ * the top of the thread's stack as its message.
+ *
+ * @param param  the struct lbox_session_trigger to run.
+ */
+static void
+lbox_session_run_trigger(void *param)
+{
+	struct lbox_session_trigger *trigger =
+		(struct lbox_session_trigger *) param;
+	/* Create a new thread, anchor it, and push the referenced callable onto the main stack. */
+	lua_State *L = lua_newthread(tarantool_L);
+	int coro_ref = luaL_ref(tarantool_L, LUA_REGISTRYINDEX);
+	lua_rawgeti(tarantool_L, LUA_REGISTRYINDEX, trigger->ref);
+	/** Move the function to be called to the new coro. */
+	lua_xmove(tarantool_L, L, 1);
+
+	try {
+		lua_call(L, 0, 0);
+		luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref);
+	} catch (const Exception& e) {
+		luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref);
+		throw;
+	} catch (...) {
+		luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref);
+		tnt_raise(ClientError, ER_PROC_LUA, lua_tostring(L, -1));
+	}
+}
+/**
+ * Common implementation of session.on_connect() and
+ * session.on_disconnect(): install, replace or clear a Lua
+ * trigger.
+ *
+ * Expects exactly one argument: a function to install, or nil
+ * to clear the trigger. Always returns one value, the previously
+ * installed function or nil.
+ *
+ * NOTE(review): the error message names session.on_connect even
+ * when the call is made on behalf of on_disconnect.
+ */
+static int
+lbox_session_set_trigger(struct lua_State *L,
+			 struct lbox_session_trigger *trigger)
+{
+	if (lua_gettop(L) != 1 ||
+	    (lua_type(L, -1) != LUA_TFUNCTION &&
+	     lua_type(L, -1) != LUA_TNIL)) {
+		luaL_error(L, "session.on_connect(chunk): bad arguments");
+	}
+
+	/* Push the old trigger (or nil) and drop its registry reference. */
+	if (trigger->ref != LUA_NOREF) {
+		lua_rawgeti(L, LUA_REGISTRYINDEX, trigger->ref);
+		luaL_unref(L, LUA_REGISTRYINDEX, trigger->ref);
+	} else {
+		lua_pushnil(L);
+	}
+
+	/*
+	 * Set or clear the trigger. Return the old value of the
+	 * trigger. The stack is now [new, old]: the argument is
+	 * at index -2.
+	 */
+	if (lua_type(L, -2) == LUA_TNIL) {
+		trigger->ref = LUA_NOREF;
+		trigger->trigger->trigger = NULL;
+		trigger->trigger->param = NULL;
+	} else {
+		/* Move the new trigger to the top of the stack: [old, new]. */
+		lua_insert(L, -2);
+		/* Reference the new trigger. Pops it. */
+		trigger->ref = luaL_ref(L, LUA_REGISTRYINDEX);
+		trigger->trigger->trigger = lbox_session_run_trigger;
+		trigger->trigger->param = trigger;
+	}
+	/* Return the old trigger. */
+	return 1;
+}
+/** Lua: session.on_connect(function or nil), see lbox_session_set_trigger(). */
+static int
+lbox_session_on_connect(struct lua_State *L)
+{
+	return lbox_session_set_trigger(L, &on_connect);
+}
+/** Lua: session.on_disconnect(function or nil), see lbox_session_set_trigger(). */
+static int
+lbox_session_on_disconnect(struct lua_State *L)
+{
+	return lbox_session_set_trigger(L, &on_disconnect);
+}
+/**
+ * Drop the entry kept for a session in the aggregate_storage
+ * table.
+ *
+ * On the first call the table is looked up as the
+ * "aggregate_storage" field of the metatable of box.session and
+ * a registry reference to it is cached. Entry [sid] of the table
+ * is then set to nil and the stack top of root_L is restored.
+ */
+void
+session_storage_cleanup(int sid)
+{
+	static int ref = LUA_REFNIL;
+
+	int top = lua_gettop(root_L);
+
+	if (ref == LUA_REFNIL) {
+		lua_getfield(root_L, LUA_GLOBALSINDEX, "box");
+		lua_getfield(root_L, -1, "session");
+		lua_getmetatable(root_L, -1);
+		lua_getfield(root_L, -1, "aggregate_storage");
+		ref = luaL_ref(root_L, LUA_REGISTRYINDEX);
+	}
+	lua_rawgeti(root_L, LUA_REGISTRYINDEX, ref);
+
+	lua_pushnil(root_L);
+	lua_rawseti(root_L, -2, sid);
+	lua_settop(root_L, top);
+}
+/** Functions exported to Lua under sessionlib_name. */
+static const struct luaL_reg sessionlib[] = {
+	{"id", lbox_session_id},
+	{"exists", lbox_session_exists},
+	{"peer", lbox_session_peer},
+	{"on_connect", lbox_session_on_connect},
+	{"on_disconnect", lbox_session_on_disconnect},
+	{NULL, NULL}
+};
+/**
+ * Register the session library in the given Lua state and pop
+ * the library table which luaL_register() leaves on the stack.
+ */
+void
+tarantool_lua_session_init(struct lua_State *L)
+{
+	luaL_register(L, sessionlib_name, sessionlib);
+	lua_pop(L, 1);
+}
diff --git a/src/memcached-grammar.cc b/src/memcached-grammar.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3cf79e01bd414f4e78ae5f77dadb7e555ea1a412
--- /dev/null
+++ b/src/memcached-grammar.cc
@@ -0,0 +1,3671 @@
+
+#line 1 "src/memcached-grammar.rl"
+/*
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#line 34 "src/memcached-grammar.cc" +static const int memcached_start = 1; +static const int memcached_first_final = 197; +static const int memcached_error = 0; + +static const int memcached_en_main = 1; + + +#line 33 "src/memcached-grammar.rl" + + +static int __attribute__((noinline)) +memcached_dispatch(struct ev_io *coio, struct iobuf *iobuf) +{ + int cs; + char *p, *pe; + char *fstart; + struct tbuf *keys = tbuf_new(fiber_ptr->gc_pool); + const char *key; + bool append, show_cas; + int incr_sign; + uint64_t cas, incr; + uint32_t flags, exptime, bytes; + bool noreply = false; + char *data = NULL; + bool done = false; + uintptr_t flush_delay = 0; + size_t keys_count = 0; + struct ibuf *in = &iobuf->in; + struct obuf *out = &iobuf->out; + /* Savepoint for 'noreply' */ + struct obuf_svp obuf_svp = obuf_create_svp(out); + + p = in->pos; + pe = in->end; + + say_debug("memcached_dispatch '%.*s'", MIN((int)(pe - p), 40) , p); + + +#line 73 "src/memcached-grammar.cc" + { + cs = memcached_start; + } + +#line 78 "src/memcached-grammar.cc" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 65: goto st2; + case 67: goto st44; + case 68: goto 
st67; + case 70: goto st103; + case 71: goto st124; + case 73: goto st132; + case 80: goto st136; + case 81: goto st143; + case 82: goto st148; + case 83: goto st172; + case 97: goto st2; + case 99: goto st44; + case 100: goto st67; + case 102: goto st103; + case 103: goto st124; + case 105: goto st132; + case 112: goto st136; + case 113: goto st143; + case 114: goto st148; + case 115: goto st172; + } + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 68: goto st3; + case 80: goto st22; + case 100: goto st3; + case 112: goto st22; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + switch( (*p) ) { + case 68: goto st4; + case 100: goto st4; + } + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + if ( (*p) == 32 ) + goto st5; + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + switch( (*p) ) { + case 13: goto st0; + case 32: goto st5; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st0; + goto tr15; +tr15: +#line 222 "src/memcached-grammar.rl" + { + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if ( *p == ' ' || *p == '\r' || *p == '\n') { + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + goto st6; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: +#line 166 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st7; + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 32 ) + goto st7; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr17; + goto st0; +tr17: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st8; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: +#line 187 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr18; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + goto st0; +tr18: +#line 244 "src/memcached-grammar.rl" + {flags = memcached_natoq(fstart, p);} + goto st9; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: 
+#line 201 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st9; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr21; + goto st0; +tr21: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 215 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr22; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st10; + goto st0; +tr22: +#line 237 "src/memcached-grammar.rl" + { + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } + goto st11; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: +#line 233 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st11; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr25; + goto st0; +tr25: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st12; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: +#line 247 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr26; + case 13: goto tr27; + case 32: goto tr28; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st12; + goto st0; +tr26: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 68 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple != NULL && !memcached_is_expired(tuple)) + obuf_dup(out, "NOT_STORED\r\n", 12); + else + STORE; + } + goto st197; +tr30: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. + */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 68 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple != NULL && !memcached_is_expired(tuple)) + obuf_dup(out, "NOT_STORED\r\n", 12); + else + STORE; + } + goto st197; +tr39: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 68 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple != NULL && !memcached_is_expired(tuple)) + obuf_dup(out, "NOT_STORED\r\n", 12); + else + STORE; + } + goto st197; +tr58: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. + */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 97 "src/memcached-grammar.rl" + { + struct tbuf *b; + const char *field; + uint32_t field_len; + + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL) { + obuf_dup(out, "NOT_STORED\r\n", 12); + } else { + field = tuple_field(tuple, 3, &field_len); + b = tbuf_new(fiber_ptr->gc_pool); + if (append) { + tbuf_append(b, field, field_len); + tbuf_append(b, data, bytes); + } else { + tbuf_append(b, data, bytes); + tbuf_append(b, field, field_len); + } + + bytes += field_len; + data = b->data; + STORE; + } + } + goto st197; +tr62: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read 
= bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. + */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 97 "src/memcached-grammar.rl" + { + struct tbuf *b; + const char *field; + uint32_t field_len; + + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL) { + obuf_dup(out, "NOT_STORED\r\n", 12); + } else { + field = tuple_field(tuple, 3, &field_len); + b = tbuf_new(fiber_ptr->gc_pool); + if (append) { + tbuf_append(b, field, field_len); + tbuf_append(b, data, bytes); + } else { + tbuf_append(b, data, bytes); + tbuf_append(b, field, field_len); + } + + bytes += field_len; + data = b->data; + STORE; + } + } + goto st197; +tr71: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 97 "src/memcached-grammar.rl" + { + struct tbuf *b; + const char *field; + uint32_t field_len; + + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL) { + obuf_dup(out, "NOT_STORED\r\n", 12); + } else { + field = tuple_field(tuple, 3, &field_len); + b = tbuf_new(fiber_ptr->gc_pool); + if (append) { + tbuf_append(b, field, field_len); + tbuf_append(b, data, bytes); + } else { + tbuf_append(b, data, bytes); + tbuf_append(b, field, field_len); + } + + bytes += field_len; + data = b->data; + STORE; + } + } + goto st197; +tr91: +#line 246 "src/memcached-grammar.rl" + {cas = memcached_natoq(fstart, p);} +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 86 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) + obuf_dup(out, "NOT_FOUND\r\n", 11); + else if (memcached_meta(tuple)->cas != cas) + obuf_dup(out, "EXISTS\r\n", 8); + else + STORE; + } + goto st197; +tr95: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. + */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 86 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) + obuf_dup(out, "NOT_FOUND\r\n", 11); + else if (memcached_meta(tuple)->cas != cas) + obuf_dup(out, "EXISTS\r\n", 8); + else + STORE; + } + goto st197; +tr105: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 86 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) + obuf_dup(out, "NOT_FOUND\r\n", 11); + else if (memcached_meta(tuple)->cas != cas) + obuf_dup(out, "EXISTS\r\n", 8); + else + STORE; + } + goto st197; +tr118: +#line 247 "src/memcached-grammar.rl" + {incr = memcached_natoq(fstart, p);} +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 123 "src/memcached-grammar.rl" + { + struct meta *m; + struct tbuf *b; + const char *field; + uint32_t field_len; + uint64_t value; + + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) { + obuf_dup(out, "NOT_FOUND\r\n", 11); + } else { + m = memcached_meta(tuple); + field = tuple_field(tuple, 3, &field_len); + + if (memcached_is_numeric(field, field_len)) { + value = memcached_natoq(field, + field + field_len); + + if (incr_sign > 0) { + value += incr; + } else { + if (incr > value) + value = 0; + else + value -= incr; + } + + exptime = m->exptime; + flags = m->flags; + + b = tbuf_new(fiber_ptr->gc_pool); + tbuf_printf(b, "%" PRIu64, value); + data = b->data; + bytes = b->size; + + stats.cmd_set++; + try { + memcached_store(key, exptime, flags, bytes, data); + stats.total_items++; + obuf_dup(out, b->data, b->size); + obuf_dup(out, "\r\n", 2); + } catch (const ClientError& e) { + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } else { + obuf_dup(out, "CLIENT_ERROR cannot increment or 
decrement non-numeric value\r\n", 62); + } + } + + } + goto st197; +tr122: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 123 "src/memcached-grammar.rl" + { + struct meta *m; + struct tbuf *b; + const char *field; + uint32_t field_len; + uint64_t value; + + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) { + obuf_dup(out, "NOT_FOUND\r\n", 11); + } else { + m = memcached_meta(tuple); + field = tuple_field(tuple, 3, &field_len); + + if (memcached_is_numeric(field, field_len)) { + value = memcached_natoq(field, + field + field_len); + + if (incr_sign > 0) { + value += incr; + } else { + if (incr > value) + value = 0; + else + value -= incr; + } + + exptime = m->exptime; + flags = m->flags; + + b = tbuf_new(fiber_ptr->gc_pool); + tbuf_printf(b, "%" PRIu64, value); + data = b->data; + bytes = b->size; + + stats.cmd_set++; + try { + memcached_store(key, exptime, flags, bytes, data); + stats.total_items++; + obuf_dup(out, b->data, b->size); + obuf_dup(out, "\r\n", 2); + } catch (const ClientError& e) { + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } else { + obuf_dup(out, "CLIENT_ERROR cannot increment or decrement non-numeric value\r\n", 62); + } + } + + } + goto st197; +tr132: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 123 "src/memcached-grammar.rl" + { + struct meta *m; + struct tbuf *b; + const char *field; + uint32_t field_len; + uint64_t value; + + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) { + obuf_dup(out, "NOT_FOUND\r\n", 11); + } 
else { + m = memcached_meta(tuple); + field = tuple_field(tuple, 3, &field_len); + + if (memcached_is_numeric(field, field_len)) { + value = memcached_natoq(field, + field + field_len); + + if (incr_sign > 0) { + value += incr; + } else { + if (incr > value) + value = 0; + else + value -= incr; + } + + exptime = m->exptime; + flags = m->flags; + + b = tbuf_new(fiber_ptr->gc_pool); + tbuf_printf(b, "%" PRIu64, value); + data = b->data; + bytes = b->size; + + stats.cmd_set++; + try { + memcached_store(key, exptime, flags, bytes, data); + stats.total_items++; + obuf_dup(out, b->data, b->size); + obuf_dup(out, "\r\n", 2); + } catch (const ClientError& e) { + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } else { + obuf_dup(out, "CLIENT_ERROR cannot increment or decrement non-numeric value\r\n", 62); + } + } + + } + goto st197; +tr141: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 177 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) { + obuf_dup(out, "NOT_FOUND\r\n", 11); + } else { + try { + memcached_delete(key); + obuf_dup(out, "DELETED\r\n", 9); + } + catch (const ClientError& e) { + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } + } + goto st197; +tr146: +#line 237 "src/memcached-grammar.rl" + { + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 177 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if 
(tuple == NULL || memcached_is_expired(tuple)) { + obuf_dup(out, "NOT_FOUND\r\n", 11); + } else { + try { + memcached_delete(key); + obuf_dup(out, "DELETED\r\n", 9); + } + catch (const ClientError& e) { + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } + } + goto st197; +tr157: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 177 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) { + obuf_dup(out, "NOT_FOUND\r\n", 11); + } else { + try { + memcached_delete(key); + obuf_dup(out, "DELETED\r\n", 9); + } + catch (const ClientError& e) { + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } + } + goto st197; +tr169: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 206 "src/memcached-grammar.rl" + { + struct fiber *f = fiber_new("flush_all", + memcached_flush_all); + fiber_call(f, flush_delay); + obuf_dup(out, "OK\r\n", 4); + } + goto st197; +tr174: +#line 248 "src/memcached-grammar.rl" + {flush_delay = memcached_natoq(fstart, p);} +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 206 "src/memcached-grammar.rl" + { + struct fiber *f = fiber_new("flush_all", + memcached_flush_all); + fiber_call(f, flush_delay); + obuf_dup(out, "OK\r\n", 4); + } + goto st197; +tr185: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done 
= true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 206 "src/memcached-grammar.rl" + { + struct fiber *f = fiber_new("flush_all", + memcached_flush_all); + fiber_call(f, flush_delay); + obuf_dup(out, "OK\r\n", 4); + } + goto st197; +tr195: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 195 "src/memcached-grammar.rl" + { + try { + memcached_get(out, keys_count, keys, show_cas); + } catch (const ClientError& e) { + obuf_rollback_to_svp(out, &obuf_svp); + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } + goto st197; +tr213: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 217 "src/memcached-grammar.rl" + { + return -1; + } + goto st197; +tr233: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 77 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) + obuf_dup(out, "NOT_STORED\r\n", 12); + else + STORE; + } + goto st197; +tr237: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. + */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 77 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) + obuf_dup(out, "NOT_STORED\r\n", 12); + else + STORE; + } + goto st197; +tr246: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 77 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) + obuf_dup(out, "NOT_STORED\r\n", 12); + else + STORE; + } + goto st197; +tr263: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. + */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 63 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + STORE; + } + goto st197; +tr267: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 63 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + STORE; + } + goto st197; +tr276: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 250 "src/memcached-grammar.rl" + { + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < bytes + 2) { + size_t to_read = bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. + */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 63 "src/memcached-grammar.rl" + { + key = tbuf_read_field(keys); + STORE; + } + goto st197; +tr281: +#line 279 "src/memcached-grammar.rl" + { p++; } +#line 273 "src/memcached-grammar.rl" + { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } +#line 213 "src/memcached-grammar.rl" + { + memcached_print_stats(out); + } + goto st197; +st197: + if ( ++p == pe ) + goto _test_eof197; +case 197: +#line 1347 "src/memcached-grammar.cc" + goto st0; +tr27: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st13; +tr40: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st13; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: +#line 1361 "src/memcached-grammar.cc" + if ( (*p) == 10 ) + goto tr30; + goto st0; +tr28: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st14; +st14: + if ( ++p == pe ) + goto 
_test_eof14; +case 14: +#line 1373 "src/memcached-grammar.cc" + switch( (*p) ) { + case 32: goto st14; + case 78: goto st15; + case 110: goto st15; + } + goto st0; +st15: + if ( ++p == pe ) + goto _test_eof15; +case 15: + switch( (*p) ) { + case 79: goto st16; + case 111: goto st16; + } + goto st0; +st16: + if ( ++p == pe ) + goto _test_eof16; +case 16: + switch( (*p) ) { + case 82: goto st17; + case 114: goto st17; + } + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + switch( (*p) ) { + case 69: goto st18; + case 101: goto st18; + } + goto st0; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + switch( (*p) ) { + case 80: goto st19; + case 112: goto st19; + } + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + switch( (*p) ) { + case 76: goto st20; + case 108: goto st20; + } + goto st0; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + switch( (*p) ) { + case 89: goto st21; + case 121: goto st21; + } + goto st0; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: + switch( (*p) ) { + case 10: goto tr39; + case 13: goto tr40; + } + goto st0; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: + switch( (*p) ) { + case 80: goto st23; + case 112: goto st23; + } + goto st0; +st23: + if ( ++p == pe ) + goto _test_eof23; +case 23: + switch( (*p) ) { + case 69: goto st24; + case 101: goto st24; + } + goto st0; +st24: + if ( ++p == pe ) + goto _test_eof24; +case 24: + switch( (*p) ) { + case 78: goto st25; + case 110: goto st25; + } + goto st0; +st25: + if ( ++p == pe ) + goto _test_eof25; +case 25: + switch( (*p) ) { + case 68: goto st26; + case 100: goto st26; + } + goto st0; +st26: + if ( ++p == pe ) + goto _test_eof26; +case 26: + if ( (*p) == 32 ) + goto tr45; + goto st0; +tr45: +#line 287 "src/memcached-grammar.rl" + {append = true; } + goto st27; +tr209: +#line 288 "src/memcached-grammar.rl" + {append = false;} + goto st27; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: +#line 1498 
"src/memcached-grammar.cc" + switch( (*p) ) { + case 13: goto st0; + case 32: goto st27; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st0; + goto tr46; +tr46: +#line 222 "src/memcached-grammar.rl" + { + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if ( *p == ' ' || *p == '\r' || *p == '\n') { + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + goto st28; +st28: + if ( ++p == pe ) + goto _test_eof28; +case 28: +#line 1523 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st29; + goto st0; +st29: + if ( ++p == pe ) + goto _test_eof29; +case 29: + if ( (*p) == 32 ) + goto st29; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr49; + goto st0; +tr49: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st30; +st30: + if ( ++p == pe ) + goto _test_eof30; +case 30: +#line 1544 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr50; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st30; + goto st0; +tr50: +#line 244 "src/memcached-grammar.rl" + {flags = memcached_natoq(fstart, p);} + goto st31; +st31: + if ( ++p == pe ) + goto _test_eof31; +case 31: +#line 1558 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st31; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr53; + goto st0; +tr53: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st32; +st32: + if ( ++p == pe ) + goto _test_eof32; +case 32: +#line 1572 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr54; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st32; + goto st0; +tr54: +#line 237 "src/memcached-grammar.rl" + { + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } + goto st33; +st33: + if ( ++p == pe ) + goto _test_eof33; +case 33: +#line 1590 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st33; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr57; + goto st0; +tr57: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st34; +st34: + if ( ++p 
== pe ) + goto _test_eof34; +case 34: +#line 1604 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr58; + case 13: goto tr59; + case 32: goto tr60; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st34; + goto st0; +tr59: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st35; +tr72: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st35; +st35: + if ( ++p == pe ) + goto _test_eof35; +case 35: +#line 1625 "src/memcached-grammar.cc" + if ( (*p) == 10 ) + goto tr62; + goto st0; +tr60: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st36; +st36: + if ( ++p == pe ) + goto _test_eof36; +case 36: +#line 1637 "src/memcached-grammar.cc" + switch( (*p) ) { + case 32: goto st36; + case 78: goto st37; + case 110: goto st37; + } + goto st0; +st37: + if ( ++p == pe ) + goto _test_eof37; +case 37: + switch( (*p) ) { + case 79: goto st38; + case 111: goto st38; + } + goto st0; +st38: + if ( ++p == pe ) + goto _test_eof38; +case 38: + switch( (*p) ) { + case 82: goto st39; + case 114: goto st39; + } + goto st0; +st39: + if ( ++p == pe ) + goto _test_eof39; +case 39: + switch( (*p) ) { + case 69: goto st40; + case 101: goto st40; + } + goto st0; +st40: + if ( ++p == pe ) + goto _test_eof40; +case 40: + switch( (*p) ) { + case 80: goto st41; + case 112: goto st41; + } + goto st0; +st41: + if ( ++p == pe ) + goto _test_eof41; +case 41: + switch( (*p) ) { + case 76: goto st42; + case 108: goto st42; + } + goto st0; +st42: + if ( ++p == pe ) + goto _test_eof42; +case 42: + switch( (*p) ) { + case 89: goto st43; + case 121: goto st43; + } + goto st0; +st43: + if ( ++p == pe ) + goto _test_eof43; +case 43: + switch( (*p) ) { + case 10: goto tr71; + case 13: goto tr72; + } + goto st0; +st44: + if ( ++p == pe ) + goto _test_eof44; +case 44: + switch( (*p) ) { + case 65: goto st45; + case 97: goto st45; + } + goto st0; +st45: + if ( ++p == pe ) + goto _test_eof45; +case 45: + switch( (*p) ) { 
+ case 83: goto st46; + case 115: goto st46; + } + goto st0; +st46: + if ( ++p == pe ) + goto _test_eof46; +case 46: + if ( (*p) == 32 ) + goto st47; + goto st0; +st47: + if ( ++p == pe ) + goto _test_eof47; +case 47: + switch( (*p) ) { + case 13: goto st0; + case 32: goto st47; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st0; + goto tr76; +tr76: +#line 222 "src/memcached-grammar.rl" + { + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if ( *p == ' ' || *p == '\r' || *p == '\n') { + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + goto st48; +st48: + if ( ++p == pe ) + goto _test_eof48; +case 48: +#line 1760 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st49; + goto st0; +st49: + if ( ++p == pe ) + goto _test_eof49; +case 49: + if ( (*p) == 32 ) + goto st49; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + goto st0; +tr78: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st50; +st50: + if ( ++p == pe ) + goto _test_eof50; +case 50: +#line 1781 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr79; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st50; + goto st0; +tr79: +#line 244 "src/memcached-grammar.rl" + {flags = memcached_natoq(fstart, p);} + goto st51; +st51: + if ( ++p == pe ) + goto _test_eof51; +case 51: +#line 1795 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st51; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr82; + goto st0; +tr82: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st52; +st52: + if ( ++p == pe ) + goto _test_eof52; +case 52: +#line 1809 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr83; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st52; + goto st0; +tr83: +#line 237 "src/memcached-grammar.rl" + { + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } + goto st53; +st53: + if ( ++p == pe ) + goto _test_eof53; +case 53: +#line 1827 
"src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st53; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr86; + goto st0; +tr86: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st54; +st54: + if ( ++p == pe ) + goto _test_eof54; +case 54: +#line 1841 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr87; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st54; + goto st0; +tr87: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st55; +st55: + if ( ++p == pe ) + goto _test_eof55; +case 55: +#line 1855 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st55; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr90; + goto st0; +tr90: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st56; +st56: + if ( ++p == pe ) + goto _test_eof56; +case 56: +#line 1869 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr91; + case 13: goto tr92; + case 32: goto tr93; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st56; + goto st0; +tr106: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st57; +tr92: +#line 246 "src/memcached-grammar.rl" + {cas = memcached_natoq(fstart, p);} + goto st57; +st57: + if ( ++p == pe ) + goto _test_eof57; +case 57: +#line 1890 "src/memcached-grammar.cc" + if ( (*p) == 10 ) + goto tr95; + goto st0; +tr93: +#line 246 "src/memcached-grammar.rl" + {cas = memcached_natoq(fstart, p);} + goto st58; +st58: + if ( ++p == pe ) + goto _test_eof58; +case 58: +#line 1902 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr95; + case 13: goto st57; + case 32: goto st58; + case 78: goto st59; + case 110: goto st59; + } + goto st0; +st59: + if ( ++p == pe ) + goto _test_eof59; +case 59: + switch( (*p) ) { + case 79: goto st60; + case 111: goto st60; + } + goto st0; +st60: + if ( ++p == pe ) + goto _test_eof60; +case 60: + switch( (*p) ) { + case 82: goto st61; + case 114: goto st61; + } + goto st0; +st61: + if ( ++p == pe ) + goto _test_eof61; +case 61: + switch( (*p) ) { + case 69: 
goto st62; + case 101: goto st62; + } + goto st0; +st62: + if ( ++p == pe ) + goto _test_eof62; +case 62: + switch( (*p) ) { + case 80: goto st63; + case 112: goto st63; + } + goto st0; +st63: + if ( ++p == pe ) + goto _test_eof63; +case 63: + switch( (*p) ) { + case 76: goto st64; + case 108: goto st64; + } + goto st0; +st64: + if ( ++p == pe ) + goto _test_eof64; +case 64: + switch( (*p) ) { + case 89: goto st65; + case 121: goto st65; + } + goto st0; +st65: + if ( ++p == pe ) + goto _test_eof65; +case 65: + switch( (*p) ) { + case 10: goto tr105; + case 13: goto tr106; + case 32: goto tr107; + } + goto st0; +tr107: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st66; +st66: + if ( ++p == pe ) + goto _test_eof66; +case 66: +#line 1983 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr95; + case 13: goto st57; + case 32: goto st66; + } + goto st0; +st67: + if ( ++p == pe ) + goto _test_eof67; +case 67: + switch( (*p) ) { + case 69: goto st68; + case 101: goto st68; + } + goto st0; +st68: + if ( ++p == pe ) + goto _test_eof68; +case 68: + switch( (*p) ) { + case 67: goto st69; + case 76: goto st85; + case 99: goto st69; + case 108: goto st85; + } + goto st0; +st69: + if ( ++p == pe ) + goto _test_eof69; +case 69: + switch( (*p) ) { + case 82: goto st70; + case 114: goto st70; + } + goto st0; +st70: + if ( ++p == pe ) + goto _test_eof70; +case 70: + if ( (*p) == 32 ) + goto tr113; + goto st0; +tr113: +#line 296 "src/memcached-grammar.rl" + {incr_sign = -1;} + goto st71; +tr202: +#line 295 "src/memcached-grammar.rl" + {incr_sign = 1; } + goto st71; +st71: + if ( ++p == pe ) + goto _test_eof71; +case 71: +#line 2038 "src/memcached-grammar.cc" + switch( (*p) ) { + case 13: goto st0; + case 32: goto st71; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st0; + goto tr114; +tr114: +#line 222 "src/memcached-grammar.rl" + { + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if ( *p == ' ' || *p == '\r' || *p == 
'\n') { + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + goto st72; +st72: + if ( ++p == pe ) + goto _test_eof72; +case 72: +#line 2063 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st73; + goto st0; +st73: + if ( ++p == pe ) + goto _test_eof73; +case 73: + if ( (*p) == 32 ) + goto st73; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr117; + goto st0; +tr117: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st74; +st74: + if ( ++p == pe ) + goto _test_eof74; +case 74: +#line 2084 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr118; + case 13: goto tr119; + case 32: goto tr120; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st74; + goto st0; +tr133: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st75; +tr119: +#line 247 "src/memcached-grammar.rl" + {incr = memcached_natoq(fstart, p);} + goto st75; +st75: + if ( ++p == pe ) + goto _test_eof75; +case 75: +#line 2105 "src/memcached-grammar.cc" + if ( (*p) == 10 ) + goto tr122; + goto st0; +tr120: +#line 247 "src/memcached-grammar.rl" + {incr = memcached_natoq(fstart, p);} + goto st76; +st76: + if ( ++p == pe ) + goto _test_eof76; +case 76: +#line 2117 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr122; + case 13: goto st75; + case 32: goto st76; + case 78: goto st77; + case 110: goto st77; + } + goto st0; +st77: + if ( ++p == pe ) + goto _test_eof77; +case 77: + switch( (*p) ) { + case 79: goto st78; + case 111: goto st78; + } + goto st0; +st78: + if ( ++p == pe ) + goto _test_eof78; +case 78: + switch( (*p) ) { + case 82: goto st79; + case 114: goto st79; + } + goto st0; +st79: + if ( ++p == pe ) + goto _test_eof79; +case 79: + switch( (*p) ) { + case 69: goto st80; + case 101: goto st80; + } + goto st0; +st80: + if ( ++p == pe ) + goto _test_eof80; +case 80: + switch( (*p) ) { + case 80: goto st81; + case 112: goto st81; + } + goto st0; +st81: + if ( ++p == pe ) + goto _test_eof81; +case 81: + switch( 
(*p) ) { + case 76: goto st82; + case 108: goto st82; + } + goto st0; +st82: + if ( ++p == pe ) + goto _test_eof82; +case 82: + switch( (*p) ) { + case 89: goto st83; + case 121: goto st83; + } + goto st0; +st83: + if ( ++p == pe ) + goto _test_eof83; +case 83: + switch( (*p) ) { + case 10: goto tr132; + case 13: goto tr133; + case 32: goto tr134; + } + goto st0; +tr134: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st84; +st84: + if ( ++p == pe ) + goto _test_eof84; +case 84: +#line 2198 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr122; + case 13: goto st75; + case 32: goto st84; + } + goto st0; +st85: + if ( ++p == pe ) + goto _test_eof85; +case 85: + switch( (*p) ) { + case 69: goto st86; + case 101: goto st86; + } + goto st0; +st86: + if ( ++p == pe ) + goto _test_eof86; +case 86: + switch( (*p) ) { + case 84: goto st87; + case 116: goto st87; + } + goto st0; +st87: + if ( ++p == pe ) + goto _test_eof87; +case 87: + switch( (*p) ) { + case 69: goto st88; + case 101: goto st88; + } + goto st0; +st88: + if ( ++p == pe ) + goto _test_eof88; +case 88: + if ( (*p) == 32 ) + goto st89; + goto st0; +st89: + if ( ++p == pe ) + goto _test_eof89; +case 89: + switch( (*p) ) { + case 13: goto st0; + case 32: goto st89; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st0; + goto tr140; +tr140: +#line 222 "src/memcached-grammar.rl" + { + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if ( *p == ' ' || *p == '\r' || *p == '\n') { + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + goto st90; +st90: + if ( ++p == pe ) + goto _test_eof90; +case 90: +#line 2267 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr141; + case 13: goto st91; + case 32: goto st92; + } + goto st0; +tr147: +#line 237 "src/memcached-grammar.rl" + { + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } + goto st91; 
+tr158: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st91; +st91: + if ( ++p == pe ) + goto _test_eof91; +case 91: +#line 2290 "src/memcached-grammar.cc" + if ( (*p) == 10 ) + goto tr141; + goto st0; +st92: + if ( ++p == pe ) + goto _test_eof92; +case 92: + switch( (*p) ) { + case 10: goto tr141; + case 13: goto st91; + case 32: goto st92; + case 78: goto st95; + case 110: goto st95; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr144; + goto st0; +tr144: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st93; +st93: + if ( ++p == pe ) + goto _test_eof93; +case 93: +#line 2316 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr146; + case 13: goto tr147; + case 32: goto tr148; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st93; + goto st0; +tr148: +#line 237 "src/memcached-grammar.rl" + { + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } + goto st94; +st94: + if ( ++p == pe ) + goto _test_eof94; +case 94: +#line 2337 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr141; + case 13: goto st91; + case 32: goto st94; + case 78: goto st95; + case 110: goto st95; + } + goto st0; +st95: + if ( ++p == pe ) + goto _test_eof95; +case 95: + switch( (*p) ) { + case 79: goto st96; + case 111: goto st96; + } + goto st0; +st96: + if ( ++p == pe ) + goto _test_eof96; +case 96: + switch( (*p) ) { + case 82: goto st97; + case 114: goto st97; + } + goto st0; +st97: + if ( ++p == pe ) + goto _test_eof97; +case 97: + switch( (*p) ) { + case 69: goto st98; + case 101: goto st98; + } + goto st0; +st98: + if ( ++p == pe ) + goto _test_eof98; +case 98: + switch( (*p) ) { + case 80: goto st99; + case 112: goto st99; + } + goto st0; +st99: + if ( ++p == pe ) + goto _test_eof99; +case 99: + switch( (*p) ) { + case 76: goto st100; + case 108: goto st100; + } + goto st0; +st100: + if ( ++p == pe ) + goto _test_eof100; +case 100: + switch( (*p) ) { + case 89: goto 
st101; + case 121: goto st101; + } + goto st0; +st101: + if ( ++p == pe ) + goto _test_eof101; +case 101: + switch( (*p) ) { + case 10: goto tr157; + case 13: goto tr158; + case 32: goto tr159; + } + goto st0; +tr159: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st102; +st102: + if ( ++p == pe ) + goto _test_eof102; +case 102: +#line 2418 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr141; + case 13: goto st91; + case 32: goto st102; + } + goto st0; +st103: + if ( ++p == pe ) + goto _test_eof103; +case 103: + switch( (*p) ) { + case 76: goto st104; + case 108: goto st104; + } + goto st0; +st104: + if ( ++p == pe ) + goto _test_eof104; +case 104: + switch( (*p) ) { + case 85: goto st105; + case 117: goto st105; + } + goto st0; +st105: + if ( ++p == pe ) + goto _test_eof105; +case 105: + switch( (*p) ) { + case 83: goto st106; + case 115: goto st106; + } + goto st0; +st106: + if ( ++p == pe ) + goto _test_eof106; +case 106: + switch( (*p) ) { + case 72: goto st107; + case 104: goto st107; + } + goto st0; +st107: + if ( ++p == pe ) + goto _test_eof107; +case 107: + if ( (*p) == 95 ) + goto st108; + goto st0; +st108: + if ( ++p == pe ) + goto _test_eof108; +case 108: + switch( (*p) ) { + case 65: goto st109; + case 97: goto st109; + } + goto st0; +st109: + if ( ++p == pe ) + goto _test_eof109; +case 109: + switch( (*p) ) { + case 76: goto st110; + case 108: goto st110; + } + goto st0; +st110: + if ( ++p == pe ) + goto _test_eof110; +case 110: + switch( (*p) ) { + case 76: goto st111; + case 108: goto st111; + } + goto st0; +st111: + if ( ++p == pe ) + goto _test_eof111; +case 111: + switch( (*p) ) { + case 10: goto tr169; + case 13: goto st112; + case 32: goto st113; + } + goto st0; +tr186: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st112; +tr175: +#line 248 "src/memcached-grammar.rl" + {flush_delay = memcached_natoq(fstart, p);} + goto st112; +st112: + if ( ++p == pe ) + goto _test_eof112; +case 112: 
+#line 2517 "src/memcached-grammar.cc" + if ( (*p) == 10 ) + goto tr169; + goto st0; +st113: + if ( ++p == pe ) + goto _test_eof113; +case 113: + switch( (*p) ) { + case 10: goto tr169; + case 13: goto st112; + case 32: goto st113; + case 78: goto st116; + case 110: goto st116; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr172; + goto st0; +tr172: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st114; +st114: + if ( ++p == pe ) + goto _test_eof114; +case 114: +#line 2543 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr174; + case 13: goto tr175; + case 32: goto tr176; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st114; + goto st0; +tr176: +#line 248 "src/memcached-grammar.rl" + {flush_delay = memcached_natoq(fstart, p);} + goto st115; +st115: + if ( ++p == pe ) + goto _test_eof115; +case 115: +#line 2560 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr169; + case 13: goto st112; + case 32: goto st115; + case 78: goto st116; + case 110: goto st116; + } + goto st0; +st116: + if ( ++p == pe ) + goto _test_eof116; +case 116: + switch( (*p) ) { + case 79: goto st117; + case 111: goto st117; + } + goto st0; +st117: + if ( ++p == pe ) + goto _test_eof117; +case 117: + switch( (*p) ) { + case 82: goto st118; + case 114: goto st118; + } + goto st0; +st118: + if ( ++p == pe ) + goto _test_eof118; +case 118: + switch( (*p) ) { + case 69: goto st119; + case 101: goto st119; + } + goto st0; +st119: + if ( ++p == pe ) + goto _test_eof119; +case 119: + switch( (*p) ) { + case 80: goto st120; + case 112: goto st120; + } + goto st0; +st120: + if ( ++p == pe ) + goto _test_eof120; +case 120: + switch( (*p) ) { + case 76: goto st121; + case 108: goto st121; + } + goto st0; +st121: + if ( ++p == pe ) + goto _test_eof121; +case 121: + switch( (*p) ) { + case 89: goto st122; + case 121: goto st122; + } + goto st0; +st122: + if ( ++p == pe ) + goto _test_eof122; +case 122: + switch( (*p) ) { + case 10: goto tr185; + case 13: goto tr186; + 
case 32: goto tr187; + } + goto st0; +tr187: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st123; +st123: + if ( ++p == pe ) + goto _test_eof123; +case 123: +#line 2641 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr169; + case 13: goto st112; + case 32: goto st123; + } + goto st0; +st124: + if ( ++p == pe ) + goto _test_eof124; +case 124: + switch( (*p) ) { + case 69: goto st125; + case 101: goto st125; + } + goto st0; +st125: + if ( ++p == pe ) + goto _test_eof125; +case 125: + switch( (*p) ) { + case 84: goto st126; + case 116: goto st126; + } + goto st0; +st126: + if ( ++p == pe ) + goto _test_eof126; +case 126: + switch( (*p) ) { + case 32: goto tr191; + case 83: goto st131; + case 115: goto st131; + } + goto st0; +tr191: +#line 292 "src/memcached-grammar.rl" + {show_cas = false;} + goto st127; +tr198: +#line 293 "src/memcached-grammar.rl" + {show_cas = true;} + goto st127; +st127: + if ( ++p == pe ) + goto _test_eof127; +case 127: +#line 2688 "src/memcached-grammar.cc" + switch( (*p) ) { + case 13: goto st0; + case 32: goto st127; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st0; + goto tr193; +tr193: +#line 222 "src/memcached-grammar.rl" + { + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if ( *p == ' ' || *p == '\r' || *p == '\n') { + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + goto st128; +st128: + if ( ++p == pe ) + goto _test_eof128; +case 128: +#line 2713 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr195; + case 13: goto st129; + case 32: goto st130; + } + goto st0; +st129: + if ( ++p == pe ) + goto _test_eof129; +case 129: + if ( (*p) == 10 ) + goto tr195; + goto st0; +st130: + if ( ++p == pe ) + goto _test_eof130; +case 130: + switch( (*p) ) { + case 9: goto st0; + case 10: goto tr195; + case 13: goto st129; + case 32: goto st130; + } + goto tr193; +st131: + if ( ++p == pe ) + goto _test_eof131; +case 131: + if ( 
(*p) == 32 ) + goto tr198; + goto st0; +st132: + if ( ++p == pe ) + goto _test_eof132; +case 132: + switch( (*p) ) { + case 78: goto st133; + case 110: goto st133; + } + goto st0; +st133: + if ( ++p == pe ) + goto _test_eof133; +case 133: + switch( (*p) ) { + case 67: goto st134; + case 99: goto st134; + } + goto st0; +st134: + if ( ++p == pe ) + goto _test_eof134; +case 134: + switch( (*p) ) { + case 82: goto st135; + case 114: goto st135; + } + goto st0; +st135: + if ( ++p == pe ) + goto _test_eof135; +case 135: + if ( (*p) == 32 ) + goto tr202; + goto st0; +st136: + if ( ++p == pe ) + goto _test_eof136; +case 136: + switch( (*p) ) { + case 82: goto st137; + case 114: goto st137; + } + goto st0; +st137: + if ( ++p == pe ) + goto _test_eof137; +case 137: + switch( (*p) ) { + case 69: goto st138; + case 101: goto st138; + } + goto st0; +st138: + if ( ++p == pe ) + goto _test_eof138; +case 138: + switch( (*p) ) { + case 80: goto st139; + case 112: goto st139; + } + goto st0; +st139: + if ( ++p == pe ) + goto _test_eof139; +case 139: + switch( (*p) ) { + case 69: goto st140; + case 101: goto st140; + } + goto st0; +st140: + if ( ++p == pe ) + goto _test_eof140; +case 140: + switch( (*p) ) { + case 78: goto st141; + case 110: goto st141; + } + goto st0; +st141: + if ( ++p == pe ) + goto _test_eof141; +case 141: + switch( (*p) ) { + case 68: goto st142; + case 100: goto st142; + } + goto st0; +st142: + if ( ++p == pe ) + goto _test_eof142; +case 142: + if ( (*p) == 32 ) + goto tr209; + goto st0; +st143: + if ( ++p == pe ) + goto _test_eof143; +case 143: + switch( (*p) ) { + case 85: goto st144; + case 117: goto st144; + } + goto st0; +st144: + if ( ++p == pe ) + goto _test_eof144; +case 144: + switch( (*p) ) { + case 73: goto st145; + case 105: goto st145; + } + goto st0; +st145: + if ( ++p == pe ) + goto _test_eof145; +case 145: + switch( (*p) ) { + case 84: goto st146; + case 116: goto st146; + } + goto st0; +st146: + if ( ++p == pe ) + goto _test_eof146; +case 146: 
+ switch( (*p) ) { + case 10: goto tr213; + case 13: goto st147; + } + goto st0; +st147: + if ( ++p == pe ) + goto _test_eof147; +case 147: + if ( (*p) == 10 ) + goto tr213; + goto st0; +st148: + if ( ++p == pe ) + goto _test_eof148; +case 148: + switch( (*p) ) { + case 69: goto st149; + case 101: goto st149; + } + goto st0; +st149: + if ( ++p == pe ) + goto _test_eof149; +case 149: + switch( (*p) ) { + case 80: goto st150; + case 112: goto st150; + } + goto st0; +st150: + if ( ++p == pe ) + goto _test_eof150; +case 150: + switch( (*p) ) { + case 76: goto st151; + case 108: goto st151; + } + goto st0; +st151: + if ( ++p == pe ) + goto _test_eof151; +case 151: + switch( (*p) ) { + case 65: goto st152; + case 97: goto st152; + } + goto st0; +st152: + if ( ++p == pe ) + goto _test_eof152; +case 152: + switch( (*p) ) { + case 67: goto st153; + case 99: goto st153; + } + goto st0; +st153: + if ( ++p == pe ) + goto _test_eof153; +case 153: + switch( (*p) ) { + case 69: goto st154; + case 101: goto st154; + } + goto st0; +st154: + if ( ++p == pe ) + goto _test_eof154; +case 154: + if ( (*p) == 32 ) + goto st155; + goto st0; +st155: + if ( ++p == pe ) + goto _test_eof155; +case 155: + switch( (*p) ) { + case 13: goto st0; + case 32: goto st155; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st0; + goto tr222; +tr222: +#line 222 "src/memcached-grammar.rl" + { + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if ( *p == ' ' || *p == '\r' || *p == '\n') { + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + goto st156; +st156: + if ( ++p == pe ) + goto _test_eof156; +case 156: +#line 2972 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st157; + goto st0; +st157: + if ( ++p == pe ) + goto _test_eof157; +case 157: + if ( (*p) == 32 ) + goto st157; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr224; + goto st0; +tr224: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st158; +st158: + if ( ++p 
== pe ) + goto _test_eof158; +case 158: +#line 2993 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr225; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st158; + goto st0; +tr225: +#line 244 "src/memcached-grammar.rl" + {flags = memcached_natoq(fstart, p);} + goto st159; +st159: + if ( ++p == pe ) + goto _test_eof159; +case 159: +#line 3007 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st159; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr228; + goto st0; +tr228: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st160; +st160: + if ( ++p == pe ) + goto _test_eof160; +case 160: +#line 3021 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr229; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st160; + goto st0; +tr229: +#line 237 "src/memcached-grammar.rl" + { + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } + goto st161; +st161: + if ( ++p == pe ) + goto _test_eof161; +case 161: +#line 3039 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st161; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr232; + goto st0; +tr232: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st162; +st162: + if ( ++p == pe ) + goto _test_eof162; +case 162: +#line 3053 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr233; + case 13: goto tr234; + case 32: goto tr235; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st162; + goto st0; +tr234: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st163; +tr247: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st163; +st163: + if ( ++p == pe ) + goto _test_eof163; +case 163: +#line 3074 "src/memcached-grammar.cc" + if ( (*p) == 10 ) + goto tr237; + goto st0; +tr235: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st164; +st164: + if ( ++p == pe ) + goto _test_eof164; +case 164: +#line 3086 "src/memcached-grammar.cc" + switch( (*p) ) { + case 32: goto 
st164; + case 78: goto st165; + case 110: goto st165; + } + goto st0; +st165: + if ( ++p == pe ) + goto _test_eof165; +case 165: + switch( (*p) ) { + case 79: goto st166; + case 111: goto st166; + } + goto st0; +st166: + if ( ++p == pe ) + goto _test_eof166; +case 166: + switch( (*p) ) { + case 82: goto st167; + case 114: goto st167; + } + goto st0; +st167: + if ( ++p == pe ) + goto _test_eof167; +case 167: + switch( (*p) ) { + case 69: goto st168; + case 101: goto st168; + } + goto st0; +st168: + if ( ++p == pe ) + goto _test_eof168; +case 168: + switch( (*p) ) { + case 80: goto st169; + case 112: goto st169; + } + goto st0; +st169: + if ( ++p == pe ) + goto _test_eof169; +case 169: + switch( (*p) ) { + case 76: goto st170; + case 108: goto st170; + } + goto st0; +st170: + if ( ++p == pe ) + goto _test_eof170; +case 170: + switch( (*p) ) { + case 89: goto st171; + case 121: goto st171; + } + goto st0; +st171: + if ( ++p == pe ) + goto _test_eof171; +case 171: + switch( (*p) ) { + case 10: goto tr246; + case 13: goto tr247; + } + goto st0; +st172: + if ( ++p == pe ) + goto _test_eof172; +case 172: + switch( (*p) ) { + case 69: goto st173; + case 84: goto st192; + case 101: goto st173; + case 116: goto st192; + } + goto st0; +st173: + if ( ++p == pe ) + goto _test_eof173; +case 173: + switch( (*p) ) { + case 84: goto st174; + case 116: goto st174; + } + goto st0; +st174: + if ( ++p == pe ) + goto _test_eof174; +case 174: + if ( (*p) == 32 ) + goto st175; + goto st0; +st175: + if ( ++p == pe ) + goto _test_eof175; +case 175: + switch( (*p) ) { + case 13: goto st0; + case 32: goto st175; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st0; + goto tr252; +tr252: +#line 222 "src/memcached-grammar.rl" + { + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if ( *p == ' ' || *p == '\r' || *p == '\n') { + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + goto st176; +st176: + if ( ++p == pe ) + goto 
_test_eof176; +case 176: +#line 3211 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st177; + goto st0; +st177: + if ( ++p == pe ) + goto _test_eof177; +case 177: + if ( (*p) == 32 ) + goto st177; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr254; + goto st0; +tr254: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st178; +st178: + if ( ++p == pe ) + goto _test_eof178; +case 178: +#line 3232 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr255; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st178; + goto st0; +tr255: +#line 244 "src/memcached-grammar.rl" + {flags = memcached_natoq(fstart, p);} + goto st179; +st179: + if ( ++p == pe ) + goto _test_eof179; +case 179: +#line 3246 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st179; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr258; + goto st0; +tr258: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st180; +st180: + if ( ++p == pe ) + goto _test_eof180; +case 180: +#line 3260 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto tr259; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st180; + goto st0; +tr259: +#line 237 "src/memcached-grammar.rl" + { + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } + goto st181; +st181: + if ( ++p == pe ) + goto _test_eof181; +case 181: +#line 3278 "src/memcached-grammar.cc" + if ( (*p) == 32 ) + goto st181; + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr262; + goto st0; +tr262: +#line 221 "src/memcached-grammar.rl" + { fstart = p; } + goto st182; +st182: + if ( ++p == pe ) + goto _test_eof182; +case 182: +#line 3292 "src/memcached-grammar.cc" + switch( (*p) ) { + case 10: goto tr263; + case 13: goto tr264; + case 32: goto tr265; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st182; + goto st0; +tr264: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st183; +tr277: +#line 281 "src/memcached-grammar.rl" + { noreply = true; } + goto st183; +st183: + if ( 
++p == pe ) + goto _test_eof183; +case 183: +#line 3313 "src/memcached-grammar.cc" + if ( (*p) == 10 ) + goto tr267; + goto st0; +tr265: +#line 245 "src/memcached-grammar.rl" + {bytes = memcached_natoq(fstart, p);} + goto st184; +st184: + if ( ++p == pe ) + goto _test_eof184; +case 184: +#line 3325 "src/memcached-grammar.cc" + switch( (*p) ) { + case 32: goto st184; + case 78: goto st185; + case 110: goto st185; + } + goto st0; +st185: + if ( ++p == pe ) + goto _test_eof185; +case 185: + switch( (*p) ) { + case 79: goto st186; + case 111: goto st186; + } + goto st0; +st186: + if ( ++p == pe ) + goto _test_eof186; +case 186: + switch( (*p) ) { + case 82: goto st187; + case 114: goto st187; + } + goto st0; +st187: + if ( ++p == pe ) + goto _test_eof187; +case 187: + switch( (*p) ) { + case 69: goto st188; + case 101: goto st188; + } + goto st0; +st188: + if ( ++p == pe ) + goto _test_eof188; +case 188: + switch( (*p) ) { + case 80: goto st189; + case 112: goto st189; + } + goto st0; +st189: + if ( ++p == pe ) + goto _test_eof189; +case 189: + switch( (*p) ) { + case 76: goto st190; + case 108: goto st190; + } + goto st0; +st190: + if ( ++p == pe ) + goto _test_eof190; +case 190: + switch( (*p) ) { + case 89: goto st191; + case 121: goto st191; + } + goto st0; +st191: + if ( ++p == pe ) + goto _test_eof191; +case 191: + switch( (*p) ) { + case 10: goto tr276; + case 13: goto tr277; + } + goto st0; +st192: + if ( ++p == pe ) + goto _test_eof192; +case 192: + switch( (*p) ) { + case 65: goto st193; + case 97: goto st193; + } + goto st0; +st193: + if ( ++p == pe ) + goto _test_eof193; +case 193: + switch( (*p) ) { + case 84: goto st194; + case 116: goto st194; + } + goto st0; +st194: + if ( ++p == pe ) + goto _test_eof194; +case 194: + switch( (*p) ) { + case 83: goto st195; + case 115: goto st195; + } + goto st0; +st195: + if ( ++p == pe ) + goto _test_eof195; +case 195: + switch( (*p) ) { + case 10: goto tr281; + case 13: goto st196; + } + goto st0; +st196: + if ( ++p 
== pe ) + goto _test_eof196; +case 196: + if ( (*p) == 10 ) + goto tr281; + goto st0; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof197: cs = 197; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; + _test_eof15: cs = 15; goto _test_eof; + _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; + _test_eof23: cs = 23; goto _test_eof; + _test_eof24: cs = 24; goto _test_eof; + _test_eof25: cs = 25; goto _test_eof; + _test_eof26: cs = 26; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; + _test_eof28: cs = 28; goto _test_eof; + _test_eof29: cs = 29; goto _test_eof; + _test_eof30: cs = 30; goto _test_eof; + _test_eof31: cs = 31; goto _test_eof; + _test_eof32: cs = 32; goto _test_eof; + _test_eof33: cs = 33; goto _test_eof; + _test_eof34: cs = 34; goto _test_eof; + _test_eof35: cs = 35; goto _test_eof; + _test_eof36: cs = 36; goto _test_eof; + _test_eof37: cs = 37; goto _test_eof; + _test_eof38: cs = 38; goto _test_eof; + _test_eof39: cs = 39; goto _test_eof; + _test_eof40: cs = 40; goto _test_eof; + _test_eof41: cs = 41; goto _test_eof; + _test_eof42: cs = 42; goto _test_eof; + _test_eof43: cs = 43; goto _test_eof; + _test_eof44: cs = 44; goto _test_eof; + _test_eof45: cs = 45; goto _test_eof; + _test_eof46: cs = 46; goto _test_eof; + _test_eof47: cs = 47; goto _test_eof; + _test_eof48: cs = 48; goto _test_eof; + 
_test_eof49: cs = 49; goto _test_eof; + _test_eof50: cs = 50; goto _test_eof; + _test_eof51: cs = 51; goto _test_eof; + _test_eof52: cs = 52; goto _test_eof; + _test_eof53: cs = 53; goto _test_eof; + _test_eof54: cs = 54; goto _test_eof; + _test_eof55: cs = 55; goto _test_eof; + _test_eof56: cs = 56; goto _test_eof; + _test_eof57: cs = 57; goto _test_eof; + _test_eof58: cs = 58; goto _test_eof; + _test_eof59: cs = 59; goto _test_eof; + _test_eof60: cs = 60; goto _test_eof; + _test_eof61: cs = 61; goto _test_eof; + _test_eof62: cs = 62; goto _test_eof; + _test_eof63: cs = 63; goto _test_eof; + _test_eof64: cs = 64; goto _test_eof; + _test_eof65: cs = 65; goto _test_eof; + _test_eof66: cs = 66; goto _test_eof; + _test_eof67: cs = 67; goto _test_eof; + _test_eof68: cs = 68; goto _test_eof; + _test_eof69: cs = 69; goto _test_eof; + _test_eof70: cs = 70; goto _test_eof; + _test_eof71: cs = 71; goto _test_eof; + _test_eof72: cs = 72; goto _test_eof; + _test_eof73: cs = 73; goto _test_eof; + _test_eof74: cs = 74; goto _test_eof; + _test_eof75: cs = 75; goto _test_eof; + _test_eof76: cs = 76; goto _test_eof; + _test_eof77: cs = 77; goto _test_eof; + _test_eof78: cs = 78; goto _test_eof; + _test_eof79: cs = 79; goto _test_eof; + _test_eof80: cs = 80; goto _test_eof; + _test_eof81: cs = 81; goto _test_eof; + _test_eof82: cs = 82; goto _test_eof; + _test_eof83: cs = 83; goto _test_eof; + _test_eof84: cs = 84; goto _test_eof; + _test_eof85: cs = 85; goto _test_eof; + _test_eof86: cs = 86; goto _test_eof; + _test_eof87: cs = 87; goto _test_eof; + _test_eof88: cs = 88; goto _test_eof; + _test_eof89: cs = 89; goto _test_eof; + _test_eof90: cs = 90; goto _test_eof; + _test_eof91: cs = 91; goto _test_eof; + _test_eof92: cs = 92; goto _test_eof; + _test_eof93: cs = 93; goto _test_eof; + _test_eof94: cs = 94; goto _test_eof; + _test_eof95: cs = 95; goto _test_eof; + _test_eof96: cs = 96; goto _test_eof; + _test_eof97: cs = 97; goto _test_eof; + _test_eof98: cs = 98; goto _test_eof; + 
_test_eof99: cs = 99; goto _test_eof; + _test_eof100: cs = 100; goto _test_eof; + _test_eof101: cs = 101; goto _test_eof; + _test_eof102: cs = 102; goto _test_eof; + _test_eof103: cs = 103; goto _test_eof; + _test_eof104: cs = 104; goto _test_eof; + _test_eof105: cs = 105; goto _test_eof; + _test_eof106: cs = 106; goto _test_eof; + _test_eof107: cs = 107; goto _test_eof; + _test_eof108: cs = 108; goto _test_eof; + _test_eof109: cs = 109; goto _test_eof; + _test_eof110: cs = 110; goto _test_eof; + _test_eof111: cs = 111; goto _test_eof; + _test_eof112: cs = 112; goto _test_eof; + _test_eof113: cs = 113; goto _test_eof; + _test_eof114: cs = 114; goto _test_eof; + _test_eof115: cs = 115; goto _test_eof; + _test_eof116: cs = 116; goto _test_eof; + _test_eof117: cs = 117; goto _test_eof; + _test_eof118: cs = 118; goto _test_eof; + _test_eof119: cs = 119; goto _test_eof; + _test_eof120: cs = 120; goto _test_eof; + _test_eof121: cs = 121; goto _test_eof; + _test_eof122: cs = 122; goto _test_eof; + _test_eof123: cs = 123; goto _test_eof; + _test_eof124: cs = 124; goto _test_eof; + _test_eof125: cs = 125; goto _test_eof; + _test_eof126: cs = 126; goto _test_eof; + _test_eof127: cs = 127; goto _test_eof; + _test_eof128: cs = 128; goto _test_eof; + _test_eof129: cs = 129; goto _test_eof; + _test_eof130: cs = 130; goto _test_eof; + _test_eof131: cs = 131; goto _test_eof; + _test_eof132: cs = 132; goto _test_eof; + _test_eof133: cs = 133; goto _test_eof; + _test_eof134: cs = 134; goto _test_eof; + _test_eof135: cs = 135; goto _test_eof; + _test_eof136: cs = 136; goto _test_eof; + _test_eof137: cs = 137; goto _test_eof; + _test_eof138: cs = 138; goto _test_eof; + _test_eof139: cs = 139; goto _test_eof; + _test_eof140: cs = 140; goto _test_eof; + _test_eof141: cs = 141; goto _test_eof; + _test_eof142: cs = 142; goto _test_eof; + _test_eof143: cs = 143; goto _test_eof; + _test_eof144: cs = 144; goto _test_eof; + _test_eof145: cs = 145; goto _test_eof; + _test_eof146: cs = 146; 
goto _test_eof; + _test_eof147: cs = 147; goto _test_eof; + _test_eof148: cs = 148; goto _test_eof; + _test_eof149: cs = 149; goto _test_eof; + _test_eof150: cs = 150; goto _test_eof; + _test_eof151: cs = 151; goto _test_eof; + _test_eof152: cs = 152; goto _test_eof; + _test_eof153: cs = 153; goto _test_eof; + _test_eof154: cs = 154; goto _test_eof; + _test_eof155: cs = 155; goto _test_eof; + _test_eof156: cs = 156; goto _test_eof; + _test_eof157: cs = 157; goto _test_eof; + _test_eof158: cs = 158; goto _test_eof; + _test_eof159: cs = 159; goto _test_eof; + _test_eof160: cs = 160; goto _test_eof; + _test_eof161: cs = 161; goto _test_eof; + _test_eof162: cs = 162; goto _test_eof; + _test_eof163: cs = 163; goto _test_eof; + _test_eof164: cs = 164; goto _test_eof; + _test_eof165: cs = 165; goto _test_eof; + _test_eof166: cs = 166; goto _test_eof; + _test_eof167: cs = 167; goto _test_eof; + _test_eof168: cs = 168; goto _test_eof; + _test_eof169: cs = 169; goto _test_eof; + _test_eof170: cs = 170; goto _test_eof; + _test_eof171: cs = 171; goto _test_eof; + _test_eof172: cs = 172; goto _test_eof; + _test_eof173: cs = 173; goto _test_eof; + _test_eof174: cs = 174; goto _test_eof; + _test_eof175: cs = 175; goto _test_eof; + _test_eof176: cs = 176; goto _test_eof; + _test_eof177: cs = 177; goto _test_eof; + _test_eof178: cs = 178; goto _test_eof; + _test_eof179: cs = 179; goto _test_eof; + _test_eof180: cs = 180; goto _test_eof; + _test_eof181: cs = 181; goto _test_eof; + _test_eof182: cs = 182; goto _test_eof; + _test_eof183: cs = 183; goto _test_eof; + _test_eof184: cs = 184; goto _test_eof; + _test_eof185: cs = 185; goto _test_eof; + _test_eof186: cs = 186; goto _test_eof; + _test_eof187: cs = 187; goto _test_eof; + _test_eof188: cs = 188; goto _test_eof; + _test_eof189: cs = 189; goto _test_eof; + _test_eof190: cs = 190; goto _test_eof; + _test_eof191: cs = 191; goto _test_eof; + _test_eof192: cs = 192; goto _test_eof; + _test_eof193: cs = 193; goto _test_eof; + 
_test_eof194: cs = 194; goto _test_eof; + _test_eof195: cs = 195; goto _test_eof; + _test_eof196: cs = 196; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 306 "src/memcached-grammar.rl" + + + if (!done) { + say_debug("parse failed after: `%.*s'", (int)(pe - p), p); + if (pe - p > (1 << 20)) { + exit: + say_warn("memcached proto error"); + obuf_dup(out, "ERROR\r\n", 7); + stats.bytes_written += 7; + return -1; + } + char *r; + if ((r = (char *) memmem(p, pe - p, "\r\n", 2)) != NULL) { + in->pos = r + 2; + obuf_dup(out, "CLIENT_ERROR bad command line format\r\n", 38); + return 1; + } + return 0; + } + + if (noreply) { + obuf_rollback_to_svp(out, &obuf_svp); + } + return 1; +} + +/* + * Local Variables: + * mode: c + * End: + * vim: syntax=objc + */ diff --git a/src/memcached-grammar.rl b/src/memcached-grammar.rl new file mode 100644 index 0000000000000000000000000000000000000000..8b3a00ab5d9175a84a43a6f55771dbaf66040fbe --- /dev/null +++ b/src/memcached-grammar.rl @@ -0,0 +1,337 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +%%{ + machine memcached; + write data; +}%% + +static int __attribute__((noinline)) +memcached_dispatch(struct ev_io *coio, struct iobuf *iobuf) +{ + int cs; + char *p, *pe; + char *fstart; + struct tbuf *keys = tbuf_new(fiber_ptr->gc_pool); + const char *key; + bool append, show_cas; + int incr_sign; + uint64_t cas, incr; + uint32_t flags, exptime, bytes; + bool noreply = false; + char *data = NULL; + bool done = false; + uintptr_t flush_delay = 0; + size_t keys_count = 0; + struct ibuf *in = &iobuf->in; + struct obuf *out = &iobuf->out; + /* Savepoint for 'noreply' */ + struct obuf_svp obuf_svp = obuf_create_svp(out); + + p = in->pos; + pe = in->end; + + say_debug("memcached_dispatch '%.*s'", MIN((int)(pe - p), 40) , p); + + %%{ + action set { + key = tbuf_read_field(keys); + STORE; + } + + action add { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple != NULL && !memcached_is_expired(tuple)) + obuf_dup(out, "NOT_STORED\r\n", 12); + else + STORE; + } + + action replace { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) + obuf_dup(out, "NOT_STORED\r\n", 12); + else + STORE; + } + + action cas { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) + obuf_dup(out, "NOT_FOUND\r\n", 11); + else if (memcached_meta(tuple)->cas != cas) 
+ obuf_dup(out, "EXISTS\r\n", 8); + else + STORE; + } + + action append_prepend { + struct tbuf *b; + const char *field; + uint32_t field_len; + + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL) { + obuf_dup(out, "NOT_STORED\r\n", 12); + } else { + field = tuple_field(tuple, 3, &field_len); + b = tbuf_new(fiber_ptr->gc_pool); + if (append) { + tbuf_append(b, field, field_len); + tbuf_append(b, data, bytes); + } else { + tbuf_append(b, data, bytes); + tbuf_append(b, field, field_len); + } + + bytes += field_len; + data = b->data; + STORE; + } + } + + action incr_decr { + struct meta *m; + struct tbuf *b; + const char *field; + uint32_t field_len; + uint64_t value; + + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) { + obuf_dup(out, "NOT_FOUND\r\n", 11); + } else { + m = memcached_meta(tuple); + field = tuple_field(tuple, 3, &field_len); + + if (memcached_is_numeric(field, field_len)) { + value = memcached_natoq(field, + field + field_len); + + if (incr_sign > 0) { + value += incr; + } else { + if (incr > value) + value = 0; + else + value -= incr; + } + + exptime = m->exptime; + flags = m->flags; + + b = tbuf_new(fiber_ptr->gc_pool); + tbuf_printf(b, "%" PRIu64, value); + data = b->data; + bytes = b->size; + + stats.cmd_set++; + try { + memcached_store(key, exptime, flags, bytes, data); + stats.total_items++; + obuf_dup(out, b->data, b->size); + obuf_dup(out, "\r\n", 2); + } catch (const ClientError& e) { + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } else { + obuf_dup(out, "CLIENT_ERROR cannot increment or decrement non-numeric value\r\n", 62); + } + } + + } + + action delete { + key = tbuf_read_field(keys); + struct tuple *tuple = memcached_find(key); + if (tuple == NULL || memcached_is_expired(tuple)) { + obuf_dup(out, "NOT_FOUND\r\n", 11); + } else { + try { + 
memcached_delete(key); + obuf_dup(out, "DELETED\r\n", 9); + } + catch (const ClientError& e) { + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } + } + + action get { /* on ClientError, roll the output back to the savepoint before writing SERVER_ERROR */ + try { + memcached_get(out, keys_count, keys, show_cas); + } catch (const ClientError& e) { + obuf_rollback_to_svp(out, &obuf_svp); + obuf_dup(out, "SERVER_ERROR ", 13); + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); + obuf_dup(out, "\r\n", 2); + } + } + + action flush_all { /* run memcached_flush_all in a new fiber, passing flush_delay, and reply OK */ + struct fiber *f = fiber_new("flush_all", + memcached_flush_all); + fiber_call(f, flush_delay); + obuf_dup(out, "OK\r\n", 4); + } + + action stats { + memcached_print_stats(out); + } + + action quit { /* a negative return value makes memcached_loop() close the connection */ + return -1; + } + + action fstart { fstart = p; } + action key_start { /* scan the key up to the next space, CR or LF and append it to `keys` */ + fstart = p; + for (; p < pe && *p != ' ' && *p != '\r' && *p != '\n'; p++); + if (p < pe && (*p == ' ' || *p == '\r' || *p == '\n')) { /* test *p only when the scan stopped inside the buffer: at p == pe there is no byte to look at */ + tbuf_store_field(keys, fstart, p - fstart); + keys_count++; + p--; + } else + p = fstart; + } + + + printable = [^ \t\r\n]; + key = printable >key_start ; + + action exptime { /* values in (0, 60*60*24*30] are converted to an absolute time by adding ev_now(); anything else is stored as given */ + exptime = memcached_natoq(fstart, p); + if (exptime > 0 && exptime <= 60*60*24*30) + exptime = exptime + ev_now(); + } + exptime = digit+ >fstart %exptime; + + flags = digit+ >fstart %{flags = memcached_natoq(fstart, p);}; + bytes = digit+ >fstart %{bytes = memcached_natoq(fstart, p);}; + cas_value = digit+ >fstart %{cas = memcached_natoq(fstart, p);}; + incr_value = digit+ >fstart %{incr = memcached_natoq(fstart, p);}; + flush_delay = digit+ >fstart %{flush_delay = memcached_natoq(fstart, p);}; + + action read_data { /* make sure `bytes` of data plus 2 trailing bytes are buffered; the sum is done in size_t so that a huge `bytes` cannot wrap around in 32-bit arithmetic */ + size_t parsed = p - in->pos; + while (ibuf_size(in) - parsed < (size_t) bytes + 2) { + size_t to_read = (size_t) bytes + 2 - (pe - p); + if (coio_bread(coio, in, to_read) < to_read) + return -1; /* premature EOF */ + } + /* + * Buffered read may have reallocated the + * buffer. 
+ */ + p = in->pos + parsed; + pe = in->end; + + data = p; + + if (strncmp((char *)(p + bytes), "\r\n", 2) == 0) { + p += bytes + 2; + } else { + goto exit; + } + } + + action done { + done = true; + stats.bytes_read += p - in->pos; + in->pos = p; + } + + eol = ("\r\n" | "\n") @{ p++; }; + spc = " "+; + noreply = (spc "noreply"i %{ noreply = true; })?; + store_command_body = spc key spc flags spc exptime spc bytes noreply eol; + + set = ("set"i store_command_body) @read_data @done @set; + add = ("add"i store_command_body) @read_data @done @add; + replace = ("replace"i store_command_body) @read_data @done @replace; + append = ("append"i %{append = true; } store_command_body) @read_data @done @append_prepend; + prepend = ("prepend"i %{append = false;} store_command_body) @read_data @done @append_prepend; + cas = ("cas"i spc key spc flags spc exptime spc bytes spc cas_value noreply spc?) eol @read_data @done @cas; + + + get = "get"i %{show_cas = false;} spc key (spc key)* spc? eol @done @get; + gets = "gets"i %{show_cas = true;} spc key (spc key)* spc? eol @done @get; + delete = "delete"i spc key (spc exptime)? noreply spc? eol @done @delete; + incr = "incr"i %{incr_sign = 1; } spc key spc incr_value noreply spc? eol @done @incr_decr; + decr = "decr"i %{incr_sign = -1;} spc key spc incr_value noreply spc? eol @done @incr_decr; + + stats = "stats"i eol @done @stats; + flush_all = "flush_all"i (spc flush_delay)? noreply spc? 
eol @done @flush_all; + quit = "quit"i eol @done @quit; + + main := set | cas | add | replace | append | prepend | get | gets | delete | incr | decr | stats | flush_all | quit; + #main := set; + write init; + write exec; + }%% + + if (!done) { + say_debug("parse failed after: `%.*s'", (int)(pe - p), p); + if (pe - p > (1 << 20)) { + exit: + say_warn("memcached proto error"); + obuf_dup(out, "ERROR\r\n", 7); + stats.bytes_written += 7; + return -1; + } + char *r; + if ((r = (char *) memmem(p, pe - p, "\r\n", 2)) != NULL) { + in->pos = r + 2; + obuf_dup(out, "CLIENT_ERROR bad command line format\r\n", 38); + return 1; + } + return 0; + } + + if (noreply) { + obuf_rollback_to_svp(out, &obuf_svp); + } + return 1; +} + +/* + * Local Variables: + * mode: c + * End: + * vim: syntax=objc + */ diff --git a/src/memcached.cc b/src/memcached.cc new file mode 100644 index 0000000000000000000000000000000000000000..f137009b03e96baba8f45e7ef14f80d1c66aac5b --- /dev/null +++ b/src/memcached.cc @@ -0,0 +1,615 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "memcached.h" +#include "tarantool.h" + +#include <limits.h> + +#include "box/box.h" +#include "box/request.h" +#include "box/space.h" +#include "box/port.h" +#include "box/tuple.h" +#include "fiber.h" +extern "C" { +#include <cfg/warning.h> +#include <cfg/tarantool_box_cfg.h> +} /* extern "C" */ +#include "say.h" +#include "stat.h" +#include "salloc.h" +#include "pickle.h" +#include "coio_buf.h" +#include "scoped_guard.h" + +#define STAT(_) \ + _(MEMC_GET, 1) \ + _(MEMC_GET_MISS, 2) \ + _(MEMC_GET_HIT, 3) \ + _(MEMC_EXPIRED_KEYS, 4) + +ENUM(memcached_stat, STAT); +STRS(memcached_stat, STAT); + +static int stat_base; +static struct fiber *memcached_expire = NULL; + +static Index *memcached_index; +static struct iterator *memcached_it; + +/* memcached tuple format: + <key, meta, data> */ + +struct meta { + uint32_t exptime; + uint32_t flags; + uint64_t cas; +} __packed__; + +static uint64_t +memcached_natoq(const char *start, const char *end) +{ + uint64_t num = 0; + while (start < end) { + uint8_t code = *start++; + num = num * 10 + (code - '0'); + } + return num; +} + +void +tbuf_append_field(struct tbuf *b, const char *f) +{ + const char *begin = f; + uint32_t size = load_varint32(&f); + tbuf_append(b, begin, f - begin + size); +} + +void +tbuf_store_field(struct tbuf *b, const char *field, uint32_t len) +{ + char buf[sizeof(uint32_t)+1]; + char *bufend = pack_varint32(buf, len); + tbuf_append(b, buf, bufend - 
buf); + tbuf_append(b, field, len); +} + +/** + * Check that we have a valid field and return it. + * Advances the buffer to point after the field as a side effect. + * The returned pointer addresses the varint length prefix, not the payload. + * Raises IllegalParams when the decoded length runs past the data left in the buffer. + */ +const char * +tbuf_read_field(struct tbuf *buf) +{ + const char *field = buf->data; + uint32_t field_len = pick_varint32((const char **) &buf->data, + buf->data + buf->size); + if (buf->data + field_len > field + buf->size) + tnt_raise(IllegalParams, "packet too short (expected a field)"); + buf->data += field_len; /* buf->data - field is now the whole field: prefix plus payload */ + buf->size -= buf->data - field; + buf->capacity -= buf->data - field; + return field; +} + +/** Build a REPLACE request for cfg.memcached_space with four fields (key, struct meta, the " <flags> <bytes>\r\n" suffix, data) and pass it to box_process(). @a key must point at a varint-length-prefixed field. */ static void +memcached_store(const char *key, uint32_t exptime, uint32_t flags, uint32_t bytes, + const char *data) +{ + uint32_t box_flags = 0; + uint32_t field_count = 4; + static uint64_t cas = 42; /* counter shared by all calls; each stored tuple takes the current value */ + struct meta m; + + struct tbuf *req = tbuf_new(fiber_ptr->gc_pool); + + tbuf_append(req, &cfg.memcached_space, sizeof(uint32_t)); + tbuf_append(req, &box_flags, sizeof(box_flags)); + tbuf_append(req, &field_count, sizeof(field_count)); + + tbuf_append_field(req, key); + + m.exptime = exptime; + m.flags = flags; + m.cas = cas++; + tbuf_store_field(req, (const char *) &m, sizeof(m)); + + char b[43]; + sprintf(b, " %" PRIu32 " %" PRIu32 "\r\n", flags, bytes); + tbuf_store_field(req, b, strlen(b)); + + tbuf_store_field(req, data, bytes); + + int key_len = load_varint32(&key); /* also moves key past its length prefix */ + say_debug("memcached/store key:(%i)'%.*s' exptime:%" PRIu32 " flags:%" PRIu32 " cas:%" PRIu64, + key_len, key_len, (char*) key, exptime, flags, m.cas); /* m.cas is the value stored in the tuple; the static counter has already been incremented */ + /* + * Use a box dispatch wrapper which handles correctly + * read-only/read-write modes. 
+ */ + box_process(&null_port, REPLACE, req->data, req->size); +} + +static void +memcached_delete(const char *key) +{ + uint32_t key_len = 1; + uint32_t box_flags = 0; + struct tbuf *req = tbuf_new(fiber_ptr->gc_pool); + + tbuf_append(req, &cfg.memcached_space, sizeof(uint32_t)); + tbuf_append(req, &box_flags, sizeof(box_flags)); + tbuf_append(req, &key_len, sizeof(key_len)); + tbuf_append_field(req, key); + + box_process(&null_port, DELETE, req->data, req->size); +} + +static struct tuple * +memcached_find(const char *key) +{ + return memcached_index->findByKey(key, 1); +} + +static struct meta * +memcached_meta(struct tuple *tuple) +{ + uint32_t len; + const char *field = tuple_field(tuple, 1, &len); + assert(sizeof(struct meta) <= len); + return (struct meta *) field; +} + +static bool +memcached_is_expired(struct tuple *tuple) +{ + struct meta *m = memcached_meta(tuple); + return m->exptime == 0 ? 0 : m->exptime < ev_now(); +} + +static bool +memcached_is_numeric(const char *field, uint32_t value_len) +{ + for (int i = 0; i < value_len; i++) + if (*(field + i) < '0' || '9' < *(field + i)) + return false; + return true; +} + +static struct stats { + uint64_t total_items; + uint32_t curr_connections; + uint32_t total_connections; + uint64_t cmd_get; + uint64_t cmd_set; + uint64_t get_hits; + uint64_t get_misses; + uint64_t evictions; + uint64_t bytes_read; + uint64_t bytes_written; +} stats; + +struct salloc_stat_memcached_cb_ctx { + int64_t bytes_used; + int64_t items; +}; + +static int +salloc_stat_memcached_cb(const struct slab_cache_stats *cstat, void *cb_ctx) +{ + struct salloc_stat_memcached_cb_ctx *ctx = + (struct salloc_stat_memcached_cb_ctx *) cb_ctx; + ctx->bytes_used += cstat->bytes_used; + ctx->items += cstat->items; + return 0; +} + +static void +memcached_print_stats(struct obuf *out) +{ + struct tbuf *buf = tbuf_new(fiber_ptr->gc_pool); + + struct salloc_stat_memcached_cb_ctx memstats; + memstats.bytes_used = memstats.items = 0; + 
salloc_stat(salloc_stat_memcached_cb, NULL, &memstats); + + tbuf_printf(buf, "STAT pid %" PRIu32 "\r\n", (uint32_t)getpid()); + tbuf_printf(buf, "STAT uptime %" PRIu32 "\r\n", (uint32_t)tarantool_uptime()); + tbuf_printf(buf, "STAT time %" PRIu32 "\r\n", (uint32_t)ev_now()); + tbuf_printf(buf, "STAT version 1.2.5 (tarantool/box)\r\n"); + tbuf_printf(buf, "STAT pointer_size %" PRI_SZ "\r\n", sizeof(void *)*8); + tbuf_printf(buf, "STAT curr_items %" PRIu64 "\r\n", memstats.items); + tbuf_printf(buf, "STAT total_items %" PRIu64 "\r\n", stats.total_items); + tbuf_printf(buf, "STAT bytes %" PRIu64 "\r\n", memstats.bytes_used); + tbuf_printf(buf, "STAT curr_connections %" PRIu32 "\r\n", stats.curr_connections); + tbuf_printf(buf, "STAT total_connections %" PRIu32 "\r\n", stats.total_connections); + tbuf_printf(buf, "STAT connection_structures %" PRIu32 "\r\n", stats.curr_connections); /* lie a bit */ + tbuf_printf(buf, "STAT cmd_get %" PRIu64 "\r\n", stats.cmd_get); + tbuf_printf(buf, "STAT cmd_set %" PRIu64 "\r\n", stats.cmd_set); + tbuf_printf(buf, "STAT get_hits %" PRIu64 "\r\n", stats.get_hits); + tbuf_printf(buf, "STAT get_misses %" PRIu64 "\r\n", stats.get_misses); + tbuf_printf(buf, "STAT evictions %" PRIu64 "\r\n", stats.evictions); + tbuf_printf(buf, "STAT bytes_read %" PRIu64 "\r\n", stats.bytes_read); + tbuf_printf(buf, "STAT bytes_written %" PRIu64 "\r\n", stats.bytes_written); + tbuf_printf(buf, "STAT limit_maxbytes %" PRIu64 "\r\n", (uint64_t)(cfg.slab_alloc_arena * (1 << 30))); + tbuf_printf(buf, "STAT threads 1\r\n"); + tbuf_printf(buf, "END\r\n"); + obuf_dup(out, buf->data, buf->size); +} + +void memcached_get(struct obuf *out, size_t keys_count, struct tbuf *keys, + bool show_cas) +{ + stat_collect(stat_base, MEMC_GET, 1); + stats.cmd_get++; + say_debug("ensuring space for %" PRI_SZ " keys", keys_count); + while (keys_count-- > 0) { + struct tuple *tuple; + const struct meta *m; + const char *value; + const char *suffix; + uint32_t key_len; + uint32_t 
value_len; + uint32_t suffix_len; + + const char *key = tbuf_read_field(keys); + tuple = memcached_find(key); + key_len = load_varint32(&key); + + if (tuple == NULL) { + stat_collect(stat_base, MEMC_GET_MISS, 1); + stats.get_misses++; + continue; + } + + uint32_t len; + struct tuple_iterator it; + tuple_rewind(&it, tuple); + /* skip key */ + (void) tuple_next(&it, &len); + + /* metainfo */ + m = (const struct meta *) tuple_next(&it, &len); + assert(sizeof(struct meta) <= len); + + /* suffix */ + suffix = tuple_next(&it, &suffix_len); + + /* value */ + value = tuple_next(&it, &value_len); + + assert(tuple_next(&it, &len) == NULL); + + if (m->exptime > 0 && m->exptime < ev_now()) { + stats.get_misses++; + stat_collect(stat_base, MEMC_GET_MISS, 1); + continue; + } + stats.get_hits++; + stat_collect(stat_base, MEMC_GET_HIT, 1); + + if (show_cas) { + struct tbuf *b = tbuf_new(fiber_ptr->gc_pool); + tbuf_printf(b, "VALUE %.*s %" PRIu32 " %" PRIu32 " %" PRIu64 "\r\n", key_len, (char*) key, m->flags, value_len, m->cas); + obuf_dup(out, b->data, b->size); + stats.bytes_written += b->size; + } else { + obuf_dup(out, "VALUE ", 6); + obuf_dup(out, key, key_len); + obuf_dup(out, suffix, suffix_len); + } + obuf_dup(out, value, value_len); + obuf_dup(out, "\r\n", 2); + stats.bytes_written += value_len + 2; + } + obuf_dup(out, "END\r\n", 5); + stats.bytes_written += 5; +} + +static void +memcached_flush_all(va_list ap) +{ + uintptr_t delay = va_arg(ap, uintptr_t); + fiber_sleep(delay - ev_now()); + struct tuple *tuple; + struct iterator *it = memcached_index->allocIterator(); + memcached_index->initIterator(it, ITER_ALL, NULL, 0); + while ((tuple = it->next(it))) { + memcached_meta(tuple)->exptime = 1; + } + it->free(it); +} + +#define STORE \ +do { \ + stats.cmd_set++; \ + if (bytes > (1<<20)) { \ + obuf_dup(out, "SERVER_ERROR object too large for cache\r\n", 41);\ + } else { \ + try { \ + memcached_store(key, exptime, flags, bytes, data); \ + stats.total_items++; \ + 
obuf_dup(out, "STORED\r\n", 8); \ + } \ + catch (const ClientError& e) { \ + obuf_dup(out, "SERVER_ERROR ", 13); \ + obuf_dup(out, e.errmsg(), strlen(e.errmsg())); \ + obuf_dup(out, "\r\n", 2); \ + } \ + } \ +} while (0) + +#include "memcached-grammar.cc" + +void +memcached_loop(struct ev_io *coio, struct iobuf *iobuf) +{ + int rc; + int bytes_written; + int batch_count; + struct ibuf *in = &iobuf->in; + + for (;;) { + batch_count = 0; + if (coio_bread(coio, in, 1) <= 0) + return; + + dispatch: + rc = memcached_dispatch(coio, iobuf); + if (rc < 0) { + say_debug("negative dispatch, closing connection"); + return; + } + + if (rc == 0 && batch_count == 0) /* we haven't successfully parsed any requests */ + continue; + + if (rc == 1) { + batch_count++; + /* some unparsed commands remain and batch count less than 20 */ + if (ibuf_size(in) > 0 && batch_count < 20) + goto dispatch; + } + + bytes_written = iobuf_flush(iobuf, coio); + fiber_gc(); + stats.bytes_written += bytes_written; + + if (rc == 1 && ibuf_size(in) > 0) { + batch_count = 0; + goto dispatch; + } + } +} + +static void +memcached_handler(va_list ap) +{ + struct ev_io coio = va_arg(ap, struct ev_io); + struct sockaddr_in *addr = va_arg(ap, struct sockaddr_in *); + struct iobuf *iobuf = va_arg(ap, struct iobuf *); + stats.total_connections++; + stats.curr_connections++; + + (void) addr; + + try { + auto scoped_guard = make_scoped_guard([&] { + fiber_sleep(0.01); + stats.curr_connections--; + evio_close(&coio); + iobuf_delete(iobuf); + }); + + memcached_loop(&coio, iobuf); + iobuf_flush(iobuf, &coio); + } catch (const FiberCancelException& e) { + throw; + } catch (const Exception& e) { + e.log(); + } +} + +int +memcached_check_config(struct tarantool_cfg *conf) +{ + if (conf->memcached_port == 0) { + return 0; + } + + if (conf->memcached_port <= 0 || conf->memcached_port >= USHRT_MAX) { + /* invalid space number */ + out_warning(CNF_OK, "invalid memcached port value: %i", + conf->memcached_port); + return -1; 
+ } + + /* check memcached space number: it should be in segment [0, max_space] */ /* NOTE(review): no space-number check actually follows this comment — confirm whether one is missing */ + + if (conf->memcached_expire_per_loop <= 0) { + /* invalid expire per loop value */ + out_warning(CNF_OK, "invalid expire per loop value: %i", + conf->memcached_expire_per_loop); + return -1; + } + + if (conf->memcached_expire_full_sweep <= 0) { + /* invalid expire full sweep value */ + out_warning(CNF_OK, "invalid expire full sweep value: %i", + conf->memcached_expire_full_sweep); + return -1; + } + + return 0; +} + +/** Free the iterator memcached_it if it is set. The pointer itself is not reset here. */ void +memcached_free(void) +{ + if (memcached_it) + memcached_it->free(memcached_it); +} + + +/** Register the memcached statistics, look up index 0 of cfg.memcached_space and start the "memcached" coio service on bind_ipaddr:memcached_port. Does nothing when memcached_port is 0. */ void +memcached_init(const char *bind_ipaddr, int memcached_port) +{ + if (memcached_port == 0) + return; + + stat_base = stat_register(memcached_stat_strs, memcached_stat_MAX); + + struct space *sp = space_by_n(cfg.memcached_space); + memcached_index = space_index(sp, 0); + + /* run memcached server */ + static struct coio_service memcached; + coio_service_init(&memcached, "memcached", + bind_ipaddr, memcached_port, + memcached_handler, NULL); + evio_service_start(&memcached.evio_service); +} + +/** Create the memcached space with a single unique HASH key over field 0 (STRING). Does nothing when cfg.memcached_port is 0. */ void +memcached_space_init() +{ + if (cfg.memcached_port == 0) + return; + + /* Configure memcached index key. */ /* NOTE(review): none of the three malloc() results below is checked before use */ + struct key_def *key_def = (struct key_def *) malloc(sizeof(struct key_def)); + key_def->part_count = 1; + key_def->is_unique = true; + key_def->type = HASH; + + key_def->parts = (struct key_part *) malloc(sizeof(struct key_part)); + key_def->cmp_order = (uint32_t *) malloc(sizeof(uint32_t)); + + key_def->parts[0].fieldno = 0; + key_def->parts[0].type = STRING; + + key_def->max_fieldno = 1; + key_def->cmp_order[0] = 0; + + /* presumably 1 is the number of key definitions and 4 the tuple arity (memcached_store writes field_count = 4) — TODO confirm against space_new() */ (void) space_new(cfg.memcached_space, key_def, 1, 4); +} + +/** Delete a bunch of expired keys. 
*/ + +void +memcached_delete_expired_keys(struct tbuf *keys_to_delete) +{ + int expired_keys = 0; + + while (keys_to_delete->size > 0) { /* tbuf_read_field() consumes one field per iteration, shrinking size */ + try { + memcached_delete(tbuf_read_field(keys_to_delete)); + expired_keys++; + } + catch (const ClientError& e) { + /* expire is off when replication is on */ + assert(e.errcode() != ER_NONMASTER); + /* The error is already logged. */ + } + } + stat_collect(stat_base, MEMC_EXPIRED_KEYS, expired_keys); + + /* pause before the next batch: expire_per_loop * expire_full_sweep / (index size + 1), capped at 1; the + 1 keeps the divisor non-zero for an empty index */ double delay = ((double) cfg.memcached_expire_per_loop * + cfg.memcached_expire_full_sweep / + (memcached_index->size() + 1)); + if (delay > 1) + delay = 1; + fiber_setcancellable(true); /* the fiber is marked cancellable only for the duration of the sleep */ + fiber_sleep(delay); + fiber_setcancellable(false); +} + +/** Body of the expire fiber: repeatedly walks memcached_it, examining at most cfg.memcached_expire_per_loop tuples per pass, collects the keys of expired tuples and deletes them. There is no normal return; on an Exception the iterator is freed, memcached_it is reset to NULL and the exception is rethrown. */ void +memcached_expire_loop(va_list ap __attribute__((unused))) +{ + struct tuple *tuple = NULL; + + say_info("memcached expire fiber started"); + memcached_it = memcached_index->allocIterator(); + try { +restart: + if (tuple == NULL) /* first pass, or the previous pass ran off the end of the index: start over */ + memcached_index->initIterator(memcached_it, ITER_ALL, NULL, 0); + + struct tbuf *keys_to_delete = tbuf_new(fiber_ptr->gc_pool); + + for (int j = 0; j < cfg.memcached_expire_per_loop; j++) { + + tuple = memcached_it->next(memcached_it); + + if (tuple == NULL) + break; + + if (!memcached_is_expired(tuple)) + continue; + + say_debug("expire tuple %p", tuple); + uint32_t len = 0; + const char *field = tuple_field(tuple, 0, &len); /* field 0 is the key */ + tbuf_store_field(keys_to_delete, field, len); + } + memcached_delete_expired_keys(keys_to_delete); + fiber_gc(); + goto restart; + } catch (const Exception& e) { + memcached_it->free(memcached_it); + memcached_it = NULL; + throw; + } +} + +/** Create and start the expire fiber. Does nothing when cfg.memcached_port or cfg.memcached_expire is 0; if the fiber cannot be created, the error is logged and the function returns. */ void memcached_start_expire() +{ + if (cfg.memcached_port == 0 || cfg.memcached_expire == 0) + return; + + assert(memcached_expire == NULL); + try { + memcached_expire = fiber_new("memcached_expire", + memcached_expire_loop); + } catch (const Exception& e) { + say_error("can't start the expire fiber"); + return; + } + fiber_call(memcached_expire); +} + +void memcached_stop_expire() +{ + if 
(cfg.memcached_port == 0 || cfg.memcached_expire == 0) + return; + assert(memcached_expire != NULL); + fiber_cancel(memcached_expire); + memcached_expire = NULL; +} diff --git a/src/recovery.cc b/src/recovery.cc new file mode 100644 index 0000000000000000000000000000000000000000..e2ae5213ac08037968589afa51f8355739fbdd26 --- /dev/null +++ b/src/recovery.cc @@ -0,0 +1,1298 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include "recovery.h" + +#include <fcntl.h> + +#include "log_io.h" +#include "fiber.h" +#include "tt_pthread.h" +#include "fio.h" +#include "errinj.h" + +/* + * Recovery subsystem + * ------------------ + * + * A facade of the recovery subsystem is struct recovery_state, + * which is a singleton. + * + * Depending on the configuration, start-up parameters, the + * actual task being performed, the recovery can be + * in a different state. + * + * The main factors influencing recovery state are: + * - temporal: whether or not the instance is just booting + * from a snapshot, is in 'local hot standby mode', or + * is already accepting requests + * - topological: whether or not it is a master instance + * or a replica + * - task based: whether it's a master process, + * snapshot saving process or a replication relay. + * + * Depending on the above factors, recovery can be in two main + * operation modes: "read mode", recovering in-memory state + * from existing data, and "write mode", i.e. recording on + * disk changes of the in-memory state. + * + * Let's enumerate all possible distinct states of recovery: + * + * Read mode + * --------- + * IR - initial recovery, initiated right after server start: + * reading data from the snapshot and existing WALs + * and restoring the in-memory state + * IRR - initial replication relay mode, reading data from + * existing WALs (xlogs) and sending it to the client. 
+ * + * HS - standby mode, entered once all existing WALs are read: + * following the WAL directory for all changes done by the master + * and updating the in-memory state + * RR - replication relay, following the WAL directory for all + * changes done by the master and sending them to the + * replica + * + * Write mode + * ---------- + * M - master mode, recording in-memory state changes in the WAL + * R - replica mode, receiving changes from the master and + * recording them in the WAL + * S - snapshot mode, writing entire in-memory state to a compact + * snapshot file. + * + * The following state transitions are possible/supported: + * + * recovery_init() -> IR | IRR # recover() + * IR -> HS # recovery_follow_local() + * IRR -> RR # recovery_follow_local() + * HS -> M # recovery_finalize() + * M -> R # recovery_follow_remote() + * R -> M # recovery_stop_remote() + * M -> S # snapshot() + * R -> S # snapshot() + */ + +/** The recovery singleton: allocated by recovery_init() and reset to NULL by recovery_free(). */ struct recovery_state *recovery_state; + +static const uint64_t snapshot_cookie = 0; + +/** WAL mode names, NULL-terminated; recovery_update_mode() turns a name into enum wal_mode by its index in this array. */ const char *wal_mode_STRS[] = { "none", "write", "fsync", "fsync_delay", NULL }; + +/* {{{ LSN API */ + +/** Register the running fiber (fiber_ptr) as the waiter for @a lsn. Asserts that no waiter is registered yet. */ void +wait_lsn_set(struct wait_lsn *wait_lsn, int64_t lsn) +{ + assert(wait_lsn->waiter == NULL); + wait_lsn->waiter = fiber_ptr; + wait_lsn->lsn = lsn; +} + + +/* Alert the waiter, if any. 
*/ +static inline void +wakeup_lsn_waiter(struct recovery_state *r) +{ + /* wake the waiter only once confirmed_lsn has reached the LSN it registered */ if (r->wait_lsn.waiter && r->confirmed_lsn >= r->wait_lsn.lsn) { + fiber_wakeup(r->wait_lsn.waiter); + } +} + +/** Confirm @a lsn. When lsn is greater than confirmed_lsn and @a is_commit is set, confirmed_lsn becomes lsn, with a warning if the step is not exactly +1; when is_commit is not set nothing is updated. When lsn is not greater than confirmed_lsn, the call hits assert(false) and logs an error. In every case the waiter, if any, is then alerted. */ void +confirm_lsn(struct recovery_state *r, int64_t lsn, bool is_commit) +{ + assert(r->confirmed_lsn <= r->lsn); + + if (r->confirmed_lsn < lsn) { + if (is_commit) { + if (r->confirmed_lsn + 1 != lsn) + say_warn("non consecutive LSN, confirmed: %jd, " + " new: %jd, diff: %jd", + (intmax_t) r->confirmed_lsn, + (intmax_t) lsn, + (intmax_t) (lsn - r->confirmed_lsn)); + r->confirmed_lsn = lsn; + } + } else { + /* + * There can be holes in + * confirmed_lsn, in case of disk write failure, but + * wal_writer never confirms LSNs out of order. + */ + assert(false); + say_error("LSN is used twice or COMMIT order is broken: " + "confirmed: %jd, new: %jd", + (intmax_t) r->confirmed_lsn, (intmax_t) lsn); + } + wakeup_lsn_waiter(r); +} + +/** Set both r->lsn and r->confirmed_lsn to @a lsn, then alert the waiter, if any. */ void +set_lsn(struct recovery_state *r, int64_t lsn) +{ + r->lsn = lsn; + r->confirmed_lsn = lsn; + say_debug("set_lsn(%p, %" PRIi64, r, r->lsn); + wakeup_lsn_waiter(r); +} + +/** Wait until the given LSN makes its way to disk. 
*/ +void +recovery_wait_lsn(struct recovery_state *r, int64_t lsn) +{ /* keep waiting while the confirmed LSN is still behind the requested one; wakeup_lsn_waiter() wakes this fiber once confirmed_lsn >= lsn */ + while (r->confirmed_lsn < lsn) { + wait_lsn_set(&r->wait_lsn, lsn); + try { + fiber_yield(); + wait_lsn_clear(&r->wait_lsn); + } catch (const Exception& e) { /* unregister the waiter also when the yield ends with an exception */ + wait_lsn_clear(&r->wait_lsn); + throw; + } + } +} + + +/** Increment r->lsn and return the new value. */ int64_t +next_lsn(struct recovery_state *r) +{ + r->lsn++; + say_debug("next_lsn(%p, %" PRIi64, r, r->lsn); + return r->lsn; +} + + +/* }}} */ + +/* {{{ Initial recovery */ + +static int +wal_writer_start(struct recovery_state *state); +void +wal_writer_stop(struct recovery_state *r); +static void +recovery_stop_local(struct recovery_state *r); + +/** Allocate the recovery_state singleton from eter_pool and fill it in: row handler and its parameter, copies of the snapshot and WAL directory names, rows_per_wal and flags. Asserts that the singleton does not exist yet and that rows_per_wal > 1. */ void +recovery_init(const char *snap_dirname, const char *wal_dirname, + row_handler row_handler, void *row_handler_param, + int rows_per_wal, int flags) +{ + assert(recovery_state == NULL); + recovery_state = (struct recovery_state *) p0alloc(eter_pool, sizeof(struct recovery_state)); + struct recovery_state *r = recovery_state; + recovery_update_mode(r, "none", 0); + + assert(rows_per_wal > 1); + + r->row_handler = row_handler; + r->row_handler_param = row_handler_param; + + r->snap_dir = &snap_dir; + r->snap_dir->dirname = strdup(snap_dirname); + r->wal_dir = &wal_dir; + r->wal_dir->dirname = strdup(wal_dirname); + /* NOTE(review): wal_mode was set from "none" just above, so this branch looks unreachable here — confirm */ if (r->wal_mode == WAL_FSYNC) { + (void) strcat(r->wal_dir->open_wflags, "s"); + } + r->rows_per_wal = rows_per_wal; + wait_lsn_clear(&r->wait_lsn); + r->flags = flags; +} + +/** Set r->wal_mode from the mode name (looked up in wal_mode_STRS) and store the fsync delay. Asserts that the name is known. */ void +recovery_update_mode(struct recovery_state *r, + const char *mode, double fsync_delay) +{ + r->wal_mode = (enum wal_mode) strindex(wal_mode_STRS, mode, WAL_MODE_MAX); + assert(r->wal_mode != WAL_MODE_MAX); + /* No mutex lock: let's not bother with whether + * or not a WAL writer thread is present, and + * if it's present, the delay will be propagated + * to it whenever there is a next lock/unlock of + * wal_writer->mutex. 
+ */ + r->wal_fsync_delay = fsync_delay; +} + +void +recovery_update_io_rate_limit(struct recovery_state *r, double new_limit) +{ + r->snap_io_rate_limit = new_limit * 1024 * 1024; + if (r->snap_io_rate_limit == 0) + r->snap_io_rate_limit = UINT64_MAX; +} + +void +recovery_free() +{ + struct recovery_state *r = recovery_state; + if (r == NULL) + return; + + if (r->watcher) + recovery_stop_local(r); + + if (r->writer) + wal_writer_stop(r); + + free(r->snap_dir->dirname); + free(r->wal_dir->dirname); + if (r->current_wal) { + /* + * Possible if shutting down a replication + * relay or if error during startup. + */ + log_io_close(&r->current_wal); + } + + recovery_state = NULL; +} + +void +recovery_setup_panic(struct recovery_state *r, bool on_snap_error, bool on_wal_error) +{ + r->wal_dir->panic_if_error = on_wal_error; + r->snap_dir->panic_if_error = on_snap_error; +} + + +/** + * Read a snapshot and call row_handler for every snapshot row. + * Panic in case of error. + */ +void +recover_snap(struct recovery_state *r) +{ + /* current_wal isn't open during initial recover. 
*/ + assert(r->current_wal == NULL); + say_info("recovery start"); + + struct log_io *snap; + int64_t lsn; + + lsn = greatest_lsn(r->snap_dir); + if (lsn <= 0) { + say_error("can't find snapshot"); + goto error; + } + snap = log_io_open_for_read(r->snap_dir, lsn, NONE); + if (snap == NULL) { + say_error("can't find/open snapshot"); + goto error; + } + say_info("recover from `%s'", snap->filename); + struct log_io_cursor i; + + log_io_cursor_open(&i, snap); + + const char *row; + uint32_t rowlen; + while ((row = log_io_cursor_next(&i, &rowlen))) { + if (r->row_handler(r->row_handler_param, row, rowlen) < 0) { + say_error("can't apply row"); + if (snap->dir->panic_if_error) + break; + } + } + log_io_cursor_close(&i); + log_io_close(&snap); + + if (row == NULL) { + r->lsn = r->confirmed_lsn = lsn; + say_info("snapshot recovered, confirmed lsn: %" + PRIi64, r->confirmed_lsn); + return; + } +error: + if (greatest_lsn(r->snap_dir) <= 0) { + say_crit("didn't you forget to initialize storage with --init-storage switch?"); + _exit(1); + } + panic("snapshot recovery failed"); +} + +#define LOG_EOF 0 + +/** + * @retval -1 error + * @retval 0 EOF + * @retval 1 ok, maybe read something + */ +static int +recover_wal(struct recovery_state *r, struct log_io *l) +{ + int res = -1; + struct log_io_cursor i; + + log_io_cursor_open(&i, l); + + const char *row; + uint32_t rowlen; + while ((row = log_io_cursor_next(&i, &rowlen))) { + int64_t lsn = header_v11(row)->lsn; + if (lsn <= r->confirmed_lsn) { + say_debug("skipping too young row"); + continue; + } + /* + * After handler(row) returned, row may be + * modified, do not use it. + */ + if (r->row_handler(r->row_handler_param, row, rowlen) < 0) { + say_error("can't apply row"); + if (l->dir->panic_if_error) + goto end; + } + set_lsn(r, lsn); + } + res = i.eof_read ? LOG_EOF : 1; +end: + log_io_cursor_close(&i); + /* Sic: we don't close the log here. 
*/ + return res; +} + +/** Find out if there are new .xlog files since the current + * LSN, and read them all up. + * + * This function will not close r->current_wal if + * recovery was successful. + */ +static int +recover_remaining_wals(struct recovery_state *r) +{ + int result = 0; + struct log_io *next_wal; + int64_t current_lsn, wal_greatest_lsn; + size_t rows_before; + FILE *f; + char *filename; + enum log_suffix suffix; + + current_lsn = r->confirmed_lsn + 1; + wal_greatest_lsn = greatest_lsn(r->wal_dir); + + /* if the caller already opened WAL for us, recover from it first */ + if (r->current_wal != NULL) + goto recover_current_wal; + + while (current_lsn <= wal_greatest_lsn) { + /* + * If a newer WAL appeared in the directory before + * current_wal was fully read, try re-reading + * one last time. */ + if (r->current_wal != NULL) { + if (r->current_wal->retry++ < 3) { + say_warn("`%s' has no EOF marker, yet a newer WAL file exists:" + " trying to re-read (attempt #%d)", + r->current_wal->filename, r->current_wal->retry); + goto recover_current_wal; + } else { + say_warn("WAL `%s' wasn't correctly closed", + r->current_wal->filename); + log_io_close(&r->current_wal); + } + } + + /* + * For the last WAL, first try to open .inprogress + * file: if it doesn't exist, we can safely try an + * .xlog, with no risk of a concurrent + * inprogress_log_rename(). + */ + f = NULL; + suffix = INPROGRESS; + if (current_lsn == wal_greatest_lsn) { + /* Last WAL present at the time of rescan. */ + filename = format_filename(r->wal_dir, + current_lsn, suffix); + f = fopen(filename, "r"); + } + if (f == NULL) { + suffix = NONE; + filename = format_filename(r->wal_dir, + current_lsn, suffix); + f = fopen(filename, "r"); + } + next_wal = log_io_open(r->wal_dir, LOG_READ, filename, suffix, f); + /* + * When doing final recovery, and dealing with the + * last file, try opening .<ext>.inprogress. 
+ */ + if (next_wal == NULL) { + if (r->finalize && suffix == INPROGRESS) { + /* + * There is an .inprogress file, but + * we failed to open it. Try to + * delete it. + */ + say_warn("unlink broken %s WAL", filename); + if (inprogress_log_unlink(filename) != 0) + panic("can't unlink 'inprogres' WAL"); + result = 0; + break; + } + /* Missing xlog or gap in LSN */ + say_error("not all WALs have been successfully read"); + if (!r->wal_dir->panic_if_error) { + /* Ignore missing WALs */ + say_warn("ignoring missing WALs"); + current_lsn++; + continue; + } + result = -1; + break; + } + assert(r->current_wal == NULL); + r->current_wal = next_wal; + say_info("recover from `%s'", r->current_wal->filename); + +recover_current_wal: + rows_before = r->current_wal->rows; + result = recover_wal(r, r->current_wal); + if (result < 0) { + say_error("failure reading from %s", + r->current_wal->filename); + break; + } + + if (r->current_wal->rows > 0 && + r->current_wal->rows != rows_before) { + r->current_wal->retry = 0; + } + /* rows == 0 could indicate an empty WAL */ + if (r->current_wal->rows == 0) { + say_error("read zero records from %s", + r->current_wal->filename); + break; + } + if (result == LOG_EOF) { + say_info("done `%s' confirmed_lsn: %" PRIi64, + r->current_wal->filename, + r->confirmed_lsn); + log_io_close(&r->current_wal); + } + + current_lsn = r->confirmed_lsn + 1; + } + + /* + * It's not a fatal error when last WAL is empty, but if + * we lose some logs it is a fatal error. + */ + if (wal_greatest_lsn > r->confirmed_lsn + 1) { + say_error("can't recover WALs"); + result = -1; + } + + prelease(fiber_ptr->gc_pool); + return result; +} + +/** + * Recover all WALs created after the last snapshot. Panic if + * error. + */ +void +recover_existing_wals(struct recovery_state *r) +{ + int64_t next_lsn = r->confirmed_lsn + 1; + int64_t wal_lsn = find_including_file(r->wal_dir, next_lsn); + if (wal_lsn <= 0) { + /* No WALs to recover from. 
*/ + goto out; + } + r->current_wal = log_io_open_for_read(r->wal_dir, wal_lsn, NONE); + if (r->current_wal == NULL) + goto out; + if (recover_remaining_wals(r) < 0) + panic("recover failed"); + say_info("WALs recovered, confirmed lsn: %" PRIi64, r->confirmed_lsn); +out: + prelease(fiber_ptr->gc_pool); +} + +void +recovery_finalize(struct recovery_state *r) +{ + int result; + + if (r->watcher) + recovery_stop_local(r); + + r->finalize = true; + + result = recover_remaining_wals(r); + if (result < 0) + panic("unable to successfully finalize recovery"); + + if (r->current_wal != NULL && result != LOG_EOF) { + say_warn("WAL `%s' wasn't correctly closed", r->current_wal->filename); + + if (!r->current_wal->is_inprogress) { + if (r->current_wal->rows == 0) + /* Regular WAL (not inprogress) must contain at least one row */ + panic("zero rows was successfully read from last WAL `%s'", + r->current_wal->filename); + } else if (r->current_wal->rows == 0) { + /* Unlink empty inprogress WAL */ + say_warn("unlink broken %s WAL", r->current_wal->filename); + if (inprogress_log_unlink(r->current_wal->filename) != 0) + panic("can't unlink 'inprogress' WAL"); + } else if (r->current_wal->rows == 1) { + /* Rename inprogress wal with one row */ + say_warn("rename unfinished %s WAL", r->current_wal->filename); + if (inprogress_log_rename(r->current_wal) != 0) + panic("can't rename 'inprogress' WAL"); + } else + panic("too many rows in inprogress WAL `%s'", r->current_wal->filename); + + log_io_close(&r->current_wal); + } + + if ((r->flags & RECOVER_READONLY) == 0) + wal_writer_start(r); +} + + +/* }}} */ + +/* {{{ Local recovery: support of hot standby and replication relay */ + +/** + * This is used in local hot standby or replication + * relay mode: look for changes in the wal_dir and apply them + * locally or send to the replica. + */ +struct wal_watcher { + /** + * Rescan the WAL directory in search for new WAL files + * every wal_dir_rescan_delay seconds. 
+ */ + ev_timer dir_timer; + /** + * When the latest WAL does not contain a EOF marker, + * re-read its tail on every change in file metadata. + */ + ev_stat stat; + /** Path to the file being watched with 'stat'. */ + char filename[PATH_MAX+1]; +}; + +static struct wal_watcher wal_watcher; + +static void recovery_rescan_file(ev_stat *w, int revents __attribute__((unused))); + +static void +recovery_watch_file(struct wal_watcher *watcher, struct log_io *wal) +{ + strncpy(watcher->filename, wal->filename, PATH_MAX); + ev_stat_init(&watcher->stat, recovery_rescan_file, watcher->filename, 0.); + ev_stat_start(&watcher->stat); +} + +static void +recovery_stop_file(struct wal_watcher *watcher) +{ + ev_stat_stop(&watcher->stat); +} + +static void +recovery_rescan_dir(ev_timer *w, int revents __attribute__((unused))) +{ + struct recovery_state *r = (struct recovery_state *) w->data; + struct wal_watcher *watcher = r->watcher; + struct log_io *save_current_wal = r->current_wal; + + int result = recover_remaining_wals(r); + if (result < 0) + panic("recover failed: %i", result); + if (save_current_wal != r->current_wal) { + if (save_current_wal != NULL) + recovery_stop_file(watcher); + if (r->current_wal != NULL) + recovery_watch_file(watcher, r->current_wal); + } +} + +static void +recovery_rescan_file(ev_stat *w, int revents __attribute__((unused))) +{ + struct recovery_state *r = (struct recovery_state *) w->data; + struct wal_watcher *watcher = r->watcher; + int result = recover_wal(r, r->current_wal); + if (result < 0) + panic("recover failed"); + if (result == LOG_EOF) { + say_info("done `%s' confirmed_lsn: %" PRIi64, + r->current_wal->filename, + r->confirmed_lsn); + log_io_close(&r->current_wal); + recovery_stop_file(watcher); + /* Don't wait for wal_dir_rescan_delay. 
*/ + recovery_rescan_dir(&watcher->dir_timer, 0); + } +} + +void +recovery_follow_local(struct recovery_state *r, ev_tstamp wal_dir_rescan_delay) +{ + assert(r->watcher == NULL); + assert(r->writer == NULL); + + struct wal_watcher *watcher = r->watcher= &wal_watcher; + + ev_timer_init(&watcher->dir_timer, recovery_rescan_dir, + wal_dir_rescan_delay, wal_dir_rescan_delay); + watcher->dir_timer.data = watcher->stat.data = r; + ev_timer_start(&watcher->dir_timer); + /* + * recover() leaves the current wal open if it has no + * EOF marker. + */ + if (r->current_wal != NULL) + recovery_watch_file(watcher, r->current_wal); +} + +static void +recovery_stop_local(struct recovery_state *r) +{ + struct wal_watcher *watcher = r->watcher; + assert(ev_is_active(&watcher->dir_timer)); + ev_timer_stop(&watcher->dir_timer); + if (ev_is_active(&watcher->stat)) + ev_stat_stop(&watcher->stat); + + r->watcher = NULL; +} + +/* }}} */ + +/* {{{ WAL writer - maintain a Write Ahead Log for every change + * in the data state. + */ + +struct wal_write_request { + STAILQ_ENTRY(wal_write_request) wal_fifo_entry; + /* Auxiliary. */ + int res; + struct fiber *fiber; + struct row_v11 row; +}; + +/* Context of the WAL writer thread. */ +STAILQ_HEAD(wal_fifo, wal_write_request); + +struct wal_writer +{ + struct wal_fifo input; + struct wal_fifo commit; + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cond; + ev_async write_event; + struct fio_batch *batch; + bool is_shutdown; + bool is_rollback; +}; + +static pthread_once_t wal_writer_once = PTHREAD_ONCE_INIT; + +static struct wal_writer wal_writer; + +/** + * A pthread_atfork() callback for a child process. Today we only + * fork the master process to save a snapshot, and in the child + * the WAL writer thread is not necessary and not present. 
+ */ +static void +wal_writer_child() +{ + log_io_atfork(&recovery_state->current_wal); + if (wal_writer.batch) { + free(wal_writer.batch); + wal_writer.batch = NULL; + } + /* + * Make sure that atexit() handlers in the child do + * not try to stop the non-existent thread. + * The writer is not used in the child. + */ + recovery_state->writer = NULL; +} + +/** + * Today a WAL writer is started once at start of the + * server. Nevertheless, use pthread_once() to make + * sure we can start/stop the writer many times. + */ +static void +wal_writer_init_once() +{ + (void) tt_pthread_atfork(NULL, NULL, wal_writer_child); +} + +/** + * A commit watcher callback is invoked whenever there + * are requests in wal_writer->commit. This callback is + * associated with an internal WAL writer watcher and is + * invoked in the front-end main event loop. + * + * A rollback watcher callback is invoked only when there is + * a rollback request and commit is empty. + * We roll back the entire input queue. + * + * ev_async, under the hood, is a simple pipe. The WAL + * writer thread writes to that pipe whenever it's done + * handling a pack of requests (look for ev_async_send() + * call in the writer thread loop). + */ +static void +wal_schedule_queue(struct wal_fifo *queue) +{ + /* + * Can't use STAILQ_FOREACH since fiber_call() + * destroys the list entry. 
+ */ + struct wal_write_request *req, *tmp; + STAILQ_FOREACH_SAFE(req, queue, wal_fifo_entry, tmp) + fiber_call(req->fiber); +} + +static void +wal_schedule(ev_async *watcher, int event __attribute__((unused))) +{ + struct wal_writer *writer = (struct wal_writer *) watcher->data; + struct wal_fifo commit = STAILQ_HEAD_INITIALIZER(commit); + struct wal_fifo rollback = STAILQ_HEAD_INITIALIZER(rollback); + + (void) tt_pthread_mutex_lock(&writer->mutex); + STAILQ_CONCAT(&commit, &writer->commit); + if (writer->is_rollback) { + STAILQ_CONCAT(&rollback, &writer->input); + writer->is_rollback = false; + } + (void) tt_pthread_mutex_unlock(&writer->mutex); + + wal_schedule_queue(&commit); + /* + * Perform a cascading abort of all transactions which + * depend on the transaction which failed to get written + * to the write ahead log. Abort transactions + * in reverse order, performing a playback of the + * in-memory database state. + */ + STAILQ_REVERSE(&rollback, wal_write_request, wal_fifo_entry); + wal_schedule_queue(&rollback); +} + +/** + * Initialize WAL writer context. Even though it's a singleton, + * encapsulate the details just in case we may use + * more writers in the future. + */ +static void +wal_writer_init(struct wal_writer *writer) +{ + /* I. Initialize the state. */ + pthread_mutexattr_t errorcheck; + + (void) tt_pthread_mutexattr_init(&errorcheck); + +#ifndef NDEBUG + (void) tt_pthread_mutexattr_settype(&errorcheck, PTHREAD_MUTEX_ERRORCHECK); +#endif + /* Initialize queue lock mutex. 
*/ + (void) tt_pthread_mutex_init(&writer->mutex, &errorcheck); + (void) tt_pthread_mutexattr_destroy(&errorcheck); + + (void) tt_pthread_cond_init(&writer->cond, NULL); + + STAILQ_INIT(&writer->input); + STAILQ_INIT(&writer->commit); + + ev_async_init(&writer->write_event, wal_schedule); + writer->write_event.data = writer; + + (void) tt_pthread_once(&wal_writer_once, wal_writer_init_once); + + writer->batch = fio_batch_alloc(sysconf(_SC_IOV_MAX)); + + if (writer->batch == NULL) + panic_syserror("fio_batch_alloc"); +} + +/** Destroy a WAL writer structure. */ +static void +wal_writer_destroy(struct wal_writer *writer) +{ + (void) tt_pthread_mutex_destroy(&writer->mutex); + (void) tt_pthread_cond_destroy(&writer->cond); + free(writer->batch); +} + +/** WAL writer thread routine. */ +static void *wal_writer_thread(void *worker_args); + +/** + * Initialize WAL writer, start the thread. + * + * @pre The server has completed recovery from a snapshot + * and/or existing WALs. All WALs opened in read-only + * mode are closed. + * + * @param state WAL writer meta-data. + * + * @return 0 success, -1 on error. On success, recovery->writer + * points to a newly created WAL writer. + */ +static int +wal_writer_start(struct recovery_state *r) +{ + assert(r->writer == NULL); + assert(r->watcher == NULL); + assert(r->current_wal == NULL); + assert(! wal_writer.is_shutdown); + assert(STAILQ_EMPTY(&wal_writer.input)); + assert(STAILQ_EMPTY(&wal_writer.commit)); + + /* I. Initialize the state. */ + wal_writer_init(&wal_writer); + r->writer = &wal_writer; + + ev_async_start(&wal_writer.write_event); + + /* II. Start the thread. */ + + if (tt_pthread_create(&wal_writer.thread, NULL, wal_writer_thread, r)) { + wal_writer_destroy(&wal_writer); + r->writer = NULL; + return -1; + } + return 0; +} + +/** Stop and destroy the writer thread (at shutdown). */ +void +wal_writer_stop(struct recovery_state *r) +{ + struct wal_writer *writer = r->writer; + + /* Stop the worker thread. 
*/ + + (void) tt_pthread_mutex_lock(&writer->mutex); + writer->is_shutdown= true; + (void) tt_pthread_cond_signal(&writer->cond); + (void) tt_pthread_mutex_unlock(&writer->mutex); + + if (tt_pthread_join(writer->thread, NULL) != 0) { + /* We can't recover from this in any reasonable way. */ + panic_syserror("WAL writer: thread join failed"); + } + + ev_async_stop(&writer->write_event); + wal_writer_destroy(writer); + + r->writer = NULL; +} + +/** + * Pop a bulk of requests to write to disk to process. + * Block on the condition only if we have no other work to + * do. Loop in case of a spurious wakeup. + */ +void +wal_writer_pop(struct wal_writer *writer, struct wal_fifo *input) +{ + while (! writer->is_shutdown) + { + if (! writer->is_rollback && ! STAILQ_EMPTY(&writer->input)) { + STAILQ_CONCAT(input, &writer->input); + break; + } + (void) tt_pthread_cond_wait(&writer->cond, &writer->mutex); + } +} + +/** + * If there is no current WAL, try to open it, and close the + * previous WAL. We close the previous WAL only after opening + * a new one to smoothly move local hot standby and replication + * over to the next WAL. + * If the current WAL has only 1 record, it means we need to + * rename it from '.inprogress' to '.xlog'. We maintain + * '.inprogress' WALs to ensure that, at any point in time, + * an .xlog file contains at least 1 valid record. + * In case of error, we try to close any open WALs. + * + * @post r->current_wal is in a good shape for writes or is NULL. + * @return 0 in case of success, -1 on error. + */ +static int +wal_opt_rotate(struct log_io **wal, int rows_per_wal, struct log_dir *dir, + int64_t lsn) +{ + struct log_io *l = *wal, *wal_to_close = NULL; + + ERROR_INJECT_RETURN(ERRINJ_WAL_ROTATE); + + if (l != NULL && (l->rows >= rows_per_wal || lsn % rows_per_wal == 0)) { + /* + * if l->rows == 1, log_io_close() does + * inprogress_log_rename() for us. 
+ */ + wal_to_close = l; + l = NULL; + } + if (l == NULL) { + /* Open WAL with '.inprogress' suffix. */ + l = log_io_open_for_write(dir, lsn, INPROGRESS); + /* + * Close the file *after* we create the new WAL, since + * this is when replication relays get an inotify alarm + * (when we close the file), and try to reopen the next + * WAL. In other words, make sure that replication relays + * try to open the next WAL only when it exists. + */ + if (wal_to_close) { + /* + * We can not handle log_io_close() + * failure in any reasonable way. + * A warning is written to the server + * log file. + */ + log_io_close(&wal_to_close); + } + } else if (l->rows == 1) { + /* + * Rename WAL after the first successful write + * to a name without .inprogress suffix. + */ + if (inprogress_log_rename(l)) + log_io_close(&l); /* error. */ + } + assert(wal_to_close == NULL); + *wal = l; + return l ? 0 : -1; +} + +static void +wal_opt_sync(struct log_io *wal, double sync_delay) +{ + static ev_tstamp last_sync = 0; + + if (sync_delay > 0 && ev_now() - last_sync >= sync_delay) { + /* + * XXX: in case of error, we don't really know how + * many records were not written to disk: probably + * way more than the last one. + */ + (void) log_io_sync(wal); + last_sync = ev_now(); + } +} + +static struct wal_write_request * +wal_fill_batch(struct log_io *wal, struct fio_batch *batch, int rows_per_wal, + struct wal_write_request *req) +{ + int max_rows = wal->is_inprogress ? 1 : rows_per_wal - wal->rows; + /* Post-condition of successful wal_opt_rotate(). */ + assert(max_rows > 0); + fio_batch_start(batch, max_rows); + while (req != NULL && ! 
fio_batch_is_full(batch)) { + struct row_v11 *row = &req->row; + header_v11_sign(&row->header); + fio_batch_add(batch, row, row_v11_size(row)); + req = STAILQ_NEXT(req, wal_fifo_entry); + } + return req; +} + +static struct wal_write_request * +wal_write_batch(struct log_io *wal, struct fio_batch *batch, + struct wal_write_request *req, struct wal_write_request *end) +{ + int rows_written = fio_batch_write(batch, fileno(wal->f)); + wal->rows += rows_written; + while (req != end && rows_written-- != 0) { + req->res = 0; + req = STAILQ_NEXT(req, wal_fifo_entry); + } + return req; +} + +static void +wal_write_to_disk(struct recovery_state *r, struct wal_writer *writer, + struct wal_fifo *input, struct wal_fifo *commit, + struct wal_fifo *rollback) +{ + struct log_io **wal = &r->current_wal; + struct fio_batch *batch = writer->batch; + + struct wal_write_request *req = STAILQ_FIRST(input); + struct wal_write_request *write_end = req; + + while (req) { + if (wal_opt_rotate(wal, r->rows_per_wal, r->wal_dir, + req->row.header.lsn) != 0) + break; + struct wal_write_request *batch_end; + batch_end = wal_fill_batch(*wal, batch, r->rows_per_wal, req); + write_end = wal_write_batch(*wal, batch, req, batch_end); + if (batch_end != write_end) + break; + wal_opt_sync(*wal, r->wal_fsync_delay); + req = write_end; + } + STAILQ_SPLICE(input, write_end, wal_fifo_entry, rollback); + STAILQ_CONCAT(commit, input); +} + +/** WAL writer thread main loop. */ +static void * +wal_writer_thread(void *worker_args) +{ + struct recovery_state *r = (struct recovery_state *) worker_args; + struct wal_writer *writer = r->writer; + struct wal_fifo input = STAILQ_HEAD_INITIALIZER(input); + struct wal_fifo commit = STAILQ_HEAD_INITIALIZER(commit); + struct wal_fifo rollback = STAILQ_HEAD_INITIALIZER(rollback); + + (void) tt_pthread_mutex_lock(&writer->mutex); + while (! 
/**
 * WAL writer main entry point: queue a single request
 * to be written to disk and wait until this task is completed.
 *
 * @param r       recovery state owning the WAL writer
 * @param lsn     log sequence number to store in the row
 * @param cookie  cookie to store in the row
 * @param op      operation code, written in front of the row data
 * @param row     row data
 * @param row_len length of the row data, in bytes
 *
 * @return 0 if r->wal_mode is WAL_NONE (nothing is queued);
 *         otherwise the request's result: it starts as -1 and
 *         wal_write_batch() sets it to 0 for written rows.
 */
int
wal_write(struct recovery_state *r, int64_t lsn, uint64_t cookie,
	  uint16_t op, const char *row, uint32_t row_len)
{
	say_debug("wal_write lsn=%" PRIi64, lsn);
	ERROR_INJECT_RETURN(ERRINJ_WAL_IO);

	if (r->wal_mode == WAL_NONE)
		return 0;

	struct wal_writer *writer = r->writer;

	/*
	 * The request, with room for op and the row data after
	 * it, is allocated from the calling fiber's gc pool.
	 */
	struct wal_write_request *req = (struct wal_write_request *)
		palloc(fiber_ptr->gc_pool, sizeof(struct wal_write_request) +
		       sizeof(op) + row_len);

	req->fiber = fiber_ptr;
	/* Pessimistic default: stays -1 unless the row is written. */
	req->res = -1;
	row_v11_fill(&req->row, lsn, XLOG, cookie, (const char *) &op,
		     sizeof(op), row, row_len);

	(void) tt_pthread_mutex_lock(&writer->mutex);

	bool input_was_empty = STAILQ_EMPTY(&writer->input);
	STAILQ_INSERT_TAIL(&writer->input, req, wal_fifo_entry);

	/* Signal only on the empty -> non-empty transition. */
	if (input_was_empty)
		(void) tt_pthread_cond_signal(&writer->cond);

	(void) tt_pthread_mutex_unlock(&writer->mutex);

	/*
	 * Suspend this fiber; wal_schedule_queue() resumes it
	 * through req->fiber.
	 */
	fiber_yield(); /* Request was inserted. */

	return req->res;
}
+ */ + if (elapsed < 1) + usleep(((1 - elapsed) * 1000000)); + + ev_now_update(); + last = ev_now(); + bytes -= recovery_state->snap_io_rate_limit; + } +} + +void +snapshot_save(struct recovery_state *r, void (*f) (struct log_io *)) +{ + struct log_io *snap; + snap = log_io_open_for_write(r->snap_dir, r->confirmed_lsn, + INPROGRESS); + if (snap == NULL) + panic_status(errno, "Failed to save snapshot: failed to open file in write mode."); + /* + * While saving a snapshot, snapshot name is set to + * <lsn>.snap.inprogress. When done, the snapshot is + * renamed to <lsn>.snap. + */ + say_info("saving snapshot `%s'", + format_filename(r->snap_dir, r->confirmed_lsn, + NONE)); + if (f) + f(snap); + + log_io_close(&snap); + + say_info("done"); +} + +/** + * Read WAL/SNAPSHOT and invoke a callback on every record (used + * for --cat command line option). + * @retval 0 success + * @retval -1 error + */ + +int +read_log(const char *filename, + row_handler *xlog_handler, row_handler *snap_handler, + void *param) +{ + struct log_dir *dir; + row_handler *h; + + if (strstr(filename, wal_dir.filename_ext)) { + dir = &wal_dir; + h = xlog_handler; + } else if (strstr(filename, snap_dir.filename_ext)) { + dir = &snap_dir; + h = snap_handler; + } else { + say_error("don't know how to read `%s'", filename); + return -1; + } + + FILE *f = fopen(filename, "r"); + struct log_io *l = log_io_open(dir, LOG_READ, filename, NONE, f); + if (l == NULL) + return -1; + + struct log_io_cursor i; + + log_io_cursor_open(&i, l); + const char *row; + uint32_t rowlen; + while ((row = log_io_cursor_next(&i, &rowlen))) + h(param, row, rowlen); + + log_io_cursor_close(&i); + log_io_close(&l); + return 0; +} + +/* }}} */ + diff --git a/src/replica.cc b/src/replica.cc new file mode 100644 index 0000000000000000000000000000000000000000..4860fe39e5cd1349c28f93f8697436135f0016d8 --- /dev/null +++ b/src/replica.cc @@ -0,0 +1,226 @@ +/* + * Redistribution and use in source and binary forms, with or + * without 
modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include "recovery.h" + +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "log_io.h" +#include "fiber.h" +#include "pickle.h" +#include "coio_buf.h" +#include "tarantool.h" + +static void +remote_apply_row(struct recovery_state *r, const char *row, uint32_t rowlne); + +const char * +remote_read_row(struct ev_io *coio, struct iobuf *iobuf, uint32_t *rowlen) +{ + struct ibuf *in = &iobuf->in; + ssize_t to_read = sizeof(struct header_v11) - ibuf_size(in); + + if (to_read > 0) { + ibuf_reserve(in, cfg_readahead); + coio_breadn(coio, in, to_read); + } + + ssize_t request_len = header_v11(in->pos)->len + + sizeof(struct header_v11); + to_read = request_len - ibuf_size(in); + + if (to_read > 0) + coio_breadn(coio, in, to_read); + + const char *row = in->pos; + *rowlen = request_len; + in->pos += request_len; + return row; +} + +static void +remote_connect(struct ev_io *coio, struct sockaddr_in *remote_addr, + int64_t initial_lsn, const char **err) +{ + evio_socket(coio, AF_INET, SOCK_STREAM, IPPROTO_TCP); + + *err = "can't connect to master"; + coio_connect(coio, remote_addr); + + *err = "can't write version"; + coio_write(coio, &initial_lsn, sizeof(initial_lsn)); + + uint32_t version; + *err = "can't read version"; + coio_readn(coio, &version, sizeof(version)); + *err = NULL; + if (version != default_version) + tnt_raise(IllegalParams, "remote version mismatch"); + + say_crit("successfully connected to master"); + say_crit("starting replication from lsn: %" PRIi64, initial_lsn); +} + +static void +pull_from_remote(va_list ap) +{ + struct recovery_state *r = va_arg(ap, struct recovery_state *); + struct ev_io coio; + struct iobuf *iobuf = NULL; + bool warning_said = false; + const int reconnect_delay = 1; + + coio_init(&coio); + + for (;;) { + const char *err = NULL; + try { + fiber_setcancellable(true); + if (! 
evio_is_active(&coio)) { + title("replica", "%s/%s", r->remote->source, + "connecting"); + if (iobuf == NULL) + iobuf = iobuf_new(fiber_name(fiber_ptr)); + remote_connect(&coio, &r->remote->addr, + r->confirmed_lsn + 1, &err); + warning_said = false; + title("replica", "%s/%s", r->remote->source, + "connected"); + } + err = "can't read row"; + uint32_t rowlen; + const char *row = remote_read_row(&coio, iobuf, &rowlen); + fiber_setcancellable(false); + err = NULL; + + r->remote->recovery_lag = ev_now() - header_v11(row)->tm; + r->remote->recovery_last_update_tstamp = ev_now(); + + remote_apply_row(r, row, rowlen); + + iobuf_gc(iobuf); + fiber_gc(); + } catch (const FiberCancelException& e) { + title("replica", "%s/%s", r->remote->source, "failed"); + iobuf_delete(iobuf); + evio_close(&coio); + throw; + } catch (const Exception& e) { + title("replica", "%s/%s", r->remote->source, "failed"); + e.log(); + if (! warning_said) { + if (err != NULL) + say_info("%s", err); + say_info("will retry every %i second", reconnect_delay); + warning_said = true; + } + evio_close(&coio); + } + + /* Put fiber_sleep() out of catch block. + * + * This is done to avoid situation, when two or more + * fibers yield's inside their try/catch blocks and + * throws an exceptions. Seems like exception unwinder + * stores some global state while being inside a catch + * block. + * + * This could lead to incorrect exception processing + * and crash the server. + * + * See: https://github.com/tarantool/tarantool/issues/136 + */ + if (! 
evio_is_active(&coio)) + fiber_sleep(reconnect_delay); + } +} + +static void +remote_apply_row(struct recovery_state *r, const char *row, uint32_t rowlen) +{ + int64_t lsn = header_v11(row)->lsn; + + assert(*(uint16_t*)(row + sizeof(struct header_v11)) == XLOG); + + if (r->row_handler(r->row_handler_param, row, rowlen) < 0) + panic("replication failure: can't apply row"); + + set_lsn(r, lsn); +} + +void +recovery_follow_remote(struct recovery_state *r, const char *addr) +{ + char name[FIBER_NAME_MAXLEN]; + char ip_addr[32]; + int port; + int rc; + struct fiber *f; + struct in_addr server; + + assert(r->remote == NULL); + + say_crit("initializing the replica, WAL master %s", addr); + snprintf(name, sizeof(name), "replica/%s", addr); + + try { + f = fiber_new(name, pull_from_remote); + } catch (const Exception& ) { + return; + } + + rc = sscanf(addr, "%31[^:]:%i", ip_addr, &port); + assert(rc == 2); + (void)rc; + + if (inet_aton(ip_addr, &server) < 0) { + say_syserror("inet_aton: %s", ip_addr); + return; + } + + static struct remote remote; + memset(&remote, 0, sizeof(remote)); + remote.addr.sin_family = AF_INET; + memcpy(&remote.addr.sin_addr.s_addr, &server, sizeof(server)); + remote.addr.sin_port = htons(port); + memcpy(&remote.cookie, &remote.addr, MIN(sizeof(remote.cookie), sizeof(remote.addr))); + remote.reader = f; + snprintf(remote.source, sizeof(remote.source), "%s", addr); + r->remote = &remote; + fiber_call(f, r); +} + +void +recovery_stop_remote(struct recovery_state *r) +{ + say_info("shutting down the replica"); + fiber_cancel(r->remote->reader); + r->remote = NULL; +} diff --git a/src/replication.cc b/src/replication.cc new file mode 100644 index 0000000000000000000000000000000000000000..41d71c65b3fa91cab450227dc9f62ab3b4a8ff50 --- /dev/null +++ b/src/replication.cc @@ -0,0 +1,698 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. 
Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <replication.h> +#include <say.h> +#include <fiber.h> +extern "C" { +#include <cfg/warning.h> +#include <cfg/tarantool_box_cfg.h> +} /* extern "C" */ +#include <palloc.h> +#include <stddef.h> + +#include <stddef.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/uio.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <limits.h> +#include <fcntl.h> + +#include "fiber.h" +#include "recovery.h" +#include "log_io.h" +#include "evio.h" + +/** Replication topology + * ---------------------- + * + * Tarantool replication consists of 3 interacting processes: + * master, spawner and replication relay. + * + * The spawner is created at server start, and master communicates + * with the spawner using a socketpair(2). 
Replication relays are + * created by the spawner and handle one client connection each. + * + * The master process binds to replication_port and accepts + * incoming connections. This is done in the master to be able to + * correctly handle RELOAD CONFIGURATION, which happens in the + * master, and, in future, perform authentication of replication + * clients. + * + * Once a client socket is accepted, it is sent to the spawner + * process, through the master's end of the socket pair. + * + * The spawner listens on the receiving end of the socket pair and + * for every received socket creates a replication relay, which is + * then responsible for sending write ahead logs to the replica. + * + * Upon shutdown, the master closes its end of the socket pair. + * The spawner then reads EOF from its end, terminates all + * children and exits. + */ +static int master_to_spawner_socket; + +/** Accept a new connection on the replication port: push the accepted socket + * to the spawner. + */ +static void +replication_on_accept(struct evio_service *service __attribute__((unused)), + int fd, struct sockaddr_in *addr __attribute__((unused))); + +/** Send a file descriptor to replication relay spawner. + * + * Invoked when spawner's end of the socketpair becomes ready. + */ +static void +replication_send_socket(ev_io *watcher, int events __attribute__((unused))); + +/** Replication spawner process */ +static struct spawner { + /** reading end of the socket pair with the master */ + int sock; + /** non-zero if got a terminating signal */ + sig_atomic_t killed; + /** child process count */ + sig_atomic_t child_count; +} spawner; + +/** Initialize spawner process. + * + * @param sock the socket between the main process and the spawner. + */ +static void +spawner_init(int sock); + +/** Spawner main loop. */ +static void +spawner_main_loop(); + +/** Shutdown spawner and all its children. */ +static void +spawner_shutdown(); + +/** Handle SIGINT, SIGTERM, SIGHUP. 
*/ +static void +spawner_signal_handler(int signal); + +/** Handle SIGCHLD: collect status of a terminated child. */ +static void +spawner_sigchld_handler(int signal __attribute__((unused))); + +/** Create a replication relay. + * + * @return 0 on success, -1 on error + */ +static int +spawner_create_replication_relay(int client_sock); + +/** Shut down all relays when shutting down the spawner. */ +static void +spawner_shutdown_children(); + +/** Initialize replication relay process. */ +static void +replication_relay_loop(int client_sock); + +/* + * ------------------------------------------------------------------------ + * replication module + * ------------------------------------------------------------------------ + */ + +/** Check replication module configuration. */ +int +replication_check_config(struct tarantool_cfg *config) +{ + if (config->replication_port < 0 || + config->replication_port >= USHRT_MAX) { + say_error("invalid replication port value: %" PRId32, + config->replication_port); + return -1; + } + + return 0; +} + +/** Pre-fork replication spawner process. */ +void +replication_prefork() +{ + if (cfg.replication_port == 0) { + /* replication is not needed, do nothing */ + return; + } + int sockpair[2]; + /* + * Create UNIX sockets to communicate between the main and + * spawner processes. + */ + if (socketpair(PF_LOCAL, SOCK_STREAM, 0, sockpair) != 0) + panic_syserror("socketpair"); + + /* create spawner */ + pid_t pid = fork(); + if (pid == -1) + panic_syserror("fork"); + + if (pid != 0) { + /* parent process: tarantool */ + close(sockpair[1]); + master_to_spawner_socket = sockpair[0]; + sio_setfl(master_to_spawner_socket, O_NONBLOCK, 1); + } else { + ev_default_fork(); + ev_loop(EVLOOP_NONBLOCK); + /* child process: spawner */ + close(sockpair[0]); + /* + * Move to an own process group, to not receive + * signals from the controlling tty. 
+ */ + setpgid(0, 0); + spawner_init(sockpair[1]); + } +} + +/** + * Create a fiber which accepts client connections and pushes them + * to replication spawner. + */ + +void +replication_init(const char *bind_ipaddr, int replication_port) +{ + if (replication_port == 0) + return; /* replication is not in use */ + + static struct evio_service replication; + + evio_service_init(&replication, "replication", bind_ipaddr, + replication_port, replication_on_accept, NULL); + + evio_service_start(&replication); +} + + +/*-----------------------------------------------------------------------------*/ +/* replication accept/sender fibers */ +/*-----------------------------------------------------------------------------*/ + +/** Replication acceptor fiber handler. */ +static void +replication_on_accept(struct evio_service *service __attribute__((unused)), + int fd, + struct sockaddr_in *addr __attribute__((unused))) +{ + /* + * Drop the O_NONBLOCK flag, which was possibly + * inherited from the acceptor fd (happens on + * Darwin). + */ + sio_setfl(fd, O_NONBLOCK, 0); + + struct ev_io *io = (struct ev_io *) malloc(sizeof(struct ev_io)); + if (io == NULL) { + close(fd); + return; + } + io->data = (void *) (intptr_t) fd; + ev_io_init(io, replication_send_socket, master_to_spawner_socket, EV_WRITE); + ev_io_start(io); +} + + +/** Send a file descriptor to the spawner. 
*/ +static void +replication_send_socket(ev_io *watcher, int events __attribute__((unused))) +{ + int client_sock = (intptr_t) watcher->data; + struct msghdr msg; + struct iovec iov[1]; + char control_buf[CMSG_SPACE(sizeof(int))]; + struct cmsghdr *control_message = NULL; + int cmd_code = 0; + + iov[0].iov_base = &cmd_code; + iov[0].iov_len = sizeof(cmd_code); + + memset(&msg, 0, sizeof(msg)); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = control_buf; + msg.msg_controllen = sizeof(control_buf); + + control_message = CMSG_FIRSTHDR(&msg); + control_message->cmsg_len = CMSG_LEN(sizeof(int)); + control_message->cmsg_level = SOL_SOCKET; + control_message->cmsg_type = SCM_RIGHTS; + *((int *) CMSG_DATA(control_message)) = client_sock; + + /* Send the client socket to the spawner. */ + if (sendmsg(master_to_spawner_socket, &msg, 0) < 0) + say_syserror("sendmsg"); + + ev_io_stop(watcher); + free(watcher); + /* Close client socket in the main process. */ + close(client_sock); +} + + +/*--------------------------------------------------------------------------* + * spawner process * + * -------------------------------------------------------------------------*/ + +/** Initialize the spawner. */ + +static void +spawner_init(int sock) +{ + struct sigaction sa; + + title("spawner", NULL); + fiber_set_name(fiber_ptr, status); + + /* init replicator process context */ + spawner.sock = sock; + + /* init signals */ + memset(&sa, 0, sizeof(sa)); + sigemptyset(&sa.sa_mask); + + /* + * The spawner normally does not receive any signals, + * except when sent by a system administrator. + * When the master process terminates, it closes its end + * of the socket pair and this signals to the spawner that + * it's time to die as well. But before exiting, the + * spawner must kill and collect all active replication + * relays. This is why we need to change the default + * signal action here. 
+ */ + sa.sa_handler = spawner_signal_handler; + + if (sigaction(SIGHUP, &sa, NULL) == -1 || + sigaction(SIGINT, &sa, NULL) == -1 || + sigaction(SIGTERM, &sa, NULL) == -1) + say_syserror("sigaction"); + + sa.sa_handler = spawner_sigchld_handler; + + if (sigaction(SIGCHLD, &sa, NULL) == -1) + say_syserror("sigaction"); + + sa.sa_handler = SIG_IGN; + /* + * Ignore SIGUSR1, SIGUSR1 is used to make snapshots, + * and if someone wrote a faulty regexp for `ps' and + * fed it to `kill' the replication shouldn't die. + * Ignore SIGUSR2 as well, since one can be pretty + * inventive in ways of shooting oneself in the foot. + * Ignore SIGPIPE, otherwise we may receive SIGPIPE + * when trying to write to the log. + */ + if (sigaction(SIGUSR1, &sa, NULL) == -1 || + sigaction(SIGUSR2, &sa, NULL) == -1 || + sigaction(SIGPIPE, &sa, NULL) == -1) { + + say_syserror("sigaction"); + } + + say_crit("initialized"); + spawner_main_loop(); +} + + + +static int +spawner_unpack_cmsg(struct msghdr *msg) +{ + struct cmsghdr *control_message; + for (control_message = CMSG_FIRSTHDR(msg); + control_message != NULL; + control_message = CMSG_NXTHDR(msg, control_message)) + if ((control_message->cmsg_level == SOL_SOCKET) && + (control_message->cmsg_type == SCM_RIGHTS)) + return *((int *) CMSG_DATA(control_message)); + assert(false); + return -1; +} + +/** Replication spawner process main loop. 
*/ +static void +spawner_main_loop() +{ + struct msghdr msg; + struct iovec iov[1]; + char control_buf[CMSG_SPACE(sizeof(int))]; + int cmd_code = 0; + int client_sock; + + iov[0].iov_base = &cmd_code; + iov[0].iov_len = sizeof(cmd_code); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = control_buf; + msg.msg_controllen = sizeof(control_buf); + + while (!spawner.killed) { + int msglen = recvmsg(spawner.sock, &msg, 0); + if (msglen > 0) { + client_sock = spawner_unpack_cmsg(&msg); + spawner_create_replication_relay(client_sock); + } else if (msglen == 0) { /* orderly master shutdown */ + say_info("Exiting: master shutdown"); + break; + } else { /* msglen == -1 */ + if (errno != EINTR) + say_syserror("recvmsg"); + /* continue, the error may be temporary */ + } + } + spawner_shutdown(); +} + +/** Replication spawner shutdown. */ +static void +spawner_shutdown() +{ + /* + * There is no need to ever use signals with the spawner + * process. If someone did send spawner a signal by + * mistake, at least make a squeak in the error log before + * dying. + */ + if (spawner.killed) + say_info("Terminated by signal %d", (int) spawner.killed); + + /* close socket */ + close(spawner.sock); + + /* kill all children */ + spawner_shutdown_children(); + + exit(EXIT_SUCCESS); +} + +/** Replication spawner signal handler for terminating signals. */ +static void spawner_signal_handler(int signal) +{ + spawner.killed = signal; +} + +/** Wait for a terminated child. 
*/ +static void +spawner_sigchld_handler(int signo __attribute__((unused))) +{ + static const char waitpid_failed[] = "spawner: waitpid() failed\n"; + do { + int exit_status; + pid_t pid = waitpid(-1, &exit_status, WNOHANG); + switch (pid) { + case -1: + if (errno != ECHILD) { + int r = write(sayfd, waitpid_failed, + sizeof(waitpid_failed) - 1); + (void) r; /* -Wunused-result warning suppression */ + } + return; + case 0: /* no more changes in children status */ + return; + default: + spawner.child_count--; + } + } while (spawner.child_count > 0); +} + +/** Create replication client handler process. */ +static int +spawner_create_replication_relay(int client_sock) +{ + pid_t pid = fork(); + + if (pid < 0) { + say_syserror("fork"); + return -1; + } + + if (pid == 0) { + ev_default_fork(); + ev_loop(EVLOOP_NONBLOCK); + close(spawner.sock); + replication_relay_loop(client_sock); + } else { + spawner.child_count++; + close(client_sock); + say_info("created a replication relay: pid = %d", (int) pid); + } + + return 0; +} + +/** Replicator spawner shutdown: kill and wait for children. */ +static void +spawner_shutdown_children() +{ + int kill_signo = SIGTERM, signo; + sigset_t mask, orig_mask, alarm_mask; + +retry: + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + sigaddset(&mask, SIGALRM); + /* + * We're going to kill the entire process group, which + * we're part of. Handle the signal sent to ourselves. + */ + sigaddset(&mask, kill_signo); + + if (spawner.child_count == 0) + return; + + /* Block SIGCHLD and SIGALRM to avoid races. */ + if (sigprocmask(SIG_BLOCK, &mask, &orig_mask)) { + say_syserror("sigprocmask"); + return; + } + + /* We'll wait for children no longer than 5 sec. 
*/ + alarm(5); + + say_info("sending signal %d to %d children", kill_signo, + (int) spawner.child_count); + + kill(0, kill_signo); + + say_info("waiting for children for up to 5 seconds"); + + while (spawner.child_count > 0) { + sigwait(&mask, &signo); + if (signo == SIGALRM) { /* timed out */ + break; + } + else if (signo != kill_signo) { + assert(signo == SIGCHLD); + spawner_sigchld_handler(signo); + } + } + + /* Reset the alarm. */ + alarm(0); + + /* Clear possibly pending SIGALRM. */ + sigpending(&alarm_mask); + if (sigismember(&alarm_mask, SIGALRM)) { + sigemptyset(&alarm_mask); + sigaddset(&alarm_mask, SIGALRM); + sigwait(&alarm_mask, &signo); + } + + /* Restore the old mask. */ + if (sigprocmask(SIG_SETMASK, &orig_mask, NULL)) { + say_syserror("sigprocmask"); + return; + } + + if (kill_signo == SIGTERM) { + kill_signo = SIGKILL; + goto retry; + } +} + +/** A libev callback invoked when a relay client socket is ready + * for read. This currently only happens when the client closes + * its socket, and we get an EOF. + */ +static void +replication_relay_recv(struct ev_io *w, int __attribute__((unused)) revents) +{ + int client_sock = (int) (intptr_t) w->data; + uint8_t data; + + int rc = recv(client_sock, &data, sizeof(data), 0); + + if (rc == 0 || (rc < 0 && errno == ECONNRESET)) { + say_info("the client has closed its replication socket, exiting"); + exit(EXIT_SUCCESS); + } + if (rc < 0) + say_syserror("recv"); + + exit(EXIT_FAILURE); +} + + +/** Send a single row to the client. 
*/ +static int +replication_relay_send_row(void *param, const char *row, uint32_t rowlen) +{ + int client_sock = (int) (intptr_t) param; + ssize_t bytes, len = rowlen; + while (len > 0) { + bytes = write(client_sock, row, len); + if (bytes < 0) { + if (errno == EPIPE) { + /* socket closed on opposite site */ + goto shutdown_handler; + } + panic_syserror("write"); + } + len -= bytes; + row += bytes; + } + + return 0; +shutdown_handler: + say_info("the client has closed its replication socket, exiting"); + exit(EXIT_SUCCESS); +} + + +/** The main loop of replication client service process. */ +static void +replication_relay_loop(int client_sock) +{ + struct sigaction sa; + int64_t lsn; + ssize_t r; + + /* Set process title and fiber name. + * Even though we use only the main fiber, the logger + * uses the current fiber name. + */ + struct sockaddr_in peer; + socklen_t addrlen = sizeof(peer); + getpeername(client_sock, ((struct sockaddr*)&peer), &addrlen); + title("relay", "%s", sio_strfaddr(&peer)); + fiber_set_name(fiber_ptr, status); + + /* init signals */ + memset(&sa, 0, sizeof(sa)); + sigemptyset(&sa.sa_mask); + + /* Reset all signals to their defaults. */ + sa.sa_handler = SIG_DFL; + if (sigaction(SIGCHLD, &sa, NULL) == -1 || + sigaction(SIGHUP, &sa, NULL) == -1 || + sigaction(SIGINT, &sa, NULL) == -1 || + sigaction(SIGTERM, &sa, NULL) == -1) + say_syserror("sigaction"); + + /* + * Ignore SIGPIPE, we already handle EPIPE. + * Ignore SIGUSR1, SIGUSR1 is used to make snapshots, + * and if someone wrote a faulty regexp for `ps' and + * fed it to `kill' the replication shouldn't die. + * Ignore SIGUSR2 as well, since one can be pretty + * inventive in ways of shooting oneself in the foot. 
+ */ + sa.sa_handler = SIG_IGN; + if (sigaction(SIGPIPE, &sa, NULL) == -1 || + sigaction(SIGUSR1, &sa, NULL) == -1 || + sigaction(SIGUSR2, &sa, NULL) == -1) { + + say_syserror("sigaction"); + } + + r = read(client_sock, &lsn, sizeof(lsn)); + if (r != sizeof(lsn)) { + if (r < 0) { + panic_syserror("read"); + } + panic("invalid LSN request size: %zu", r); + } + say_info("starting replication from lsn: %" PRIi64, lsn); + + replication_relay_send_row((void *)(intptr_t) client_sock, + (const char *) &default_version, + sizeof(default_version)); + + /* init libev events handlers */ + ev_default_loop(0); + + /* + * Init a read event: when replica closes its end + * of the socket, we can read EOF and shutdown the + * relay. + */ + struct ev_io sock_read_ev; + sock_read_ev.data = (void *)(intptr_t) client_sock; + ev_io_init(&sock_read_ev, replication_relay_recv, client_sock, EV_READ); + ev_io_start(&sock_read_ev); + + /* Initialize the recovery process */ + recovery_init(cfg.snap_dir, cfg.wal_dir, + replication_relay_send_row, (void *)(intptr_t) client_sock, + INT32_MAX, RECOVER_READONLY); + /* + * Note that recovery starts with lsn _NEXT_ to + * the confirmed one. + */ + recovery_state->lsn = recovery_state->confirmed_lsn = lsn - 1; + recover_existing_wals(recovery_state); + /* Found nothing. */ + if (recovery_state->lsn == lsn - 1) + say_error("can't find WAL containing record with lsn: %" PRIi64, lsn); + recovery_follow_local(recovery_state, 0.1); + + ev_loop(0); + + say_crit("exiting the relay loop"); + exit(EXIT_SUCCESS); +} + diff --git a/src/session.cc b/src/session.cc new file mode 100644 index 0000000000000000000000000000000000000000..9abdc1acef4faabde7cb1b3fba4deb15154ef15e --- /dev/null +++ b/src/session.cc @@ -0,0 +1,123 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. 
Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "session.h" +#include "fiber.h" + +#include "assoc.h" +#include "exception.h" +#include <sys/socket.h> + +uint32_t sid_max; + +static struct mh_i32ptr_t *session_registry; + +struct session_trigger session_on_connect; +struct session_trigger session_on_disconnect; + +uint32_t +session_create(int fd, uint64_t cookie) +{ + /* Return the next sid rolling over the reserved value of 0. */ + while (++sid_max == 0) + ; + + uint32_t sid = sid_max; + struct mh_i32ptr_node_t node; + node.key = sid; + node.val = (void *) (intptr_t) fd; + + mh_int_t k = mh_i32ptr_put(session_registry, &node, NULL, NULL); + + if (k == mh_end(session_registry)) { + tnt_raise(ClientError, ER_MEMORY_ISSUE, + "session hash", "new session"); + } + /* + * Run the trigger *after* setting the current + * fiber sid. 
+ */ + fiber_set_sid(fiber_ptr, sid, cookie); + if (session_on_connect.trigger) { + void *param = session_on_connect.param; + try { + session_on_connect.trigger(param); + } catch (const Exception& e) { + fiber_set_sid(fiber_ptr, 0, 0); + mh_i32ptr_remove(session_registry, &node, NULL); + throw; + } + } + + return sid; +} + +void +session_destroy(uint32_t sid) +{ + if (sid == 0) /* no-op for a dead session. */ + return; + + if (session_on_disconnect.trigger) { + void *param = session_on_disconnect.param; + try { + session_on_disconnect.trigger(param); + } catch (const Exception& e) { + e.log(); + } catch (...) { + /* catch all. */ + } + } + session_storage_cleanup(sid); + struct mh_i32ptr_node_t node = { sid, NULL }; + mh_i32ptr_remove(session_registry, &node, NULL); +} + +int +session_fd(uint32_t sid) +{ + struct mh_i32ptr_node_t node = { sid, NULL }; + mh_int_t k = mh_i32ptr_get(session_registry, &node, NULL); + return k == mh_end(session_registry) ? + -1 : (intptr_t) mh_i32ptr_node(session_registry, k)->val; +} + +void +session_init() +{ + session_registry = mh_i32ptr_new(); + if (session_registry == NULL) + panic("out of memory"); +} + +void +session_free() +{ + if (session_registry) + mh_i32ptr_delete(session_registry); +} diff --git a/src/tarantool.cc b/src/tarantool.cc new file mode 100644 index 0000000000000000000000000000000000000000..ea433395cc447a23a838f00b8a30152abf9d4ac2 --- /dev/null +++ b/src/tarantool.cc @@ -0,0 +1,953 @@ +/* + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "tarantool.h" +#include "tarantool/config.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/resource.h> +#include <pwd.h> +#include <unistd.h> +#include <getopt.h> +#include <libgen.h> +#include <sysexits.h> +#if defined(TARGET_OS_LINUX) && defined(HAVE_PRCTL_H) +# include <sys/prctl.h> +#endif +#include <admin.h> +#include <replication.h> +#include <fiber.h> +#include <coeio.h> +#include <iproto.h> +#include "mutex.h" +#include <recovery.h> +#include <crc32.h> +#include <palloc.h> +#include <salloc.h> +#include <say.h> +#include <stat.h> +#include <limits.h> +#include "tarantool/util.h" +extern "C" { +#include <cfg/warning.h> +#include <cfg/tarantool_box_cfg.h> +#include <third_party/gopt/gopt.h> +} /* extern "C" */ +#include "tt_pthread.h" +#include "lua/init.h" +#include "memcached.h" +#include "session.h" +#include "box/box.h" +#include "scoped_guard.h" + + +static pid_t master_pid; +const char *cfg_filename = NULL; +char *cfg_filename_fullpath = NULL; +char *binary_filename; +char *custom_proc_title; +char status[64] = "unknown"; +char 
**main_argv; +int main_argc; +static void *main_opt = NULL; +struct tarantool_cfg cfg; +/** Signals handled after start as part of the event loop. */ +static ev_signal ev_sigs[4]; +static const int ev_sig_count = sizeof(ev_sigs)/sizeof(*ev_sigs); + +int snapshot_pid = 0; /* snapshot processes pid */ +uint32_t snapshot_version = 0; + +extern const void *opt_def; + +/* defined in third_party/proctitle.c */ +extern "C" { +char **init_set_proc_title(int argc, char **argv); +void free_proc_title(int argc, char **argv); +void set_proc_title(const char *format, ...); +} /* extern "C" */ + +static int +core_check_config(struct tarantool_cfg *conf) +{ + if (strindex(wal_mode_STRS, conf->wal_mode, + WAL_MODE_MAX) == WAL_MODE_MAX) { + out_warning(CNF_OK, "wal_mode %s is not recognized", conf->wal_mode); + return -1; + } + return 0; +} + +void +title(const char *role, const char *fmt, ...) +{ + (void) role; + + va_list ap; + char buf[256], *bufptr = buf, *bufend = buf + sizeof(buf); + char *statusptr = status, *statusend = status + sizeof(status); + statusptr += snprintf(statusptr, statusend - statusptr, "%s", role); + bufptr += snprintf(bufptr, bufend - bufptr, "%s%s", role, + custom_proc_title); + + if (fmt != NULL) { + const char *s = statusptr; + statusptr += snprintf(statusptr, statusend - statusptr, "/"); + va_start(ap, fmt); + statusptr += vsnprintf(statusptr, statusend - statusptr, + fmt, ap); + va_end(ap); + bufptr += snprintf(bufptr, bufend - bufptr, "%s", s); + } + + int ports[] = { cfg.primary_port, cfg.secondary_port, + cfg.memcached_port, cfg.admin_port, + cfg.replication_port }; + int *pptr = ports; + const char *names[] = { "pri", "sec", "memc", "adm", "rpl", NULL }; + const char **nptr = names; + + for (; *nptr; nptr++, pptr++) + if (*pptr) + bufptr += snprintf(bufptr, bufend - bufptr, + " %s: %i", *nptr, *pptr); + if (strlen(cfg.bind_ipaddr)) { + bufptr += snprintf(bufptr, bufend - bufptr, + ", ip: %s", cfg.bind_ipaddr); + } + + set_proc_title(buf); +} + 
+static int +load_cfg(struct tarantool_cfg *conf, int32_t check_rdonly) +{ + FILE *f; + int32_t n_accepted, n_skipped, n_ignored; + + tbuf_reset(cfg_out); + + if (cfg_filename_fullpath != NULL) + f = fopen(cfg_filename_fullpath, "r"); + else + f = fopen(cfg_filename, "r"); + + if (f == NULL) { + out_warning(CNF_OK, "can't open config `%s'", cfg_filename); + return -1; + } + + int syntax = parse_cfg_file_tarantool_cfg(conf, f, check_rdonly, + &n_accepted, + &n_skipped, + &n_ignored); + fclose(f); + + if (syntax != 0) + return -1; + + if (check_cfg_tarantool_cfg(conf) != 0) + return -1; + + if (n_skipped != 0) + return -1; + + if (n_accepted == 0) { + out_warning(CNF_OK, "empty configuration file '%s'", cfg_filename); + return -1; + } + + if (core_check_config(conf) != 0) + return -1; + + if (replication_check_config(conf) != 0) + return -1; + + return box_check_config(conf); +} + +static int +core_reload_config(const struct tarantool_cfg *old_conf, + const struct tarantool_cfg *new_conf) +{ + if (strcasecmp(old_conf->wal_mode, new_conf->wal_mode) != 0 || + old_conf->wal_fsync_delay != new_conf->wal_fsync_delay) { + + double new_delay = new_conf->wal_fsync_delay; + + /* Mode has changed: */ + if (strcasecmp(old_conf->wal_mode, new_conf->wal_mode)) { + if (strcasecmp(old_conf->wal_mode, "fsync") == 0 || + strcasecmp(new_conf->wal_mode, "fsync") == 0) { + out_warning(CNF_OK, "wal_mode cannot switch to/from fsync"); + return -1; + } + } + + /* + * Unless wal_mode=fsync_delay, wal_fsync_delay is + * irrelevant and must be 0. 
+ */ + if (strcasecmp(new_conf->wal_mode, "fsync_delay") != 0) + new_delay = 0.0; + + + recovery_update_mode(recovery_state, new_conf->wal_mode, new_delay); + } + + if (old_conf->snap_io_rate_limit != new_conf->snap_io_rate_limit) + recovery_update_io_rate_limit(recovery_state, new_conf->snap_io_rate_limit); + + if (old_conf->io_collect_interval != new_conf->io_collect_interval) + ev_set_io_collect_interval(new_conf->io_collect_interval); + + return 0; +} + +int +reload_cfg(struct tbuf *out) +{ + static struct mutex *mutex = NULL; + struct tarantool_cfg new_cfg, aux_cfg; + + if (mutex == NULL) { + mutex = (struct mutex *) palloc(eter_pool, sizeof(*mutex)); + mutex_create(mutex); + } + + if (mutex_trylock(mutex) == true) { + out_warning(CNF_OK, "Could not reload configuration: it is being reloaded right now"); + tbuf_append(out, cfg_out->data, cfg_out->size); + + return -1; + } + + + auto scoped_guard = make_scoped_guard([&] { + destroy_tarantool_cfg(&aux_cfg); + destroy_tarantool_cfg(&new_cfg); + + if (cfg_out->size != 0) + tbuf_append(out, cfg_out->data, cfg_out->size); + + mutex_unlock(mutex); + }); + + init_tarantool_cfg(&new_cfg); + init_tarantool_cfg(&aux_cfg); + + /* + Prepare a copy of the original config file + for confetti, so that it can compare the new + file with the old one when loading the new file. + Load the new file and return an error if it + contains a different value for some read-only + parameter. + */ + if (dup_tarantool_cfg(&aux_cfg, &cfg) != 0 || + load_cfg(&aux_cfg, 1) != 0) + return -1; + /* + Load the new configuration file, but + skip the check for read only parameters. + new_cfg contains only defaults and + new settings. + */ + if (fill_default_tarantool_cfg(&new_cfg) != 0 || + load_cfg(&new_cfg, 0) != 0) + return -1; + + /* Check that no default value has been changed. 
*/ + char *diff = cmp_tarantool_cfg(&aux_cfg, &new_cfg, 1); + if (diff != NULL) { + out_warning(CNF_OK, "Could not accept read only '%s' option", diff); + return -1; + } + + /* Process wal-writer-related changes. */ + if (core_reload_config(&cfg, &new_cfg) != 0) + return -1; + + /* Now pass the config to the module, to take action. */ + if (box_reload_config(&cfg, &new_cfg) != 0) + return -1; + /* All OK, activate the config. */ + swap_tarantool_cfg(&cfg, &new_cfg); + tarantool_lua_load_cfg(tarantool_L, &cfg); + + return 0; +} + +/** Print the configuration file in YAML format. */ +void +show_cfg(struct tbuf *out) +{ + tarantool_cfg_iterator_t *i; + char *key, *value; + + tbuf_printf(out, "configuration:" CRLF); + i = tarantool_cfg_iterator_init(); + while ((key = tarantool_cfg_iterator_next(i, &cfg, &value)) != NULL) { + if (value) { + tbuf_printf(out, " %s: \"%s\"" CRLF, key, value); + free(value); + } else { + tbuf_printf(out, " %s: (null)" CRLF, key); + } + } +} + +const char * +tarantool_version(void) +{ + return PACKAGE_VERSION; +} + +static double start_time; + +double +tarantool_uptime(void) +{ + return ev_now() - start_time; +} + +int +snapshot(void) +{ + if (snapshot_pid) + return EINPROGRESS; + + + pid_t p = fork(); + if (p < 0) { + say_syserror("fork"); + return -1; + } + if (p > 0) { + snapshot_pid = p; + /* increment snapshot version */ + snapshot_version++; + int status = wait_for_child(p); + snapshot_pid = 0; + return (WIFSIGNALED(status) ? EINTR : WEXITSTATUS(status)); + } + + salloc_protect(); + + title("dumper", "%" PRIu32, getppid()); + fiber_set_name(fiber_ptr, status); + + /* + * Safety: make sure we don't double-write + * parent stdio buffers at exit(). + */ + close_all_xcpt(1, sayfd); + /* + * We must avoid double destruction of tuples on exit. + * Since there is no way to remove existing handlers + * registered in the master process, and snapshot_save() + * may call exit(), push a top-level handler which will do + * _exit() for us. 
+ */ + snapshot_save(recovery_state, box_snapshot); + + exit(EXIT_SUCCESS); + return 0; +} + + +/** +* Create snapshot from signal handler (SIGUSR1) +*/ +static void +sig_snapshot(struct ev_signal *w, int revents) +{ + (void) w; + (void) revents; + + if (snapshot_pid) { + say_warn("Snapshot process is already running," + " the signal is ignored"); + return; + } + fiber_call(fiber_new("snapshot", (fiber_func)snapshot)); +} + +static void +signal_cb(struct ev_signal *w, int revents) +{ + (void) w; + (void) revents; + + /* Terminate the main event loop */ + ev_unloop(EV_A_ EVUNLOOP_ALL); +} + +/** Try to log as much as possible before dumping a core. + * + * Core files are not aways allowed and it takes an effort to + * extract useful information from them. + * + * *Recursive invocation* + * + * Unless SIGSEGV is sent by kill(), Linux + * resets the signal a default value before invoking + * the handler. + * + * Despite that, as an extra precaution to avoid infinite + * recursion, we count invocations of the handler, and + * quietly _exit() when called for a second time. + */ +static void +sig_fatal_cb(int signo) +{ + static volatile sig_atomic_t in_cb = 0; + int fd = STDERR_FILENO; + struct sigaction sa; + + /* Got a signal while running the handler. */ + if (in_cb) { + fdprintf(fd, "Fatal %d while backtracing", signo); + goto end; + } + + in_cb = 1; + + if (signo == SIGSEGV) + fdprintf(fd, "Segmentation fault\n"); + else + fdprintf(fd, "Got a fatal signal %d\n", signo); + + fdprintf(fd, "Current time: %u\n", (unsigned) time(0)); + fdprintf(fd, + "Please file a bug at http://github.com/tarantool/tarantool/issues\n" + "Attempting backtrace... Note: since the server has " + "already crashed, \nthis may fail as well\n"); +#ifdef ENABLE_BACKTRACE + print_backtrace(); +#endif +end: + /* Try to dump core. 
*/ + memset(&sa, 0, sizeof(sa)); + sigemptyset(&sa.sa_mask); + sa.sa_handler = SIG_DFL; + sigaction(SIGABRT, &sa, NULL); + + abort(); +} + +/** + * This SIGTERM handler is only used before the main event loop started to + * cleanup server pid file. The handler is replaced by ev_signal after the boot. + * @sa signal_start + */ +static void +sig_term_cb(int signo) +{ + psignal(signo, ""); + /* unlink pidfile. */ + if (cfg.pid_file != NULL) + unlink(cfg.pid_file); + + _exit(EXIT_SUCCESS); +} + +static void +signal_free(void) +{ + int i; + for (i = 0; i < ev_sig_count; i++) + ev_signal_stop(&ev_sigs[i]); +} + +static void +signal_start(void) +{ + for (int i = 0; i < ev_sig_count; i++) + ev_signal_start(&ev_sigs[i]); +} + +/** Make sure the child has a default signal disposition. */ +static void +signal_reset() +{ + struct sigaction sa; + + /* Reset all signals to their defaults. */ + memset(&sa, 0, sizeof(sa)); + sigemptyset(&sa.sa_mask); + sa.sa_handler = SIG_DFL; + + if (sigaction(SIGUSR1, &sa, NULL) == -1 || + sigaction(SIGINT, &sa, NULL) == -1 || + sigaction(SIGTERM, &sa, NULL) == -1 || + sigaction(SIGHUP, &sa, NULL) == -1 || + sigaction(SIGSEGV, &sa, NULL) == -1 || + sigaction(SIGFPE, &sa, NULL) == -1) + say_syserror("sigaction"); + + /* Unblock any signals blocked by libev. */ + sigset_t sigset; + sigfillset(&sigset); + if (sigprocmask(SIG_UNBLOCK, &sigset, NULL) == -1) + say_syserror("sigprocmask"); +} + + +/** + * Adjust the process signal mask and add handlers for signals. 
+ */ +static void +signal_init(void) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + + sa.sa_handler = SIG_IGN; + sigemptyset(&sa.sa_mask); + + if (sigaction(SIGPIPE, &sa, 0) == -1) + panic_syserror("sigaction"); + + sa.sa_handler = sig_fatal_cb; + + if (sigaction(SIGSEGV, &sa, 0) == -1 || + sigaction(SIGFPE, &sa, 0) == -1) { + panic_syserror("sigaction"); + } + + sa.sa_handler = sig_term_cb; + if (sigaction(SIGUSR1, &sa, 0) == -1 || + sigaction(SIGINT, &sa, 0) == -1 || + sigaction(SIGTERM, &sa, 0) == -1 || + sigaction(SIGHUP, &sa, 0) == -1) { + panic_syserror("sigaction"); + } + + ev_signal_init(&ev_sigs[0], sig_snapshot, SIGUSR1); + ev_signal_init(&ev_sigs[1], signal_cb, SIGINT); + ev_signal_init(&ev_sigs[2], signal_cb, SIGTERM); + ev_signal_init(&ev_sigs[3], signal_cb, SIGHUP); + + (void) tt_pthread_atfork(NULL, NULL, signal_reset); +} + +static void +create_pid(void) +{ + FILE *f; + char buf[16] = { 0 }; + pid_t pid; + + if (cfg.pid_file == NULL) + return; + + f = fopen(cfg.pid_file, "a+"); + if (f == NULL) + panic_syserror("can't open pid file"); + /* + * fopen() is not guaranteed to set the seek position to + * the beginning of file according to ANSI C (and, e.g., + * on FreeBSD. + */ + if (fseeko(f, 0, SEEK_SET) != 0) + panic_syserror("can't fseek to the beginning of pid file"); + + if (fgets(buf, sizeof(buf), f) != NULL && strlen(buf) > 0) { + pid = strtol(buf, NULL, 10); + if (pid > 0 && kill(pid, 0) == 0) + panic("the daemon is already running"); + else + say_info("updating a stale pid file"); + if (fseeko(f, 0, SEEK_SET) != 0) + panic_syserror("can't fseek to the beginning of pid file"); + if (ftruncate(fileno(f), 0) == -1) + panic_syserror("ftruncate(`%s')", cfg.pid_file); + } + + master_pid = getpid(); + fprintf(f, "%i\n", master_pid); + fclose(f); +} + +/** Run in the background. 
*/ +static void +background() +{ + switch (fork()) { + case -1: + goto error; + case 0: /* child */ + break; + default: /* parent */ + exit(EXIT_SUCCESS); + } + + if (setsid() == -1) + goto error; + + /* + * Prints to stdout on failure, so got to be done before + * we close it. + */ + create_pid(); + + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); + return; +error: + exit(EXIT_FAILURE); +} + +void +tarantool_lua_free() +{ + /* + * Got to be done prior to anything else, since GC + * handlers can refer to other subsystems (e.g. fibers). + */ + if (tarantool_L) + tarantool_lua_close(tarantool_L); + tarantool_L = NULL; +} + +void +tarantool_free(void) +{ + /* Do nothing in a fork. */ + if (getpid() != master_pid) + return; + signal_free(); + memcached_free(); + tarantool_lua_free(); + box_free(); + recovery_free(); + stat_free(); + + if (cfg_filename_fullpath) + free(cfg_filename_fullpath); + if (main_opt) + gopt_free(main_opt); + free_proc_title(main_argc, main_argv); + + /* unlink pidfile. */ + if (cfg.pid_file != NULL) + unlink(cfg.pid_file); + destroy_tarantool_cfg(&cfg); + + session_free(); + fiber_free(); + palloc_free(); + ev_default_destroy(); +#ifdef ENABLE_GCOV + __gcov_flush(); +#endif +#ifdef HAVE_BFD + symbols_free(); +#endif + /* A hack for cc/ld, see ffisyms.c */ + if (time(NULL) == 0) { + /* never executed */ + extern void *ffi_symbols[]; + write(0, ffi_symbols, 0); + } +} + +static void +initialize_minimal() +{ + if (!salloc_init(64 * 1000 * 1000, 4, 2)) + panic("can't initialize slab allocator"); + + fiber_init(); + coeio_init(); +} + +int +main(int argc, char **argv) +{ + const char *cfg_paramname = NULL; + +#ifndef HAVE_LIBC_STACK_END +/* + * GNU libc provides a way to get at the top of the stack. This + * is, of course, not-standard and doesn't work on non-GNU + * systems, such as FreeBSD. But as far as we're concerned, argv + * is at the top of the main thread's stack, so save the address + * of it. 
+ */ + __libc_stack_end = (void*) &argv; +#endif + + crc32_init(); + stat_init(); + palloc_init(); + +#ifdef HAVE_BFD + symbols_load(argv[0]); +#endif + + argv = init_set_proc_title(argc, argv); + main_argc = argc; + main_argv = argv; + + void *opt = gopt_sort(&argc, (const char **)argv, opt_def); + main_opt = opt; + binary_filename = argv[0]; + + if (gopt(opt, 'V')) { + printf("Tarantool %s\n", tarantool_version()); + printf("Target: %s\n", BUILD_INFO); + printf("Build options: %s\n", BUILD_OPTIONS); + printf("Compiler: %s\n", COMPILER_INFO); + printf("C_FLAGS:%s\n", TARANTOOL_C_FLAGS); + printf("CXX_FLAGS:%s\n", TARANTOOL_CXX_FLAGS); + return 0; + } + + if (gopt(opt, 'h')) { + puts("Tarantool -- an efficient in-memory data store."); + printf("Usage: %s [OPTIONS]\n", basename(argv[0])); + puts(""); + gopt_help(opt_def); + puts(""); + puts("Please visit project home page at http://tarantool.org"); + puts("to see online documentation, submit bugs or contribute a patch."); + return 0; + } + + gopt_arg(opt, 'c', &cfg_filename); + /* + * if config is not specified trying ./tarantool.cfg then + * /etc/tarantool.cfg + */ + if (cfg_filename == NULL) { + if (access(DEFAULT_CFG_FILENAME, F_OK) == 0) + cfg_filename = DEFAULT_CFG_FILENAME; + else if (access(DEFAULT_CFG, F_OK) == 0) + cfg_filename = DEFAULT_CFG; + else + panic("can't load config " "%s or %s", DEFAULT_CFG_FILENAME, DEFAULT_CFG); + } + + cfg.log_level += gopt(opt, 'v'); + + if (argc != 1) { + fprintf(stderr, "Can't parse command line: try --help or -h for help.\n"); + exit(EX_USAGE); + } + + if (cfg_filename[0] != '/') { + cfg_filename_fullpath = (char *) malloc(PATH_MAX); + if (getcwd(cfg_filename_fullpath, PATH_MAX - strlen(cfg_filename) - 1) == NULL) { + say_syserror("getcwd"); + exit(EX_OSERR); + } + + strcat(cfg_filename_fullpath, "/"); + strcat(cfg_filename_fullpath, cfg_filename); + } + + cfg_out = tbuf_new(eter_pool); + assert(cfg_out); + + if (gopt(opt, 'k')) { + if (fill_default_tarantool_cfg(&cfg) != 
0 || load_cfg(&cfg, 0) != 0) { + say_error("check_config FAILED" + "%.*s", cfg_out->size, (char *)cfg_out->data); + + return 1; + } + + return 0; + } + + if (fill_default_tarantool_cfg(&cfg) != 0 || load_cfg(&cfg, 0) != 0) + panic("can't load config:" + "%.*s", cfg_out->size, (char *)cfg_out->data); + + if (gopt_arg(opt, 'g', &cfg_paramname)) { + tarantool_cfg_iterator_t *i; + char *key, *value; + + i = tarantool_cfg_iterator_init(); + while ((key = tarantool_cfg_iterator_next(i, &cfg, &value)) != NULL) { + if (strcmp(key, cfg_paramname) == 0 && value != NULL) { + printf("%s\n", value); + free(value); + + return 0; + } + + free(value); + } + + return 1; + } + + if (cfg.work_dir != NULL && chdir(cfg.work_dir) == -1) + say_syserror("can't chdir to `%s'", cfg.work_dir); + + if (cfg.username != NULL) { + if (getuid() == 0 || geteuid() == 0) { + struct passwd *pw; + errno = 0; + if ((pw = getpwnam(cfg.username)) == 0) { + if (errno) { + say_syserror("getpwnam: %s", + cfg.username); + } else { + say_error("User not found: %s", + cfg.username); + } + exit(EX_NOUSER); + } + if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0 || seteuid(pw->pw_uid)) { + say_syserror("setgit/setuid"); + exit(EX_OSERR); + } + } else { + say_error("can't switch to %s: i'm not root", cfg.username); + } + } + + if (cfg.coredump) { + struct rlimit c = { 0, 0 }; + if (getrlimit(RLIMIT_CORE, &c) < 0) { + say_syserror("getrlimit"); + exit(EX_OSERR); + } + c.rlim_cur = c.rlim_max; + if (setrlimit(RLIMIT_CORE, &c) < 0) { + say_syserror("setrlimit"); + exit(EX_OSERR); + } +#if defined(TARGET_OS_LINUX) && defined(HAVE_PRCTL_H) + if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) { + say_syserror("prctl"); + exit(EX_OSERR); + } +#endif + } + + if (gopt(opt, 'I')) { + initialize_minimal(); + box_init(true); + set_lsn(recovery_state, 1); + snapshot_save(recovery_state, NULL); + exit(EXIT_SUCCESS); + } + + if (gopt(opt, 'B')) { + if (cfg.logger == NULL) { + say_crit("--background requires 'logger' configuration 
option to be set"); + exit(EXIT_FAILURE); + } + background(); + } + else { + create_pid(); + } + + /* init process title - used for logging */ + if (cfg.custom_proc_title == NULL) { + custom_proc_title = (char *) palloc(eter_pool, 1); + custom_proc_title[0] = '\0'; + } else { + custom_proc_title = (char *) palloc(eter_pool, strlen(cfg.custom_proc_title) + 2); + strcpy(custom_proc_title, "@"); + strcat(custom_proc_title, cfg.custom_proc_title); + } + + say_logger_init(cfg.logger_nonblock); + + /* main core cleanup routine */ + atexit(tarantool_free); + + ev_default_loop(EVFLAG_AUTO); + fiber_init(); + replication_prefork(); + coeio_init(); + if (!salloc_init(cfg.slab_alloc_arena * (1 << 30) /* GB */, + cfg.slab_alloc_minimal, cfg.slab_alloc_factor)) + panic("can't initialize slab allocator"); + + signal_init(); + + try { + say_crit("version %s", tarantool_version()); + tarantool_L = tarantool_lua_init(); + box_init(false); + memcached_init(cfg.bind_ipaddr, cfg.memcached_port); + tarantool_lua_load_cfg(tarantool_L, &cfg); + /* + * init iproto before admin and after memcached: + * recovery is finished on bind to the primary port, + * and it has to happen before requests on the + * administrative port start to arrive. + * And when recovery is finalized, memcached + * expire loop is started, so binding can happen + * only after memcached is initialized. + */ + iproto_init(cfg.bind_ipaddr, cfg.primary_port, + cfg.secondary_port); + admin_init(cfg.bind_ipaddr, cfg.admin_port); + replication_init(cfg.bind_ipaddr, cfg.replication_port); + session_init(); + /* + * Load user init script. The script should have access + * to Tarantool Lua API (box.cfg, box.fiber, etc...) that + * is why script must run only after the server was fully + * initialized. 
+ */ + tarantool_lua_load_init_script(tarantool_L); + prelease(fiber_ptr->gc_pool); + say_crit("log level %i", cfg.log_level); + say_crit("entering event loop"); + if (cfg.io_collect_interval > 0) + ev_set_io_collect_interval(cfg.io_collect_interval); + ev_now_update(); + start_time = ev_now(); + signal_start(); + ev_loop(0); + } catch (const Exception& e) { + e.log(); + panic("%s", "Fatal error, exiting loop"); + } + + say_crit("exiting loop"); + /* freeing resources */ + return 0; +}