diff --git a/doc/sphinx/_templates/base b/doc/sphinx/_templates/base index 2a596e512ca340bf948279673d83e63de161de4a..4be6540f6f928a2bbab388489402b908979f40cb 100644 --- a/doc/sphinx/_templates/base +++ b/doc/sphinx/_templates/base @@ -1,4 +1,3 @@ - {% import "menu" as menu %} {% include "custom_sphinx" with context %} diff --git a/doc/sphinx/_templates/layout.html b/doc/sphinx/_templates/layout.html index b1b5d9ed714e7d11fe625b8b66ab63cc5047bee2..865b29ed3d14633c3315f32e9fcb6d6a17eaf56d 100644 --- a/doc/sphinx/_templates/layout.html +++ b/doc/sphinx/_templates/layout.html @@ -14,6 +14,8 @@ {% set render_sidebar = true %} {% endif %} +{% set render_sidebar = false %} + {% block content %} {% if render_sidebar %} <section class="b-lightgray_block b-documentation_top b-clearbox p-documentation_in"> @@ -36,17 +38,16 @@ </div> <div class="b-cols_content_right"> <div class="b-cols_content_right-slot"> - {{ breadcrumbs.breadcrumbs() }} - <!--{{ navbar.navbar(True) }}--> {% else %} <section class="b-block b-documentation"> <div class="b-block-wrapper"> {% endif %} + {{ breadcrumbs.breadcrumbs() }} <article class="b-article"> {% block body %} {% endblock %} </article> + {{ navbar.navbar(False) }} {% if render_sidebar %} - {{ navbar.navbar(False) }} </div> </div> </div> diff --git a/doc/sphinx/book/administration.rst b/doc/sphinx/book/administration.rst new file mode 100644 index 0000000000000000000000000000000000000000..b43ba375144fc825774b8501444093df0b4e60f3 --- /dev/null +++ b/doc/sphinx/book/administration.rst @@ -0,0 +1,497 @@ +.. include:: ../directives.rst +.. highlight:: lua + +------------------------------------------------------------------------------- + Server administration +------------------------------------------------------------------------------- + +Typical server administration tasks include starting and stopping the server, +reloading configuration, taking snapshots, log rotation. 
+ +===================================================================== + Server signal handling +===================================================================== + +The server is configured to shut down gracefully on SIGTERM and SIGINT (keyboard +interrupt) or SIGHUP. SIGUSR1 can be used to save a snapshot. All other signals +are blocked or ignored. The signals are processed in the main event loop. Thus, +if the control flow never reaches the event loop (thanks to a runaway stored +procedure), the server stops responding to any signal, and can only be killed +with SIGKILL (this signal can not be ignored). + + +===================================================================== + Utility ``tarantool`` +===================================================================== + +.. program:: tarantool + +If ``tarantool`` is started without an initialization file, then there will be +a prompt ("``tarantool>``") and it will be possible to enter requests. When +used this way, ``tarantool`` is a client program as well as a server program. + +This section shows all legal syntax for the tarantool program, with short notes +and examples. Other client programs may have similar options and request +syntaxes. Some of the information in this section is duplicated in the +`Configuration Reference`_ chapter. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Conventions used in this section +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Tokens are character sequences which are treated as syntactic units within +requests. Square brackets [ and ] enclose optional syntax. Three dots in a +row ... mean the preceding tokens may be repeated. A vertical bar | means +the preceding and following tokens are mutually exclusive alternatives. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Options when starting client from the command line +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +General form: + +.. 
code-block:: bash + + $ tarantool + OR + $ tarantool <options> + OR + $ tarantool <lua-initialization-file> [arguments] + +<lua-initialization-file> can be any script containing code for initializing. +Effect: The code in the file is executed during startup. Example: ``init.lua``. +Notes: If a script is used, there will be no prompt. The script should contain +configuration information including "``box.cfg{...listen=...}``" or +"``box.listen(...)``" so that a separate program can connect to the server via +one of the ports. + +Option is one of the following (in alphabetical order by the long form of the +option): + +.. option:: -?, -h, --help + + Client displays a help message including a list of options. + + .. code-block:: bash + + tarantool --help + + The program stops after displaying the help. + +.. option:: -V, --version + + .. code-block:: bash + + tarantool --version + + The program stops after displaying the version. + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Tokens, requests, and special key combinations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Procedure identifiers are: Any sequence of letters, digits, or underscores +which is legal according to the rules for Lua identifiers. Procedure +identifiers are also called function names. Notes: function names are case +sensitive so ``insert`` and ``Insert`` are not the same thing. + +String literals are: Any sequence of zero or more characters enclosed in +single quotes. Double quotes are legal but single quotes are preferred. +Enclosing in double square brackets is good for multi-line strings as +described in `Lua documentation`_. + +Example: + +.. code-block:: lua + + 'Hello, world', 'A', [[A\B!]]. + +Numeric literals are: Character sequences containing only digits, optionally +preceded by + or -. Examples: 55, -1. Notes: Tarantool NUM data type is +unsigned, so -1 is understood as a large unsigned number. + +Single-byte tokens are: * or , or ( or ). 
Examples: * , ( ). + +Tokens must be separated from each other by one or more spaces, except that +spaces are not necessary around single-byte tokens or string literals. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Requests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Generally requests are entered following the prompt in interactive mode while +``tarantool`` is running. (A prompt will be the word tarantool and a +greater-than sign, for example ``tarantool>``). The end-of-request marker is by +default a newline (line feed). + +For multi-line requests, it is possible to change the end-of-request marker. +Syntax: ``console = require('console'); console.delimiter(string-literal)``. +The string-literal must be a value in single quotes. Effect: string becomes +end-of-request delimiter, so newline alone is not treated as end of request. +To go back to normal mode: ``console.delimiter('')`` followed by the current delimiter. Example: + +.. code-block:: lua + + console = require('console'); console.delimiter('!') + function f () + statement_1 = 'a' + statement_2 = 'b' + end! + console.delimiter('')! + +For a condensed Backus-Naur Form [BNF] description of the suggested form of +client requests, see `doc/box-protocol.html`_ and `doc/sql.txt`_. + +In *interactive* mode, one types requests and gets results. Typically the +requests are typed in by the user following prompts. Here is an example of an interactive-mode tarantool client session: + +.. code-block:: bash + + $ tarantool + [ tarantool will display an introductory message + including version number here ] + tarantool> box.cfg{listen=3301} + [ tarantool will display configuration information + here ] + tarantool> s = box.schema.space.create('tester') + [ tarantool may display an in-progress message here ] + --- + ... + tarantool> s:create_index('primary', {type = 'hash', parts = {1, 'NUM'}}) + --- + ... + tarantool> box.space.tester:insert{1,'My first tuple'} + --- + - [1, 'My first tuple'] + ... 
+ tarantool> box.space.tester:select(1) + --- + - - [1, 'My first tuple'] + ... + tarantool> box.space.tester:drop() + --- + ... + tarantool> os.exit() + 2014-04-30 10:28:00.886 [20436] main/101/spawner I> Exiting: master shutdown + $ + +Explanatory notes about what tarantool displayed in the above example: + +* Many requests return typed objects. In the case of "``box.cfg{listen=3301}``", + this result is displayed on the screen. If the request had assigned the result + to a variable, for example "``c = box.cfg{listen=3301}``", then the result + would not have been displayed on the screen. +* A display of an object always begins with "``---``" and ends with "``...``". +* The insert request returns an object of type = tuple, so the object display line begins with a single dash ('``-``'). However, the select request returns an object of type = table of tuples, so the object display line begins with two dashes ('``- -``'). + +===================================================================== + Utility ``tarantoolctl`` +===================================================================== + +.. program:: tarantoolctl + +With ``tarantoolctl`` one can say: "start an instance of the Tarantool server +which runs a single user-written Lua program, allocating disk resources +specifically for that program, via a standardized deployment method." +If Tarantool was downloaded from source, then the script is in +:file:`~/extra/dist/tarantoolctl`. If Tarantool was installed with Debian or +Red Hat installation packages, the script is renamed :program:`tarantoolctl` +and is in :file:`/usr/bin/tarantoolctl`. The script handles such things as: +starting, stopping, rotating logs, logging in to the application's console, +and checking status. 
+ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + configuring for tarantoolctl +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :program:`tarantoolctl` script will read a configuration file named +:file:`/etc/sysconfig/tarantool`, or :file:`/etc/default/tarantool`. Most +of the settings are similar to the settings used by ``box.cfg{...};`` +however, tarantoolctl adjusts some of them by adding an application name. +A copy of :file:`/etc/sysconfig/tarantool`, with defaults for all settings, +would look like this: + +.. code-block:: lua + + default_cfg = { + pid_file = "/var/run/tarantool", + wal_dir = "/var/lib/tarantool", + snap_dir = "/var/lib/tarantool", + sophia_dir = "/var/lib/tarantool", + logger = "/var/log/tarantool", + username = "tarantool", + } + instance_dir = "/etc/tarantool/instances.enabled" + +The settings in the above script are: + +``pid_file`` + The directory for the pid file and control-socket file. The + script will add ":file:`/instance-name`" to the directory name. + +``wal_dir`` + The directory for the write-ahead :file:`*.xlog` files. The + script will add ":file:`/instance-name`" to the directory-name. + +``snap_dir`` + The directory for the snapshot :file:`*.snap` files. The script + will add ":file:`/instance-name`" to the directory-name. + +``sophia_dir`` + The directory for the sophia-storage-engine files. The script + will add ":file:`/sophia/instance-name`" to the directory-name. + +``logger`` + The place where the application log will go. The script will + add ":file:`/instance-name.log`" to the name. + +``username`` + the user that runs the tarantool server. This is the operating-system + user name rather than the Tarantool-client user name. + +``instance_dir`` + the directory where all applications for this host are stored. The user + who writes an application for :program:`tarantoolctl` must put the + application's source code in this directory, or a symbolic link. 
For + examples in this section the application name my_app will be used, and + its source will have to be in :file:`instance_dir/my_app.lua`. + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + commands for tarantoolctl +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The command format is ``tarantoolctl operation application-name``, where +operation is one of: start, stop, status, logrotate, enter. Thus ... + +.. option:: start <app_name> + + starts application <app_name> + +.. option:: stop <app_name> + + stops <app_name> + +.. option:: enter <app_name> + + show <app_name>'s admin console, if it has one + +.. option:: logrotate <app_name> + + rotate <app_name>'s log files (make new, remove old) + +.. option:: status <app_name> + + check <app_name>'s status + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + typical code snippets for tarantoolctl +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A user can check whether my_app is running with these lines: + +.. code-block:: bash + + if tarantoolctl status my_app; then + ... + fi + +A user can initiate, for boot time, an init.d set of instructions: + +.. code-block:: bash + + for (each file mentioned in the instance_dir directory): + tarantoolctl start `basename $file .lua` + +A user can set up a further configuration file for log rotation, like this: + +.. code-block:: text + + /path/to/tarantool/*.log { + daily + size 512k + missingok + rotate 10 + compress + delaycompress + create 0640 tarantool adm + postrotate + /path/to/tarantoolctl logrotate `basename $1 .log` + endscript + } + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A detailed example for tarantoolctl +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The example's objective is: make a temporary directory where tarantoolctl +can start a long-running application and monitor it. 
+ +The assumptions are: the root password is known, the computer is only being used +for tests, the Tarantool server is ready to run but is not currently running, +and there currently is no directory named :file:`tarantool_test`. + +Create a directory named /tarantool_test: + +.. code-block:: bash + + sudo mkdir /tarantool_test + +Copy tarantoolctl to /tarantool_test. If you made a source +download to ~/tarantool-master, then + +.. code-block:: bash + + sudo cp ~/tarantool-master/extra/dist/tarantoolctl /tarantool_test/tarantoolctl + +If the file was named tarantoolctl and placed on /usr/bin/tarantoolctl, then + +.. code-block:: bash + + sudo cp /usr/bin/tarantoolctl /tarantool_test/tarantoolctl + +Check and possibly change the first line of /tarantool_test/tarantoolctl. +Initially it says + +.. code-block:: bash + + #!/usr/bin/env tarantool + +If that is not correct, edit tarantoolctl and change the line. For example, +if the Tarantool server is actually on /home/user/tarantool-master/src/tarantool, +change the line to + +.. code-block:: bash + + #!/usr/bin/env /home/user/tarantool-master/src/tarantool + +Save a copy of /etc/sysconfig/tarantool, if it exists. + +Edit /etc/sysconfig/tarantool. It might be necessary to say sudo mkdir /etc/sysconfig first. Let the new file contents be: + +.. code-block:: lua + + default_cfg = { + pid_file = "/tarantool_test/my_app.pid", + wal_dir = "/tarantool_test", + snap_dir = "/tarantool_test", + sophia_dir = "/tarantool_test", + logger = "/tarantool_test/log", + username = "tarantool", + } + instance_dir = "/tarantool_test" + +Make the my_app application file, that is, /tarantool_test/my_app.lua. Let the file contents be: + +.. 
code-block:: lua + + box.cfg{listen = 3301} + box.schema.user.passwd('Gx5!') + box.schema.user.grant('guest','read,write,execute','universe') + fiber = require('fiber') + box.schema.space.create('tester') + box.space.tester:create_index('primary',{}) + i = 0 + while 0 == 0 do + fiber.sleep(5) + i = i + 1 + print('insert ' .. i) + box.space.tester:insert{i, 'my_app tuple'} + end + +Tell tarantoolctl to start the application ... + +.. code-block:: bash + + cd /tarantool_test + sudo ./tarantoolctl start my_app + +... expect to see messages indicating that the instance has started. Then ... + +.. code-block:: bash + + ls -l /tarantool_test/my_app + +... expect to see the .snap file, .xlog file, and sophia directory. Then ... + +.. code-block:: bash + + less /tarantool_test/log/my_app.log + +... expect to see the contents of my_app's log, including error messages, if any. Then ... + +.. code-block:: bash + + cd /tarantool_test + #assume that 'tarantool' invokes the tarantool server + sudo tarantool + box.cfg{} + console = require('console') + console.connect('localhost:3301') + box.space.tester:select({0},{iterator='GE'}) + +... expect to see several tuples that my_app has created. + +Stop. The only clean way to stop my_app is with tarantoolctl, thus: + + +.. code-block:: bash + + sudo ./tarantoolctl stop my_app + +Clean up. Restore the original contents of /etc/sysconfig/tarantool, and ... + +.. code-block:: bash + + cd / + sudo rm -R tarantool_test + +===================================================================== + System-specific administration notes +===================================================================== + +This section will contain information about issue or features which exist +on some platforms but not others - for example, on certain versions of a +particular Linux distribution. 
+ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Administrating with Debian GNU/Linux and Ubuntu +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Setting up an instance: +``ln -s /etc/tarantool/instances.available/instance-name.cfg /etc/tarantool/instances.enabled/`` + +Starting all instances: +``service tarantool start`` + +Stopping all instances: +``service tarantool stop`` + +Starting/stopping one instance: +``service tarantool-instance-name start/stop`` + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Fedora, RHEL, CentOS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are no known permanent issues. For transient issues, go to +http://github.com/tarantool/tarantool/issues and enter "RHEL" or +"CentOS" or "Fedora" or "Red Hat" in the search box. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FreeBSD +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are no known permanent issues. For transient issues, go to +http://github.com/tarantool/tarantool/issues and enter "FreeBSD" +in the search box. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Mac OS X +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are no known permanent issues. For transient issues, go to +http://github.com/tarantool/tarantool/issues and enter "OS X" in +the search box. diff --git a/doc/sphinx/book/configuration.rst b/doc/sphinx/book/configuration.rst index 7485f0aea5f3f28003d38121d005cd7a53013ce7..56f8877de7a9bf093cae2ef2a91f94a0787c956d 100644 --- a/doc/sphinx/book/configuration.rst +++ b/doc/sphinx/book/configuration.rst @@ -340,7 +340,6 @@ for binary logging and snapshots, for replication, for networking, and for loggi | | | | | changes to write-ahead-log files for the sake of | | | | | | replication or local hot standby. 
| +----------------------+-----------+----------+----------+-----------------------------------------------------+ - | | | | | **Replication** @@ -364,9 +363,94 @@ for binary logging and snapshots, for replication, for networking, and for loggi **Networking** - ho + +----------------------+-----------+----------+----------+-----------------------------------------------------+ + | Name | Type | Default | Dynamic? | Description | + +======================+===========+==========+==========+=====================================================+ + | io_collect_interval | float | null | **yes** | The server will sleep for io_collect_interval | + | | | | | seconds between iterations of the event loop. Can | + | | | | | be used to reduce CPU load in deployments in which | + | | | | | the number of client connections is large, but | + | | | | | requests are not so frequent (for example, each | + | | | | | connection issues just a handful of requests per | + | | | | | second). | + +----------------------+-----------+----------+----------+-----------------------------------------------------+ + | readahead | integer | 16320 | **yes** | The size of the read-ahead buffer associated with a | + | | | | | client connection. The larger the buffer, the more | + | | | | | memory an active connection consumes and the more | + | | | | | requests can be read from the operating system | + | | | | | buffer in a single system call. The rule of thumb | + | | | | | is to make sure the buffer can contain at least a | + | | | | | few dozen requests. Therefore, if a typical tuple | + | | | | | in a request is large, e.g. a few kilobytes or even | + | | | | | megabytes, the read-ahead buffer size should be | + | | | | | increased. If batched request processing is not | + | | | | | used, it's prudent to leave this setting at its | + | | | | | default. 
| + +----------------------+-----------+----------+----------+-----------------------------------------------------+ **Logging** - + +----------------------+-----------+----------+----------+-----------------------------------------------------+ + | Name | Type | Default | Dynamic? | Description | + +======================+===========+==========+==========+=====================================================+ + | log_level | integer | 5 | **yes** | How verbose the logging is. There are six log | + | | | | | verbosity classes: 1 -- SYSERROR, 2 -- ERROR, | + | | | | | 3 -- CRITICAL, 4 -- WARNING, 5 -- INFO, 6 -- DEBUG. | + | | | | | By setting log_level, one can enable logging of all | + | | | | | classes below or equal to the given level. | + | | | | | Tarantool prints its logs to the standard error | + | | | | | stream by default, but this can be changed with | + | | | | | the "logger" configuration parameter. | + +----------------------+-----------+----------+----------+-----------------------------------------------------+ + | logger | string | "null" | no | By default, the log is sent to the standard error | + | | | | | stream (``stderr``). If logger is specified, the | + | | | | | log is sent to the file named in the string. | + | | | | | Example setting: ``logger = 'tarantool.log'`` (this | + | | | | | will open tarantool.log for output on the server's | + | | | | | default directory). | + +----------------------+-----------+----------+----------+-----------------------------------------------------+ + | logger_nonblock | boolean | true | no | If logger_nonblock equals true, Tarantool does not | + | | | | | block on the log file descriptor when it's not | + | | | | | ready for write, and drops the message instead. If | + | | | | | log_level is high, and a lot of messages go to the | + | | | | | log file, setting logger_nonblock to true may | + | | | | | improve logging performance at the cost of some log | + | | | | | messages getting lost. 
| + +----------------------+-----------+----------+----------+-----------------------------------------------------+ + | too_long_threshold | float | 0.5 | **yes** | If processing a request takes longer than the given | + | | | | | value (in seconds), warn about it in the log. Has | + | | | | | effect only if log_level is greater than or equal | + | | | | | to 4 (WARNING). | + +----------------------+-----------+----------+----------+-----------------------------------------------------+ + +===================================================================== + Local hot standby +===================================================================== + +Local hot standby is a feature which provides a simple form of failover without +replication. To initiate it, start a second instance of the Tarantool server on +the same computer with the same :func:`box.cfg` configuration settings - +including the same directories and same URIs. A warning should appear with a +message like + +.. code-block:: lua + W> primary: [URI] is already in use, will retry binding after [n] seconds + +This is fine. It means that the second instance is ready to take over if the +first instance goes down. + +The expectation is that there will be two instances of the server using the +same configuration. The first one to start will be the "primary" instance. +The second one to start will be the "standby" instance. The standby instance +will initialize and will try to connect on listen address and admin address, +but will fail because the primary instance has already taken them. So the +standby instance goes into a loop, reading the write ahead log which the +primary instance is writing (so the two instances are always in synch), +and trying to connect on the ports. If the primary instance goes down for any +reason, the ports will become free so the standby instance will succeed in +connecting, and will become the primary instance. 
Thus there is no noticeable +downtime if the primary instance goes down. + +If this ``local_hot_standby`` feature is being used, then ``replication_source`` +should be an empty string and ``wal_mode`` should not be equal to "none". diff --git a/doc/sphinx/book/connectors/__python.rst b/doc/sphinx/book/connectors/__python.rst index d2b5e36c15115fe108ed5e7573d7def40044889f..34f80a9c23914a439e017aab116d78d3a61e79ba 100644 --- a/doc/sphinx/book/connectors/__python.rst +++ b/doc/sphinx/book/connectors/__python.rst @@ -1,5 +1,5 @@ ===================================================================== - PHP + Python ===================================================================== Here is a complete Python program that inserts ``[99999,'Value','Value']`` into diff --git a/doc/sphinx/book/connectors/index.rst b/doc/sphinx/book/connectors/index.rst index 42365614cfa728cc65229b4d9611aeef491657a8..8cc51a9fa3db78a93e788536a7202b2da17edc62 100644 --- a/doc/sphinx/book/connectors/index.rst +++ b/doc/sphinx/book/connectors/index.rst @@ -1,4 +1,4 @@ -.. include:: ../directives.rst +.. include:: ../../directives.rst .. highlight:: lua ------------------------------------------------------------------------------- diff --git a/doc/sphinx/book/index.rst b/doc/sphinx/book/index.rst index a86a6c90ec697dc27f53120781755f76fea9a1ff..4613e14c4bcc26c4641486bf95a565a2fb3d0745 100644 --- a/doc/sphinx/book/index.rst +++ b/doc/sphinx/book/index.rst @@ -4,6 +4,9 @@ .. toctree:: + replication + configuration + administration connectors/index app_a_errcodes app_b_proctitle diff --git a/doc/sphinx/book/replication.rst b/doc/sphinx/book/replication.rst new file mode 100644 index 0000000000000000000000000000000000000000..9909d9f6703096ec3b9eb7b0623d9e833d25e4ef --- /dev/null +++ b/doc/sphinx/book/replication.rst @@ -0,0 +1,227 @@ +.. include:: ../directives.rst +.. 
highlight:: lua + +------------------------------------------------------------------------------- + Replication +------------------------------------------------------------------------------- + +Replication allows multiple Tarantool servers to work on copies of the same +databases. The databases are kept in synch because each server can communicate +its changes to all the other servers. Servers which share the same databases +are a "cluster". Each server in a cluster also has a numeric identifier which +is unique within the cluster, known as the "server id". + + To set up replication, it's necessary to set up the master servers which + make the original data-change requests, set up the replica servers which + copy data-change requests from masters, and establish procedures for + recovery from a degraded state. + +===================================================================== + Replication architecture +===================================================================== + +A replica gets all updates from the master by continuously fetching and +applying its write-ahead log (WAL). Each record in the WAL represents a +single Tarantool data-change request such as INSERT or UPDATE or DELETE, +and is assigned a monotonically growing log sequence number (LSN). In +essence, Tarantool replication is row-based: each data change command is +fully deterministic and operates on a single tuple. + +A stored program invocation is not written to the write-ahead log. Instead, +log events for actual data-change requests, performed by the Lua code, are +written to the log. This ensures that possible non-determinism of Lua does +not cause replication to go out of sync. 
+ +===================================================================== + Setting up the master +===================================================================== + +To prepare the master for connections from the replica, it's only necessary +to include "listen" in the initial ``box.cfg`` request, for example +``box.cfg{listen=3301}``. A master with enabled "listen" URI can accept +connections from as many replicas as necessary on that URI. Each replica +has its own replication state. + +===================================================================== + Setting up a replica +===================================================================== + +A server requires a valid snapshot (.snap) file. A snapshot file is created +for a server the first time that ``box.cfg`` occurs for it. If this first +``box.cfg`` request occurs without a "replication_source" clause, then the +server is a master and starts its own new cluster with a new unique UUID. +If this first ``box.cfg`` request occurs with a "replication_source" clause, +then the server is a replica and its snapshot file, along with the cluster +information, is constructed from the write-ahead logs of the master. +Therefore, to start replication, specify `replication_source`_ in a ``box.cfg`` +request. When a replica contacts a master for the first time, it becomes part +of a cluster. On subsequent occasions, it should always contact a master in +the same cluster. + +Once connected to the master, the replica requests all changes that happened +after the latest local LSN. It is therefore necessary to keep WAL files on +the master host as long as there are replicas that haven't applied them yet. +A replica can be "re-seeded" by deleting all its files (the snapshot .snap +file and the WAL .xlog files), then starting replication again - the replica +will then catch up with the master by retrieving all the master's tuples. +Again, this procedure works only if the master's WAL files are present. + +.. 
NOTE:: + + Replication parameters are "dynamic", which allows the replica to become + a master and vice versa with the help of the :func:`box.cfg` statement. + +.. NOTE:: + + The replica does not inherit the master's configuration parameters, such + as the ones that cause the `snapshot daemon`_ to run on the master. To get + the same behavior, one would have to set the relevant parameters explicitly + so that they are the same on both master and replica. + +===================================================================== + Recovering from a degraded state +===================================================================== + +"Degraded state" is a situation when the master becomes unavailable - due to +hardware or network failure, or due to a programming bug. There is no automatic +way for a replica to detect that the master is gone for good, since sources of +failure and replication environments vary significantly. So the detection of +degraded state requires a human inspection. + +However, once a master failure is detected, the recovery is simple: declare +that the replica is now the new master, by saying ``box.cfg{... listen=URI}``. +Then, if there are updates on the old master that were not propagated before +the old master went down, they would have to be re-applied manually. + + + +===================================================================== + Instructions for quick startup of a new two-server simple cluster +===================================================================== + +Step 1. Start the first server thus: + +.. code-block:: lua + + box.cfg{listen=uri#1} + -- replace with more restrictive request + box.schema.user.grant('guest','read,write,execute','universe') + box.snapshot() + +... Now a new cluster exists. + +Step 2. Check where the second server's files will go by looking at its +directories (`snap_dir`_ for snapshot files, `wal_dir`_ for .xlog files). 
+They must be empty - when the second server joins for the first time, it +has to be working with a clean slate so that the initial copy of the first +server's databases can happen without conflicts. + +Step 3. Start the second server thus: + +.. code-block:: lua + + box.cfg{listen=uri#2, replication_source=uri#1} + +... where ``uri#1`` = the `URI`_ that the first server is listening on. + +That's all. + +In this configuration, the first server is the "master" and the second server +is the "replica". Henceforth every change that happens on the master will be +visible on the replica. A simple two-server cluster with the master on one +computer and the replica on a different computer is very common and provides +two benefits: FAILOVER (because if the master goes down then the replica can +take over), and LOAD BALANCING (because clients can connect to either the master +or the replica for select requests). + +===================================================================== + Master-Master Replication +===================================================================== + +In the simple master-replica configuration, the master's changes are seen by +the replica, but not vice versa, because the master was specified as the sole +replication source. Starting with Tarantool 1.6, it's possible to go both ways. +Starting with the simple configuration, the first server has to say: +``box.cfg{replication_source=uri#2}``. This request can be performed at any time. + +In this configuration, both servers are "masters" and both servers are +"replicas". Henceforth every change that happens on either server will +be visible on the other. The failover benefit is still present, and the +load-balancing benefit is enhanced (because clients can connect to either +server for data-change requests as well as select requests).
+ +If two operations for the same tuple take place "concurrently" (which can +involve a long interval because replication is asynchronous), and one of +the operations is ``delete`` or ``replace``, there is a possibility that +servers will end up with different contents. + + +===================================================================== + All the "What If?" Questions +===================================================================== + +:Q: What if there are more than two servers with master-master? +:A: On each server, specify the replication_source for all the others. For + example, server #3 would have a request: + ``box.cfg{replication_source=uri#1, replication_source=uri#2}``. + +:Q: What if a server should be taken out of the cluster? +:A: Run ``box.cfg{}`` again specifying a blank replication source: + ``box.cfg{replication_source=''}``. + +:Q: What if a server leaves the cluster? +:A: The other servers carry on. If the wayward server rejoins, it will receive + all the updates that the other servers made while it was away. + +:Q: What if two servers both change the same tuple? +:A: The last changer wins. For example, suppose that server#1 changes the tuple, + then server#2 changes the tuple. In that case server#2's change overrides + whatever server#1 did. In order to keep track of who came last, Tarantool + implements a `vector clock`_. + +:Q: What if a master disappears and the replica must take over? +:A: A message will appear on the replica stating that the connection is lost. + The replica must now become independent, which can be done by saying + ``box.cfg{replication_source=''}``. + +:Q: What if it's necessary to know what cluster a server is in? +:A: The identification of the cluster is a UUID which is generated when the + first master starts for the first time. This UUID is stored in a tuple + of the :data:`box.space._cluster` system space, and in a tuple of the + :data:`box.space._schema` system space.
So to see it, say: + ``box.space._schema:select{'cluster'}`` + +:Q: What if one of the server's files is corrupted or deleted? +:A: Stop the server, destroy all the database files (the ones with extension + "snap" or "xlog" or ".inprogress"), restart the server, and catch up with + the master by contacting it again (just say ``box.cfg{...replication_source=...}``). + +:Q: What if replication causes security concerns? +:A: Prevent unauthorized replication sources by associating a password with + every user that has access privileges for the relevant spaces. That way, + the `URI`_ for the replication_source parameter will always have to have + the long form ``replication_source='username:password@host:port'``. + +.. _vector clock: https://en.wikipedia.org/wiki/Vector_clock + +===================================================================== + Hands-On Replication Tutorial +===================================================================== + +After following the steps here, an administrator will have experience creating +a cluster and adding a replica. + +Start two shells. Put them side by side on the screen. + ++-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+ +| Terminal #1 | Terminal #2 | ++===============================================================================+===============================================================================+ +| | | +| .. code-block:: lua | .. 
code-block:: lua | +| | | +| $ | $ | +| | | ++-------------------------------------------------------------------------------+-------------------------------------------------------------------------------+ + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3371f50228a5193f0a41b7ec3b7acec4f0e55e4b..58d917314372edbf732f957f47e8d11820032874 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,7 +25,7 @@ lua_source(lua_sources lua/console.lua) lua_source(lua_sources lua/bsdsocket.lua) lua_source(lua_sources lua/errno.lua) lua_source(lua_sources lua/log.lua) -lua_source(lua_sources lua/box_net_box.lua) +lua_source(lua_sources lua/net_box.lua) lua_source(lua_sources lua/help.lua) lua_source(lua_sources lua/help_en_US.lua) lua_source(lua_sources lua/tap.lua) diff --git a/src/lua/bsdsocket.lua b/src/lua/bsdsocket.lua index 7eaeafdbe7cfd80fb204403cc0ea1d3216330374..966fba483ce452a683999307a17b92f038e6150e 100644 --- a/src/lua/bsdsocket.lua +++ b/src/lua/bsdsocket.lua @@ -2,6 +2,7 @@ local TIMEOUT_INFINITY = 500 * 365 * 86400 local LIMIT_INFINITY = 4294967295 +local READAHEAD = 16380 local ffi = require('ffi') local boxerrno = require('errno') @@ -178,7 +179,7 @@ end socket_methods.sysread = function(self, size) local fd = check_socket(self) - size = size or 4096 + size = size or READAHEAD self._errno = nil local buf = ffi.new('char[?]', size) local res = ffi.C.read(fd, buf, size) @@ -554,7 +555,7 @@ local function readchunk(self, limit, timeout) if self.rlen >= limit then self._errno = nil - local data = string.sub(self.rbuf, self.rpos, self.rpos + limit) + local data = string.sub(self.rbuf, self.rpos, self.rpos - 1 + limit) self.rlen = self.rlen - limit self.rpos = self.rpos + limit return data @@ -570,7 +571,7 @@ local function readchunk(self, limit, timeout) timeout = timeout - ( fiber.time() - started ) local to_read - if limit 
~= LIMIT_INFINITY then + if limit ~= LIMIT_INFINITY and limit > READAHEAD then to_read = limit - self.rlen end local data = self:sysread(to_read) @@ -582,7 +583,7 @@ local function readchunk(self, limit, timeout) limit = self.rlen end if self.rlen >= limit then - data = string.sub(self.rbuf, self.rpos, self.rpos + limit) + data = string.sub(self.rbuf, self.rpos, self.rpos - 1 + limit) self.rlen = self.rlen - limit self.rpos = self.rpos + limit return data @@ -618,7 +619,7 @@ local function readline_check(self, eols, limit) end end if shortest == nil and self.rlen >= limit then - shortest = string.sub(self.rbuf, self.rpos, self.rpos + limit) + shortest = string.sub(self.rbuf, self.rpos, self.rpos - 1 + limit) end if shortest ~= nil then local len = string.len(shortest) @@ -653,7 +654,7 @@ local function readline(self, limit, eol, timeout) timeout = timeout - ( fiber.time() - started ) local to_read - if limit ~= LIMIT_INFINITY then + if limit ~= LIMIT_INFINITY and limit > READAHEAD then to_read = limit - self.rlen end local data = self:sysread(to_read) diff --git a/src/lua/init.cc b/src/lua/init.cc index c966eda74b702373c200410acd9ba4ece3817cfa..aa1b11cc6680ffc3f0f66e65319d7acf5a1579d5 100644 --- a/src/lua/init.cc +++ b/src/lua/init.cc @@ -79,7 +79,7 @@ extern char uuid_lua[], uri_lua[], bsdsocket_lua[], console_lua[], - box_net_box_lua[], + net_box_lua[], help_lua[], help_en_US_lua[], tap_lua[], @@ -96,7 +96,7 @@ static const char *lua_modules[] = { "uri", uri_lua, "fio", fio_lua, "socket", bsdsocket_lua, - "net.box", box_net_box_lua, + "net.box", net_box_lua, "console", console_lua, "tap", tap_lua, "help.en_US", help_en_US_lua, diff --git a/src/lua/box_net_box.lua b/src/lua/net_box.lua similarity index 95% rename from src/lua/box_net_box.lua rename to src/lua/net_box.lua index 70e3e2077ba21a8b41cd4a59dc8d3b9f11e1daad..cc77795de575121a5e2c3e1b649076923cffa35a 100644 --- a/src/lua/box_net_box.lua +++ b/src/lua/net_box.lua @@ -592,7 +592,9 @@ local remote_methods = { 
self:_switch_state('error') self:_error_waiters(emsg) self.rbuf = '' - self.wbuf = '' + self.rpos = 1 + self.rlen = 0 + self.wbuf = {} self.handshake = '' end, @@ -615,11 +617,14 @@ local remote_methods = { _check_console_response = function(self) while true do - local resp = string.match(self.rbuf, '.-\n[.][.][.]\r?\n') + local resp = string.match(self.rbuf, '.-\n[.][.][.]\r?\n', + self.rpos) if resp == nil then break end - self.rbuf = string.sub(self.rbuf, #resp + 1) + local len = #resp + self.rpos = self.rpos + len + self.rlen = self.rlen - len local result = yaml.decode(resp) if result ~= nil then @@ -656,31 +661,29 @@ local remote_methods = { end while true do - if #self.rbuf < 5 then + if self.rlen < 5 then break end - local len, off = msgpack.decode(self.rbuf) + local len, off = msgpack.decode(self.rbuf, self.rpos) -- wait for correct package length - local roff = off + len - 1 - if roff > #self.rbuf then + if off + len > #self.rbuf + 1 then break end - local pkt = string.sub(self.rbuf, 1, roff) - self.rbuf = string.sub(self.rbuf, roff + 1) - - local hdr, body - hdr, off = msgpack.decode(pkt, off) - if off <= #pkt then - body, off = msgpack.decode(pkt, off) + hdr, off = msgpack.decode(self.rbuf, off) + if off <= #self.rbuf then + body, off = msgpack.decode(self.rbuf, off) -- disable YAML flow output (useful for admin console) setmetatable(body, mapping_mt) else body = {} end + self.rpos = off + self.rlen = #self.rbuf + 1 - self.rpos + local sync = hdr[SYNC] if self.ch.sync[sync] ~= nil then @@ -791,8 +794,10 @@ local remote_methods = { elseif string.len(self.handshake) ~= 128 then self:_fatal("Can't read handshake") else - self.wbuf = '' + self.wbuf = {} self.rbuf = '' + self.rpos = 1 + self.rlen = 0; if string.match(self.handshake, '^Tarantool .*console') then self.console = true @@ -959,10 +964,13 @@ local remote_methods = { local data = self.s:sysread() if data ~= nil then - if data == '' then + if #data == 0 then self:_fatal('Remote host closed 
connection') else - self.rbuf = self.rbuf .. data + self.rbuf = string.sub(self.rbuf, self.rpos) .. + data + self.rpos = 1 + self.rlen = #self.rbuf self:_check_response() end else @@ -985,7 +993,7 @@ local remote_methods = { break end - if string.len(self.wbuf) == 0 then + if self.wbuf[1] == nil then local wstate = self._to_rstate[self.state] if wstate ~= nil then @@ -998,12 +1006,17 @@ local remote_methods = { break end if self:_is_rw_state() then - if #self.wbuf > 0 then - local written = self.s:syswrite(self.wbuf) + if self.wbuf[1] ~= nil then + local s = table.concat(self.wbuf) + self.wbuf = {} + local written = self.s:syswrite(s) if written ~= nil then - self.wbuf = string.sub(self.wbuf, - tonumber(1 + written)) + if written ~= #s then + table.insert(self.wbuf, + string.sub(s, written + 1)) + end else + table.insert(self.wbuf, s) self:_fatal(errno.strerror(errno())) end end @@ -1062,7 +1075,7 @@ local remote_methods = { self.timeouts[fid] = TIMEOUT_INFINITY end - self.wbuf = self.wbuf .. request + table.insert(self.wbuf, request) local wstate = self._to_wstate[self.state] if wstate ~= nil then diff --git a/test/box/bsdsocket.result b/test/box/bsdsocket.result index 9f37c31cb49eb6d2436528906465a557cf4a9fad..948aeb702425a96a89cae5efc1d5d53797c203c1 100644 --- a/test/box/bsdsocket.result +++ b/test/box/bsdsocket.result @@ -70,7 +70,7 @@ s:close() s:close() --- -- error: 'builtin/socket.lua:82: attempt to use closed socket' +- error: 'builtin/socket.lua:83: attempt to use closed socket' ... LISTEN = require('uri').parse(box.cfg.listen) --- @@ -279,7 +279,7 @@ s:getsockopt('SOL_SOCKET', 'SO_DEBUG') ... s:setsockopt('SOL_SOCKET', 'SO_ACCEPTCONN', 1) --- -- error: 'builtin/socket.lua:356: Socket option SO_ACCEPTCONN is read only' +- error: 'builtin/socket.lua:357: Socket option SO_ACCEPTCONN is read only' ... s:getsockopt('SOL_SOCKET', 'SO_RCVBUF') > 32 --- @@ -1030,7 +1030,7 @@ ch:get(1) ... 
s:error() --- -- error: 'builtin/socket.lua:82: attempt to use closed socket' +- error: 'builtin/socket.lua:83: attempt to use closed socket' ... -- random port port = 33123