From 523f71576edce05501ab146ceddd78311f24279a Mon Sep 17 00:00:00 2001
From: Georgy Moshkin <gmoshkin@picodata.io>
Date: Wed, 20 Mar 2024 13:52:26 +0300
Subject: [PATCH] feat: config.yaml: add support for some memtx, vinyl & iproto
 box.cfg parameters

---
 src/config.rs                | 267 ++++++++++++++++++++++++++++++++++-
 src/tarantool.rs             |  13 ++
 test/int/test_config_file.py |  67 ++++++++-
 3 files changed, 338 insertions(+), 9 deletions(-)

diff --git a/src/config.rs b/src/config.rs
index 8939454395..8bbaec8fc6 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -247,7 +247,7 @@ Using configuration file '{args_path}'.");
         }
 
         if let Some(memtx_memory) = args.memtx_memory {
-            self.instance.memtx_memory = Some(memtx_memory);
+            self.instance.memtx.memory = Some(memtx_memory);
         }
 
         // --config-parameter has higher priority than other command line
@@ -623,12 +623,22 @@ pub struct InstanceConfig {
     /// deleting.
     pub shredding: Option<bool>,
 
-    pub memtx_memory: Option<u64>,
-
     #[serde(default)]
     #[introspection(nested)]
     pub log: LogSection,
 
+    #[serde(default)]
+    #[introspection(nested)]
+    pub memtx: MemtxSection,
+
+    #[serde(default)]
+    #[introspection(nested)]
+    pub vinyl: VinylSection,
+
+    #[serde(default)]
+    #[introspection(nested)]
+    pub iproto: IprotoSection,
+
     /// Special catch-all field which will be filled by serde with all unknown
     /// fields from the yaml file.
     #[serde(flatten)]
@@ -721,10 +731,255 @@ impl InstanceConfig {
 
     #[inline]
     pub fn memtx_memory(&self) -> u64 {
-        self.memtx_memory.unwrap_or(64 * 1024 * 1024)
+        self.memtx.memory.unwrap_or(64 * 1024 * 1024)
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MemtxSection
+////////////////////////////////////////////////////////////////////////////////
+
+#[derive(
+    PartialEq,
+    Default,
+    Debug,
+    Clone,
+    serde::Deserialize,
+    serde::Serialize,
+    tlua::Push,
+    tlua::PushInto,
+    Introspection,
+)]
+#[serde(deny_unknown_fields)]
+pub struct MemtxSection {
+    // Not supported yet.
+    // /// Enable the [transactional manager](https://www.tarantool.io/en/doc/latest/concepts/atomic/txn_mode_mvcc/#txn-mode-transaction-manager)
+    // /// if set to true.
+    // pub use_mvcc_engine: Option<bool>,
+
+    // Not supported yet.
+    // /// Specify the allocator that manages memory for memtx tuples. Possible values:
+    // /// - `system` – the memory is allocated as needed, checking that the quota
+    // ///              is not exceeded. The allocator is based on the `malloc` function.
+    // /// - `small` – a slab allocator. The allocator repeatedly uses a memory
+    // ///             block to allocate objects of the same type. Note that this allocator is
+    // ///             prone to unresolvable fragmentation on specific workloads, so you can
+    // ///             switch to `system` in such cases.
+    // ///
+    // /// Corresponds to `box.cfg.memtx_allocator`.
+    // pub allocator: Option<MemtxAllocator>,
+
+    /// The multiplier for computing the sizes of memory chunks that tuples are
+    /// stored in. A lower value may result in less wasted memory depending on
+    /// the total amount of memory available and the distribution of item sizes.
+    ///
+    /// Corresponds to `box.cfg.slab_alloc_factor`.
+    pub allocation_factor: Option<f64>,
+
+    // Not supported yet.
+    // /// Specify the granularity (in bytes) of memory allocation in the small
+    // /// allocator. The memtx.allocation_granularity value should meet the
+    // /// following conditions:
+    // /// - The value is a power of two.
+    // /// - The value is greater than or equal to 4.
+    // ///
+    // /// Below are a few recommendations on how to adjust the
+    // /// memtx.allocation_granularity option:
+    // /// - If the tuples in a space are small and have about the same size, set the
+    // ///   option to 4 bytes to save memory.
+    // /// - If the tuples are different-sized, increase the option value to
+    // ///   allocate tuples from the same mempool (memory pool).
+    // ///
+    // /// Corresponds to `box.cfg.slab_alloc_granularity`.
+    // pub allocation_granularity: Option<u64>,
+
+    // Not supported yet.
+    // Size of the smallest allocation unit. It can be decreased if most of the
+    // tuples are very small.
+    // pub min_tuple_size: Option<u64>,
+
+    // Not supported yet.
+    // Size of the largest allocation unit, for the memtx storage engine. It can
+    // be increased if it is necessary to store large tuples.
+    // pub max_tuple_size: Option<u64>,
+
+    /// How much memory is allocated to store tuples. When the limit is
+    /// reached, INSERT or UPDATE requests begin failing with the
+    /// ER_MEMORY_ISSUE error. The server does not go beyond this limit to
+    /// allocate tuples, but there is additional memory used to store indexes
+    /// and connection information.
+    ///
+    /// Minimum is 32MB (32 * 1024 * 1024).
+    ///
+    /// Corresponds to `box.cfg.memtx_memory`.
+    pub memory: Option<u64>,
+
+    /// The maximum number of snapshots that are stored in the memtx_dir
+    /// directory. If the number of snapshots after creating a new one exceeds
+    /// this value, the Tarantool garbage collector deletes old snapshots. If
+    /// the option is set to zero, the garbage collector does not delete old
+    /// snapshots.
+    ///
+    /// Corresponds to `box.cfg.checkpoint_count`.
+    pub checkpoint_count: Option<u64>,
+
+    /// The interval in seconds between actions by the checkpoint daemon. If the
+    /// option is set to a value greater than zero, and there is activity that
+    /// causes changes to the database, then the checkpoint daemon calls
+    /// box.snapshot() every checkpoint_interval seconds, creating a new
+    /// snapshot file each time. If the option is set to zero, the checkpoint
+    /// daemon is disabled.
+    ///
+    /// Corresponds to `box.cfg.checkpoint_interval`.
+    pub checkpoint_interval: Option<f64>,
+}
+
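+// A hedged usage sketch (illustration only, values borrowed from
+// test/int/test_config_file.py below): with the nesting above, a config.yaml
+// can set these parameters like so:
+//
+//     instance:
+//         memtx:
+//             memory: 0x7777777
+//             allocation_factor: 1.7
+//             checkpoint_count: 8
+//             checkpoint_interval: 1800
+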
+tarantool::define_str_enum! {
+    #[derive(Default)]
+    pub enum MemtxAllocator {
+        #[default]
+        Small = "small",
+        System = "system",
     }
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// VinylSection
+////////////////////////////////////////////////////////////////////////////////
+
+#[derive(
+    PartialEq,
+    Default,
+    Debug,
+    Clone,
+    serde::Deserialize,
+    serde::Serialize,
+    tlua::Push,
+    tlua::PushInto,
+    Introspection,
+)]
+#[serde(deny_unknown_fields)]
+pub struct VinylSection {
+    /// The maximum number of in-memory bytes that vinyl uses.
+    ///
+    /// Corresponds to `box.cfg.vinyl_memory`
+    pub memory: Option<u64>,
+
+    /// The cache size for the vinyl storage engine.
+    ///
+    /// Corresponds to `box.cfg.vinyl_cache`
+    pub cache: Option<u64>,
+
+    /// The maximum number of read threads that vinyl can use for some
+    /// concurrent operations, such as I/O and compression.
+    ///
+    /// Corresponds to `box.cfg.vinyl_read_threads`
+    pub read_threads: Option<u64>,
+
+    /// The maximum number of write threads that vinyl can use for some
+    /// concurrent operations, such as I/O and compression.
+    ///
+    /// Corresponds to `box.cfg.vinyl_write_threads`
+    pub write_threads: Option<u64>,
+
+    // Not supported yet: we shouldn't allow this one for now.
+    // pub max_tuple_size: Option<u64>,
+
+    /// Enables the deferred DELETE optimization for vinyl spaces by default.
+    ///
+    /// This can also be controlled on a per-table basis in the options for
+    /// `space_object:create_index()` (we don't support this yet in picodata).
+    ///
+    /// Corresponds to `box.cfg.vinyl_defer_deletes`
+    pub default_defer_deletes: Option<bool>,
+
+    /// The page size. A page is a read/write unit for vinyl disk operations.
+    ///
+    /// This can also be controlled on a per-table basis in the options for
+    /// `space_object:create_index()` (we don't support this yet in picodata).
+    ///
+    /// Corresponds to `box.cfg.vinyl_page_size`
+    pub default_page_size: Option<u64>,
+
+    /// The maximum number of runs per level in the vinyl LSM tree.
+    /// If this number is exceeded, a new level is created.
+    ///
+    /// This can also be controlled on a per-table basis in the options for
+    /// `space_object:create_index()` (we don't support this yet in picodata).
+    ///
+    /// Corresponds to `box.cfg.vinyl_run_count_per_level`
+    pub default_run_count_per_level: Option<u64>,
+
+    /// Ratio between the sizes of different levels in the LSM tree.
+    ///
+    /// This can also be controlled on a per-table basis in the options for
+    /// `space_object:create_index()` (we don't support this yet in picodata).
+    ///
+    /// Corresponds to `box.cfg.vinyl_run_size_ratio`
+    pub default_run_size_ratio: Option<f64>,
+
+    /// Bloom filter false positive rate: the acceptable probability of the
+    /// bloom filter returning a false positive.
+    ///
+    /// This can also be controlled on a per-table basis in the options for
+    /// `space_object:create_index()` (we don't support this yet in picodata).
+    ///
+    /// Corresponds to `box.cfg.vinyl_bloom_fpr`
+    pub default_bloom_fpr: Option<f64>,
+    // pub vinyl_timeout: Option<f64>, // do we need this also?
+}
+
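+// A hedged usage sketch (illustration only, values borrowed from
+// test/int/test_config_file.py below):
+//
+//     instance:
+//         vinyl:
+//             memory: 0x8888888
+//             cache: 0x4444444
+//             read_threads: 2
+//             write_threads: 3
+//             default_defer_deletes: true
+//             default_page_size: 0x8000
+//             default_run_count_per_level: 3
+//             default_run_size_ratio: 7
+//             default_bloom_fpr: 0.777
+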
+////////////////////////////////////////////////////////////////////////////////
+// IprotoSection
+////////////////////////////////////////////////////////////////////////////////
+
+#[derive(
+    PartialEq,
+    Default,
+    Debug,
+    Clone,
+    serde::Deserialize,
+    serde::Serialize,
+    tlua::Push,
+    tlua::PushInto,
+    Introspection,
+)]
+#[serde(deny_unknown_fields)]
+pub struct IprotoSection {
+    /// To handle messages, Tarantool allocates fibers. To prevent fiber
+    /// overhead from affecting the whole system, Tarantool restricts how many
+    /// messages the fibers handle, so that some pending requests are blocked.
+    ///
+    /// On powerful systems, increase net_msg_max and the scheduler will
+    /// immediately start processing pending requests.
+    ///
+    /// On weaker systems, decrease net_msg_max and the overhead may decrease
+    /// although this may take some time because the scheduler must wait until
+    /// already-running requests finish.
+    ///
+    /// When net_msg_max is reached, Tarantool suspends processing of incoming
+    /// packets until it has processed earlier messages. This is not a direct
+    /// restriction of the number of fibers that handle network messages, rather
+    /// it is a system-wide restriction of channel bandwidth. This in turn
+    /// causes restriction of the number of incoming network messages that the
+    /// transaction processor thread handles, and therefore indirectly affects
+    /// the fibers that handle network messages. (The number of fibers is
+    /// smaller than the number of messages because messages can be released as
+    /// soon as they are delivered, while incoming requests might not be
+    /// processed until some time after delivery.)
+    ///
+    /// On typical systems, the default value (768) is correct.
+    ///
+    /// Corresponds to `box.cfg.net_msg_max`
+    pub max_concurrent_messages: Option<u64>,
+
+    // Not supported yet.
+    // /// The size of the read-ahead buffer associated with a client connection.
+    // /// The larger the buffer, the more memory an active connection consumes and
+    // /// the more requests can be read from the operating system buffer in a
+    // /// single system call. The rule of thumb is to make sure the buffer can
+    // /// contain at least a few dozen requests. Therefore, if a typical tuple in
+    // /// a request is large, e.g. a few kilobytes or even megabytes, the
+    // /// read-ahead buffer size should be increased. If batched request
+    // /// processing is not used, it’s prudent to leave this setting at its
+    // /// default.
+    // ///
+    // /// Corresponds to `box.cfg.readahead`
+    // pub readahead_buffer_size: Option<u64>,
+}
+
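+// A hedged usage sketch (illustration only, value borrowed from
+// test/int/test_config_file.py below). If omitted, tarantool's default
+// net_msg_max of 768 applies:
+//
+//     instance:
+//         iproto:
+//             max_concurrent_messages: 0x600
+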
 ////////////////////////////////////////////////////////////////////////////////
 // LogSection
 ////////////////////////////////////////////////////////////////////////////////
@@ -1343,13 +1598,13 @@ instance:
             let mut config = PicodataConfig::read_yaml_contents(&yaml).unwrap();
             let args = args::Run::try_parse_from(["run",
                 "-c", "  instance.log .level =debug  ",
-                "--config-parameter", "instance. memtx_memory=  0xdeadbeef",
+                "--config-parameter", "instance. memtx . memory=  0xdeadbeef",
             ]).unwrap();
             config.set_from_args(args).unwrap();
             assert_eq!(config.instance.tier.unwrap(), "ABC");
             assert_eq!(config.cluster.cluster_id.unwrap(), "DEF");
             assert_eq!(config.instance.log.level.unwrap(), args::LogLevel::Debug);
-            assert_eq!(config.instance.memtx_memory.unwrap(), 0xdead_beef);
+            assert_eq!(config.instance.memtx.memory.unwrap(), 0xdead_beef);
             assert_eq!(config.instance.audit.unwrap(), "audit.txt");
             assert_eq!(config.instance.data_dir.unwrap(), ".");
 
diff --git a/src/tarantool.rs b/src/tarantool.rs
index 4642fe4194..347b5c413f 100644
--- a/src/tarantool.rs
+++ b/src/tarantool.rs
@@ -223,6 +223,19 @@ impl Cfg {
         const MAPPING: &[(&str, &str)] = &[
             // Other instance.log.* parameters are set explicitly above
             ("log_format",                  "instance.log.format"),
+            ("slab_alloc_factor",           "instance.memtx.allocation_factor"),
+            ("checkpoint_count",            "instance.memtx.checkpoint_count"),
+            ("checkpoint_interval",         "instance.memtx.checkpoint_interval"),
+            ("vinyl_memory",                "instance.vinyl.memory"),
+            ("vinyl_cache",                 "instance.vinyl.cache"),
+            ("vinyl_read_threads",          "instance.vinyl.read_threads"),
+            ("vinyl_write_threads",         "instance.vinyl.write_threads"),
+            ("vinyl_defer_deletes",         "instance.vinyl.default_defer_deletes"),
+            ("vinyl_page_size",             "instance.vinyl.default_page_size"),
+            ("vinyl_run_count_per_level",   "instance.vinyl.default_run_count_per_level"),
+            ("vinyl_run_size_ratio",        "instance.vinyl.default_run_size_ratio"),
+            ("vinyl_bloom_fpr",             "instance.vinyl.default_bloom_fpr"),
+            ("net_msg_max",                 "instance.iproto.max_concurrent_messages"),
         ];
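+        // Each pair maps a box.cfg field (left) to the picodata config path
+        // (right) it is filled from; e.g. `instance.vinyl.memory: 0x8888888`
+        // in config.yaml ends up as `box.cfg.vinyl_memory == 0x8888888`
+        // (cf. test/int/test_config_file.py).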
         for (box_field, picodata_field) in MAPPING {
             let value = config
diff --git a/test/int/test_config_file.py b/test/int/test_config_file.py
index 2bf975f670..0961c4059f 100644
--- a/test/int/test_config_file.py
+++ b/test/int/test_config_file.py
@@ -12,7 +12,9 @@ instance:
     cluster_id: test
     instance_id: from-config
     replicaset_id: with-love
-    memtx_memory: 42069
+
+    memtx:
+        memory: 42069
 """
     )
     instance = cluster.add_instance(instance_id=False, wait_online=False)
@@ -90,6 +92,9 @@ instance:
             listen=dict(host=host, port=str(port)),
             log=dict(level="verbose"),
             peers=[dict(host=host, port=str(port))],
+            iproto=[],
+            memtx=[],
+            vinyl=[],
             unknown_parameters=[],
         ),
         unknown_sections=[],
@@ -110,7 +115,8 @@ cluster:
     tiers:
         default:
 instance:
-    memtx_memory: 0xdeadbeef
+    memtx:
+        memory: 0xdeadbeef
             """
         )
     instance.start(cwd=work_dir)
@@ -129,7 +135,8 @@ cluster:
     tiers:
         default:
 instance:
-    memtx_memory: 0xcafebabe
+    memtx:
+        memory: 0xcafebabe
             """
         )
     instance.env["PICODATA_CONFIG_FILE"] = config_path
@@ -270,6 +277,23 @@ cluster:
     assert box_cfg["log_level"] == 6  # means verbose -- set by our testing harness
     assert box_cfg["log_format"] == "plain"
 
+    assert box_cfg["memtx_memory"] == 64 * 1024 * 1024
+    assert box_cfg["slab_alloc_factor"] == 1.05
+    assert box_cfg["checkpoint_count"] == 2
+    assert box_cfg["checkpoint_interval"] == 3600
+
+    assert box_cfg["vinyl_memory"] == 128 * 1024 * 1024
+    assert box_cfg["vinyl_cache"] == 128 * 1024 * 1024
+    assert box_cfg["vinyl_read_threads"] == 1
+    assert box_cfg["vinyl_write_threads"] == 4
+    assert box_cfg["vinyl_defer_deletes"] == False  # noqa: E712
+    assert box_cfg["vinyl_page_size"] == 8 * 1024
+    assert box_cfg["vinyl_run_count_per_level"] == 2
+    assert box_cfg["vinyl_run_size_ratio"] == 3.5
+    assert box_cfg["vinyl_bloom_fpr"] == 0.05
+
+    assert box_cfg["net_msg_max"] == 0x300
+
     #
     # Check explicitly set values
     #
@@ -286,6 +310,26 @@ instance:
         destination: file:/proc/self/fd/2  # this is how you say `stderr` explicitly
         level: debug
         format: json
+
+    memtx:
+        memory: 0x7777777
+        allocation_factor: 1.7
+        checkpoint_count: 8
+        checkpoint_interval: 1800
+
+    vinyl:
+        memory: 0x8888888
+        cache: 0x4444444
+        read_threads: 2
+        write_threads: 3
+        default_defer_deletes: true
+        default_page_size: 0x8000
+        default_run_count_per_level: 3
+        default_run_size_ratio: 7
+        default_bloom_fpr: 0.777
+
+    iproto:
+        max_concurrent_messages: 0x600
 """
     )
 
@@ -301,3 +345,20 @@ instance:
     assert box_cfg["log"] == "file:/proc/self/fd/2"
     assert box_cfg["log_level"] == 7  # means debug
     assert box_cfg["log_format"] == "json"
+
+    assert box_cfg["memtx_memory"] == 0x777_7777
+    assert box_cfg["slab_alloc_factor"] == 1.7
+    assert box_cfg["checkpoint_count"] == 8
+    assert box_cfg["checkpoint_interval"] == 1800
+
+    assert box_cfg["vinyl_memory"] == 0x8888888
+    assert box_cfg["vinyl_cache"] == 0x4444444
+    assert box_cfg["vinyl_read_threads"] == 2
+    assert box_cfg["vinyl_write_threads"] == 3
+    assert box_cfg["vinyl_defer_deletes"] == True  # noqa: E712
+    assert box_cfg["vinyl_page_size"] == 0x8000
+    assert box_cfg["vinyl_run_count_per_level"] == 3
+    assert box_cfg["vinyl_run_size_ratio"] == 7
+    assert box_cfg["vinyl_bloom_fpr"] == 0.777
+
+    assert box_cfg["net_msg_max"] == 0x600
-- 
GitLab