From 16e97e14fce47d81e5bab10a7fcc8987926f7a25 Mon Sep 17 00:00:00 2001
From: Vartan Babayan <v.babayan@picodata.io>
Date: Mon, 19 Aug 2024 16:53:33 +0400
Subject: [PATCH] feat: support human numbers in config for memtx

---
 CHANGELOG.md                 |   5 ++
 src/cli/args.rs              |   4 +-
 src/config.rs                | 151 +++++++++++++++++++++++++++++++++--
 test/int/test_config_file.py |  21 +++--
 4 files changed, 163 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a60e088ce0..4e489f91dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,7 +15,12 @@ with the `YY.MINOR.MICRO` scheme.
   Tiers can span multiple failure domains and a single cluster can have
   multiple tiers. Going forward it will be possible to specify which
   tier a table belongs to.
+
 - Order of columns in `_pico_service_route` table has changed.
+
+- Support human numbers to configure memtx.memory
+  Supported suffixes: K, M, G, T, 1K = 1024
+  (e.g picodata run --memtx-memory 10G)
 -->
 
 ### Lua API
diff --git a/src/cli/args.rs b/src/cli/args.rs
index 335d31262a..c327b192d8 100644
--- a/src/cli/args.rs
+++ b/src/cli/args.rs
@@ -1,5 +1,5 @@
 use crate::address::{HttpAddress, IprotoAddress};
-use crate::config::DEFAULT_USERNAME;
+use crate::config::{ByteSize, DEFAULT_USERNAME};
 use crate::instance::InstanceId;
 use crate::util::Uppercase;
 use clap::Parser;
@@ -255,7 +255,7 @@ pub struct Run {
     /// The amount of memory in bytes to allocate for the database engine.
     ///
     /// By default, 64 MiB is used.
-    pub memtx_memory: Option<u64>,
+    pub memtx_memory: Option<ByteSize>,
 
     #[clap(
         long = "service-password-file",
diff --git a/src/config.rs b/src/config.rs
index 469e024d1c..92cd595030 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -20,9 +20,12 @@ use crate::util::edit_distance;
 use crate::util::file_exists;
 use serde_yaml::Value as YamlValue;
 use std::collections::HashMap;
+use std::convert::{From, Into};
+use std::fmt::{Display, Formatter};
 use std::io::Write;
 use std::path::Path;
 use std::path::PathBuf;
+use std::str::FromStr;
 use tarantool::log::SayLevel;
 use tarantool::tlua;
 
@@ -169,7 +172,7 @@ Using configuration file '{args_path}'.");
     ///
     /// Note that this is different from [`Self::default`], which returns an
     /// "empty" instance of `Self`, because we need to distinguish between
-    /// paramteres being unspecified and parameters being set to the default
+    /// parameters being unspecified and parameters being set to the default
     /// value explicitly.
     pub fn with_defaults() -> Self {
         let mut config = Self::default();
@@ -1116,7 +1119,104 @@ impl InstanceConfig {
     pub fn memtx_memory(&self) -> u64 {
         self.memtx
             .memory
+            .clone()
             .expect("is set in PicodataConfig::set_defaults_explicitly")
+            .into()
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// ByteSize
+////////////////////////////////////////////////////////////////////////////////
+
+#[derive(Debug, Clone, PartialEq, serde::Deserialize, serde::Serialize, Introspection)]
+#[serde(try_from = "String", into = "String")]
+pub struct ByteSize {
+    amount: u64,
+    scale: Scale,
+}
+
+impl ByteSize {
+    fn as_u64(&self) -> Option<u64> {
+        const UNIT: u64 = 1024;
+        let multiplier = match self.scale {
+            Scale::Bytes => 1,
+            Scale::Kilobytes => UNIT,
+            Scale::Megabytes => UNIT.pow(2),
+            Scale::Gigabytes => UNIT.pow(3),
+            Scale::Terabytes => UNIT.pow(4),
+        };
+
+        self.amount.checked_mul(multiplier)
+    }
+}
+
+tarantool::define_str_enum! {
+    #[derive(Default)]
+   pub enum Scale {
+        #[default]
+        Bytes = "B",
+        Kilobytes = "K",
+        Megabytes = "M",
+        Gigabytes = "G",
+        Terabytes = "T",
+   }
+}
+
+impl Scale {
+    fn from_char(value: char) -> Option<Self> {
+        value.to_string().parse().ok()
+    }
+}
+
+impl TryFrom<String> for ByteSize {
+    type Error = String;
+
+    fn try_from(value: String) -> Result<Self, Self::Error> {
+        value.parse()
+    }
+}
+
+impl FromStr for ByteSize {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let (s, scale) = if let Some(scale) = s.chars().last().and_then(Scale::from_char) {
+            (&s[..s.len() - 1], scale)
+        } else {
+            (s, Scale::Bytes)
+        };
+
+        let size = ByteSize {
+            amount: s
+                .parse::<u64>()
+                .map_err(|err: std::num::ParseIntError| err.to_string())?,
+            scale,
+        };
+
+        if size.as_u64().is_none() {
+            return Err("Value is too large".to_string());
+        }
+
+        Ok(size)
+    }
+}
+
+impl From<ByteSize> for u64 {
+    fn from(memory: ByteSize) -> Self {
+        memory.as_u64().expect("should be valid ByteSize")
+    }
+}
+
+impl From<ByteSize> for String {
+    fn from(memory: ByteSize) -> Self {
+        memory.to_string()
+    }
+}
+
+impl Display for ByteSize {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}{}", self.amount, self.scale)
     }
 }
 
@@ -1136,8 +1236,8 @@ pub struct MemtxSection {
     /// Minimum is 32MB (32 * 1024 * 1024).
     ///
     /// Corresponds to `box.cfg.memtx_memory`.
-    #[introspection(config_default = 64 * 1024 * 1024)]
-    pub memory: Option<u64>,
+    #[introspection(config_default = "64M")]
+    pub memory: Option<ByteSize>,
 
     /// The maximum number of snapshots that are stored in the memtx_dir
     /// directory. If the number of snapshots after creating a new one exceeds
@@ -1881,12 +1981,12 @@ instance:
             );
             let config = setup_for_tests(Some(yaml), &["run",
                 "-c", "  instance.log .level =debug  ",
-                "--config-parameter", "instance. memtx . memory=  0xdeadbeef",
+                "--config-parameter", "instance. memtx . memory=  999",
             ]).unwrap();
             assert_eq!(config.instance.tier.unwrap(), "ABC");
             assert_eq!(config.cluster.cluster_id.unwrap(), "DEF");
             assert_eq!(config.instance.log.level.unwrap(), args::LogLevel::Debug);
-            assert_eq!(config.instance.memtx.memory.unwrap(), 0xdead_beef);
+            assert_eq!(config.instance.memtx.memory.unwrap().to_string(), String::from("999B"));
             assert_eq!(config.instance.audit.unwrap(), "audit.txt");
             assert_eq!(config.instance.data_dir.unwrap(), PathBuf::from("."));
 
@@ -1982,4 +2082,45 @@ instance:
         );
         assert!(!pg.ssl());
     }
+
+    #[test]
+    fn test_bytesize_from_str() {
+        let res = |s| ByteSize::from_str(s).unwrap();
+        assert_eq!(res("1024").to_string(), "1024B");
+        assert_eq!(u64::from(res("1024")), 1024);
+
+        assert_eq!(res("256B").to_string(), "256B");
+        assert_eq!(u64::from(res("256B")), 256);
+
+        assert_eq!(res("64K").to_string(), "64K");
+        assert_eq!(u64::from(res("64K")), 65_536);
+
+        assert_eq!(res("949M").to_string(), "949M");
+        assert_eq!(u64::from(res("949M")), 995_098_624);
+
+        assert_eq!(res("1G").to_string(), "1G");
+        assert_eq!(u64::from(res("1G")), 1_073_741_824);
+
+        assert_eq!(res("10T").to_string(), "10T");
+        assert_eq!(u64::from(res("10T")), 10_995_116_277_760);
+
+        assert_eq!(res("0M").to_string(), "0M");
+        assert_eq!(u64::from(res("0M")), 0);
+
+        assert_eq!(res("185T").to_string(), "185T");
+        assert_eq!(u64::from(res("185T")), 203_409_651_138_560);
+
+        let e = |s| ByteSize::from_str(s).unwrap_err();
+        assert_eq!(e(""), "cannot parse integer from empty string");
+        assert_eq!(e("M"), "cannot parse integer from empty string");
+        assert_eq!(e("X"), "invalid digit found in string");
+        assert_eq!(e("errstr"), "invalid digit found in string");
+        assert_eq!(e(" "), "invalid digit found in string");
+        assert_eq!(e("1Z"), "invalid digit found in string");
+        assert_eq!(e("1 K"), "invalid digit found in string");
+        assert_eq!(e("1_000"), "invalid digit found in string");
+        assert_eq!(e("1 000"), "invalid digit found in string");
+        assert_eq!(e("1 000 X"), "invalid digit found in string");
+        assert_eq!(e("17000000T"), "Value is too large");
+    }
 }
diff --git a/test/int/test_config_file.py b/test/int/test_config_file.py
index ba8ef48235..530ab8975d 100644
--- a/test/int/test_config_file.py
+++ b/test/int/test_config_file.py
@@ -104,7 +104,7 @@ instance:
             ),
             peer=dict(value=[f"{host}:{port}"], source="config_file"),
             memtx=dict(
-                memory=dict(value=64 * 1024 * 1024, source="default"),
+                memory=dict(value="64M", source="default"),
                 checkpoint_count=dict(value=2, source="default"),
                 checkpoint_interval=dict(value=3600, source="default"),
             ),
@@ -138,13 +138,13 @@ cluster:
         default:
 instance:
     memtx:
-        memory: 0xdeadbeef
+        memory: 256K
             """
         )
     instance.start(cwd=work_dir)
     instance.wait_online()
 
-    assert instance.eval("return box.cfg.memtx_memory") == 0xDEADBEEF
+    assert instance.eval("return box.cfg.memtx_memory") == 262144
     instance.terminate()
 
     # But if a config is specified explicitly, it will be used instead
@@ -158,7 +158,7 @@ cluster:
         default:
 instance:
     memtx:
-        memory: 0xcafebabe
+        memory: 512M
             """
         )
     instance.env["PICODATA_CONFIG_FILE"] = config_path
@@ -173,7 +173,7 @@ Using configuration file '{config_path}'.
     instance.wait_online()
     assert crawler.matched
 
-    assert instance.eval("return box.cfg.memtx_memory") == 0xCAFEBABE
+    assert instance.eval("return box.cfg.memtx_memory") == 536870912
     instance.terminate()
 
 
@@ -298,8 +298,7 @@ cluster:
     assert box_cfg.get("log") is None  # type: ignore
     assert box_cfg["log_level"] == 6  # means verbose -- set by our testing harness
     assert box_cfg["log_format"] == "plain"
-
-    assert box_cfg["memtx_memory"] == 64 * 1024 * 1024
+    assert box_cfg["memtx_memory"] == 67108864
     assert box_cfg["slab_alloc_factor"] == 1.05
     assert box_cfg["checkpoint_count"] == 2
     assert box_cfg["checkpoint_interval"] == 3600
@@ -337,7 +336,7 @@ instance:
         format: json
 
     memtx:
-        memory: 0x7777777
+        memory: 2G
         checkpoint_count: 8
         checkpoint_interval: 1800
 
@@ -381,7 +380,7 @@ instance:
     assert box_cfg["log_level"] == 7  # means debug
     assert box_cfg["log_format"] == "json"
 
-    assert box_cfg["memtx_memory"] == 0x777_7777
+    assert box_cfg["memtx_memory"] == 2147483648
     assert box_cfg["checkpoint_count"] == 8
     assert box_cfg["checkpoint_interval"] == 1800
 
@@ -457,7 +456,7 @@ def test_output_config_parameters(cluster: Cluster):
         instance_id: from-config
         replicaset_id: with-love
         memtx:
-            memory: 42069
+            memory: 42069B
     """
     )
 
@@ -480,7 +479,7 @@ def test_output_config_parameters(cluster: Cluster):
         'instance.shredding': false
         'instance.log.level': "verbose"
         'instance.log.format': "plain"
-        'instance.memtx.memory': 42069"""
+        'instance.memtx.memory': \"42069B\""""
 
     params_list = [line.strip().encode("ASCII") for line in output_params.splitlines()]
     found_params = set()
-- 
GitLab