From 585ae0b400c0da866e0d8e6d0ec85b44fe00baed Mon Sep 17 00:00:00 2001 From: Denis Smirnov <sd@picodata.io> Date: Fri, 4 Oct 2024 16:11:55 +0700 Subject: [PATCH] fix: _sql_query eviction query logic There was a bug when SQL statistics LRU removed some old query from the _sql_query table. The query ref_counter field in unsigned, but the code decremented it below zero in the space on eviction. That produced an error (fixed). --- src/sql/otm.rs | 59 ++++++++++++++++++++++++++++---------------- test/int/test_sql.py | 33 +++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/src/sql/otm.rs b/src/sql/otm.rs index f6c847e8cb..68f75a772a 100644 --- a/src/sql/otm.rs +++ b/src/sql/otm.rs @@ -268,9 +268,17 @@ impl TrackedQueries { /// # Panics /// - If the `STATISTICS_CAPACITY` is less than 1 (impossible at the moment). #[must_use] + #[allow(unused_mut)] pub fn new() -> Self { + let mut capacity = STATISTICS_CAPACITY; + #[cfg(feature = "error_injection")] + { + if crate::error_injection::is_enabled("SQL_STATISTICS_CAPACITY_ONE") { + capacity = 1; + } + } Self { - queries: LRUCache::new(STATISTICS_CAPACITY, Some(Box::new(remove_query))).unwrap(), + queries: LRUCache::new(capacity, Some(Box::new(remove_query))).unwrap(), } } @@ -315,13 +323,13 @@ impl SqlStatTables { } pub fn delete_query(&self, query_id: &str) { - if self.queries.unref_or_delete(query_id).is_some() { - self.stats.delete_query(query_id) - } else { - tlog!( + match self.queries.unref_or_delete(query_id) { + SpaceOp::Delete => self.stats.delete_query(query_id), + SpaceOp::None | SpaceOp::Update => {} + SpaceOp::Error => tlog!( Error, "failed to delete query with id: {query_id}: more than 1 ref" - ); + ), } } } @@ -481,6 +489,14 @@ impl SqlStatDef { // SqlQueries //////////////////////////////////////////////////////////////////////////////// +#[derive(PartialEq)] +enum SpaceOp { + Delete, + Error, + None, + Update, +} + pub struct SqlQueries { space: Space, index: Index, @@ -530,33 +546,34 @@ impl SqlQueries { } } - fn unref_or_delete(&self, query_id: &str) -> Option<SqlQueryDef> { + fn unref_or_delete(&self, query_id: &str) -> SpaceOp { fn unref_or_delete_impl( queries: &SqlQueries, query_id: &str, - ) -> tarantool::Result<Option<SqlQueryDef>> { - let Some(tuple) = queries.index.get(&[query_id])? else { - return Ok(None); + ) -> tarantool::Result<SpaceOp> { + let Some(old_tuple) = queries.index.get(&[query_id])? else { + return Ok(SpaceOp::None); }; - queries - .space - .upsert(&tuple, [("-", SqlQueryDef::FIELD_REF_COUNTER, 1)])?; - - let query_def = tuple.decode::<SqlQueryDef>()?; - if query_def.ref_counter == 0 { + // If the reference counter is 1 before decrementing, + // we should delete the query. + let old_query_def = old_tuple.decode::<SqlQueryDef>()?; + if old_query_def.ref_counter == 1 { queries.space.delete(&[query_id])?; - return Ok(None); + return Ok(SpaceOp::Delete); } - - Ok(Some(query_def)) + // Update the tuple in the space. + queries + .space + .upsert(&old_tuple, [("-", SqlQueryDef::FIELD_REF_COUNTER, 1)])?; + Ok(SpaceOp::Update) } match unref_or_delete_impl(self, query_id) { - Ok(res) => res, + Ok(op) => op, Err(e) => { tlog!(Error, "{}: unref error: {e}", Self::TABLE_NAME); - None + SpaceOp::Error } } } diff --git a/test/int/test_sql.py b/test/int/test_sql.py index 57438eb040..029a8e0c01 100644 --- a/test/int/test_sql.py +++ b/test/int/test_sql.py @@ -5589,3 +5589,36 @@ def test_select_without_scan(cluster: Cluster): # check usage with limit data = i1.sql("select 1 limit 1") assert data == [[1]] + + +def test_sql_stat_tables(cluster: Cluster): + cluster.deploy(instance_count=1) + i1 = cluster.instances[0] + + def sql_options(query_id, traceable): + return {"query_id": query_id, "traceable": traceable} + + def check_sql_stat_tables(query_id): + data = i1.call("box.execute", """ select "query_id" from "_sql_query" """) + assert data["rows"] == [[query_id]] + data = i1.call( + "box.execute", """ select distinct "query_id" from "_sql_stat" """ + ) + assert data["rows"] == [[query_id]] + + # Set SQL statistics LRU capacity to 1 + i1.call("pico._inject_error", "SQL_STATISTICS_CAPACITY_ONE", True) + + # The first query is stored in the statistics tables. + data = i1.sql("values (1)", options=sql_options("query_1", True)) + assert data == [[1]] + check_sql_stat_tables("query_1") + + # Previous query was evicted from the statistics tables + # as LRU capacity is 1. + data = i1.sql("values (2)", options=sql_options("query_2", True)) + assert data == [[2]] + check_sql_stat_tables("query_2") + + # Disable injection + i1.call("pico._inject_error", "SQL_STATISTICS_CAPACITY_ONE", False) -- GitLab