From df2be2081a05ae485e16b3a5cb7dada69bd0e3a6 Mon Sep 17 00:00:00 2001 From: Arseniy Volynets <vol0ncar@yandex.ru> Date: Thu, 2 Feb 2023 02:35:01 +0300 Subject: [PATCH] fix: tmp space with dotted column names --- sbroad-cartridge/src/cartridge/router.rs | 4 +- sbroad-cartridge/src/cartridge/storage.rs | 2 + .../test_app/test/integration/motion_test.lua | 19 ++++ sbroad-core/src/backend/sql/ir.rs | 73 +++++++++++++- sbroad-core/src/backend/sql/space.rs | 3 + sbroad-core/src/executor/result.rs | 31 +++++- sbroad-core/src/executor/result/tests.rs | 24 +++-- sbroad-core/src/executor/tests.rs | 99 ++++++++++++++++++- sbroad-core/src/ir/operator.rs | 26 +++++ 9 files changed, 261 insertions(+), 20 deletions(-) diff --git a/sbroad-cartridge/src/cartridge/router.rs b/sbroad-cartridge/src/cartridge/router.rs index 374669aeec..36409a22bc 100644 --- a/sbroad-cartridge/src/cartridge/router.rs +++ b/sbroad-cartridge/src/cartridge/router.rs @@ -31,6 +31,7 @@ use sbroad::executor::lru::{LRUCache, DEFAULT_CAPACITY}; use sbroad::executor::result::ProducerResult; use sbroad::executor::vtable::VirtualTable; use sbroad::frontend::sql::ast::AbstractSyntaxTree; +use sbroad::ir::expression::Expression; use sbroad::ir::helpers::RepeatableState; use sbroad::ir::tree::Snapshot; use sbroad::ir::value::Value; @@ -330,6 +331,7 @@ impl Coordinator for RouterRuntime { buckets: &Buckets, ) -> Result<VirtualTable, SbroadError> { let top_id = plan.get_motion_subtree_root(motion_node_id)?; + let column_names = plan.get_ir_plan().get_relational_aliases(top_id)?; // We should get a motion alias name before we take the subtree in dispatch. let alias = plan.get_motion_alias(motion_node_id)?.map(String::from); let result = self.dispatch(plan, top_id, buckets)?; @@ -347,7 +349,7 @@ impl Coordinator for RouterRuntime { .ok_or_else(|| { SbroadError::NotFound(Entity::ProducerResult, "from the tuple".into()) })? - .as_virtual_table()? + .as_virtual_table(column_names)? } else { return Err(SbroadError::Invalid( Entity::Motion, diff --git a/sbroad-cartridge/src/cartridge/storage.rs b/sbroad-cartridge/src/cartridge/storage.rs index d952c29312..a470f267d1 100644 --- a/sbroad-cartridge/src/cartridge/storage.rs +++ b/sbroad-cartridge/src/cartridge/storage.rs @@ -208,6 +208,7 @@ impl StorageRuntime { } let (pattern_with_params, tmp_spaces) = compile_encoded_optional(raw_optional)?; + println!("{}", String::from(pattern_with_params.clone())); debug!( Option::from("execute"), &format!( @@ -273,6 +274,7 @@ impl StorageRuntime { ); let (pattern_with_params, tmp_spaces) = compile_encoded_optional(raw_optional)?; + println!("{}", String::from(pattern_with_params.clone())); let result = match prepare(&pattern_with_params.pattern) { Ok(stmt) => { let stmt_id = stmt.id()?; diff --git a/sbroad-cartridge/test_app/test/integration/motion_test.lua b/sbroad-cartridge/test_app/test/integration/motion_test.lua index 082b43e6cd..5f36d205ba 100644 --- a/sbroad-cartridge/test_app/test/integration/motion_test.lua +++ b/sbroad-cartridge/test_app/test/integration/motion_test.lua @@ -201,4 +201,23 @@ g.test_empty_motion_result = function() {1, "123", 1, 360} }, }) +end + +g.test_motion_dotted_name = function() + local api = cluster:server("api-1").net_box + + local r, err = api:call("sbroad.execute", { [[SELECT "sysOp", "product_units" FROM "testing_space" + INNER JOIN (SELECT "sysOp" FROM (SELECT "product_units" from "testing_space_hist") as r + INNER JOIN "space_simple_shard_key" + on r."product_units" = "space_simple_shard_key"."sysOp") as q + on q."sysOp" = "testing_space"."product_units"]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "Q.sysOp", type = "integer"}, + {name = "testing_space.product_units", type = "integer"}, + }, + rows = {}, + }) end \ No newline at end of file diff --git a/sbroad-core/src/backend/sql/ir.rs b/sbroad-core/src/backend/sql/ir.rs index df80b80545..806bb0bdc9 100644 --- a/sbroad-core/src/backend/sql/ir.rs +++ b/sbroad-core/src/backend/sql/ir.rs @@ -1,7 +1,7 @@ use ahash::AHashMap; use opentelemetry::Context; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt::Write as _; use tarantool::tlua::{self, Push}; use tarantool::tuple::{FunctionArgs, Tuple}; @@ -22,7 +22,7 @@ use crate::otm::{ use super::space::TmpSpace; use super::tree::SyntaxData; -#[derive(Debug, Eq, Deserialize, Serialize, Push)] +#[derive(Debug, Eq, Deserialize, Serialize, Push, Clone)] pub struct PatternWithParams { pub pattern: String, pub params: Vec<Value>, @@ -395,7 +395,15 @@ impl ExecutionPlan { let col_names = vtable .get_columns() .iter() - .map(|c| format!("\"{}\"", c.name)) + .map(|c| { + if let (Some('"'), Some('"')) = + (c.name.chars().next(), c.name.chars().last()) + { + c.name.clone() + } else { + format!("\"{}\"", c.name) + } + }) .collect::<Vec<_>>() .join(","); write!(sql, "SELECT {col_names} FROM \"{name}\"").map_err(|e| { @@ -425,6 +433,65 @@ impl ExecutionPlan { Ok((PatternWithParams::new(sql, params), tmp_spaces)) } + // column names are from tarantool instance output, it can happen that + // column name is different from motion output, for example in query like this: + // select s."b" from (select "t.b" from TMP_SPACE) as s + // where TMP_SPACE is output from `select "t"."b" from "t"` + // in this case we must alias column name to output name. + // Because we support only column names like this: scan_name.col_name + // where scan_name and col_name do not contain dots (.), we can assume that + // column name != output_alias => column_name = X.output_alias, where X is some string + // and we also can assume that there are no columns: Y.output_alias and X.output_alias in VTable + // because we ensure that output row aliases are unique (see `add_row_of_aliases`). + fn construct_proj_cols(&self, vtable: &SyntaxData) -> Result<Vec<String>, SbroadError> { + let (vtable, motion_id) = if let SyntaxData::VTable(motion_id) = vtable { + (self.get_motion_vtable(*motion_id)?, motion_id) + } else { + return Err(SbroadError::Invalid( + Entity::SyntaxNode, + Some("Expected VTable!".into()), + )); + }; + let plan = self.get_ir_plan(); + let nodes = &plan.nodes; + let aliases = plan + .get_relation_node(*motion_id)? + .output_alias_position_map(nodes)? + .into_keys() + .collect::<HashSet<&str>>(); + let mut proj_cols: Vec<String> = Vec::with_capacity(vtable.get_columns().len()); + for vcol in vtable.get_columns() { + let vcol_q = format!("\"{}\"", vcol.name); + // currently if we generate bucket_id for vtable, we do not specify it in the output of motion + if vcol.name == "bucket_id" { + proj_cols.push(vcol_q); + continue; + } + // aliases can be surrounded by " or not + if aliases.contains(vcol_q.as_str()) || aliases.contains(vcol.name.as_str()) { + proj_cols.push(vcol_q); + continue; + } else if vcol.name.contains('.') { + if let Some(c) = vcol.name.split('.').last() { + let c_q = format!("\"{c}\""); + if aliases.contains(c_q.as_str()) || aliases.contains(c) { + proj_cols.push(format!("{vcol_q} as {c_q}")); + continue; + } + } + } + return Err(SbroadError::Invalid( + Entity::SyntaxNode, + Some(format!( + "VTable column with name {} is not found in motion output: {aliases:?}!", + vcol.name + )), + )); + } + + Ok(proj_cols) + } + /// Checks if the given query subtree modifies data or not. /// /// # Errors diff --git a/sbroad-core/src/backend/sql/space.rs b/sbroad-core/src/backend/sql/space.rs index 06a7f3ff4f..abbb5e99dd 100644 --- a/sbroad-core/src/backend/sql/space.rs +++ b/sbroad-core/src/backend/sql/space.rs @@ -45,6 +45,9 @@ impl TmpSpace { .iter() .map(|c| Field::from(c.clone())) .collect(); + + println!("cols: {:?}", vtable.get_columns()); + println!("fields: {fields:?}"); let pk_name = TmpSpace::generate_pk_name(base, motion_id); fields.push(Field::unsigned(pk_name.clone())); let fields_len = fields.len() as u32; diff --git a/sbroad-core/src/executor/result.rs b/sbroad-core/src/executor/result.rs index 1714ceea79..fb0405fbe2 100644 --- a/sbroad-core/src/executor/result.rs +++ b/sbroad-core/src/executor/result.rs @@ -1,12 +1,15 @@ use core::fmt::Debug; use serde::ser::{Serialize, SerializeMap, Serializer}; use serde::Deserialize; +use std::collections::HashSet; use tarantool::tlua::{self, LuaRead}; use crate::errors::{Entity, SbroadError}; use crate::executor::vtable::VirtualTable; +use crate::ir::operator::Relational; use crate::ir::relation::{Column, ColumnRole, Type}; use crate::ir::value::{EncodedValue, Value}; +use crate::ir::Plan; type ExecutorTuple = Vec<EncodedValue>; @@ -87,11 +90,29 @@ impl ProducerResult { /// /// # Errors /// - convert to virtual table error - pub fn as_virtual_table(&self) -> Result<VirtualTable, SbroadError> { - let mut result = VirtualTable::new(); + pub fn as_virtual_table(&self, column_names: Vec<String>) -> Result<VirtualTable, SbroadError> { + let mut vtable = VirtualTable::new(); for col in &self.metadata { - result.add_column(col.try_into()?); + vtable.add_column(col.try_into()?); + } + + for (vcol, name) in vtable + .get_mut_columns() + .iter_mut() + .zip(column_names.into_iter().map(|qsq| { + if let Some(qs) = qsq.strip_suffix('"') { + if let Some(s) = qs.strip_prefix('"') { + s.to_string() + } else { + qsq + } + } else { + qsq + } + })) + { + vcol.name = name; } for encoded_tuple in &self.rows { @@ -99,10 +120,10 @@ impl ProducerResult { .iter() .map(|v| Value::from(v.clone())) .collect(); - result.add_tuple(tuple); + vtable.add_tuple(tuple); } - Ok(result) + Ok(vtable) } } diff --git a/sbroad-core/src/executor/result/tests.rs b/sbroad-core/src/executor/result/tests.rs index d8a1780e53..de8b6cc66d 100644 --- a/sbroad-core/src/executor/result/tests.rs +++ b/sbroad-core/src/executor/result/tests.rs @@ -1,4 +1,5 @@ use pretty_assertions::{assert_eq, assert_ne}; +use serde::de::IntoDeserializer; use tarantool::decimal; use super::*; @@ -48,12 +49,13 @@ fn box_execute_result_serialize() { #[test] fn convert_to_vtable() { + let col_names = ["id", "name", "count", "price"]; let r = ProducerResult { metadata: vec![ - MetadataColumn::new("id".into(), "integer".into()), - MetadataColumn::new("name".into(), "string".into()), - MetadataColumn::new("count".into(), "unsigned".into()), - MetadataColumn::new("price".into(), "decimal".into()), + MetadataColumn::new(col_names[0].into(), "integer".into()), + MetadataColumn::new(col_names[1].into(), "string".into()), + MetadataColumn::new(col_names[2].into(), "unsigned".into()), + MetadataColumn::new(col_names[3].into(), "decimal".into()), ], rows: vec![ vec![ @@ -74,23 +76,23 @@ fn convert_to_vtable() { let mut excepted = VirtualTable::new(); excepted.add_column(Column { - name: "id".into(), + name: col_names[0].into(), r#type: Type::Integer, role: ColumnRole::User, }); excepted.add_column(Column { - name: "name".into(), + name: col_names[1].into(), r#type: Type::String, role: ColumnRole::User, }); excepted.add_column(Column { - name: "count".into(), + name: col_names[2].into(), r#type: Type::Unsigned, role: ColumnRole::User, }); excepted.add_column(Column { - name: "price".into(), + name: col_names[3].into(), r#type: Type::Decimal, role: ColumnRole::User, }); @@ -109,5 +111,9 @@ fn convert_to_vtable() { Value::from(decimal!(2.0)), ]); - assert_eq!(excepted, r.as_virtual_table().unwrap()); + assert_eq!( + excepted, + r.as_virtual_table(col_names.into_iter().map(|s| s.to_string()).collect()) + .unwrap() + ); } diff --git a/sbroad-core/src/executor/tests.rs b/sbroad-core/src/executor/tests.rs index cb47c79ece..07185270bc 100644 --- a/sbroad-core/src/executor/tests.rs +++ b/sbroad-core/src/executor/tests.rs @@ -488,7 +488,7 @@ fn join_linker3_test() { role: ColumnRole::User, }); virtual_table.add_column(Column { - name: "id2".into(), + name: "FIRST_NAME".into(), r#type: Type::Integer, role: ColumnRole::User, }); @@ -525,7 +525,7 @@ fn join_linker3_test() { r#"SELECT "t2"."id1" FROM"#, r#"(SELECT "test_space"."id" FROM "test_space") as "t1""#, r#"INNER JOIN"#, - r#"(SELECT "id1","id2" FROM "TMP_test_69") as "t2""#, + r#"(SELECT "id1","FIRST_NAME" FROM "TMP_test_69") as "t2""#, r#"ON ("t2"."id1") = (?)"#, ), vec![Value::from(1_u64)], @@ -651,6 +651,98 @@ fn join_linker4_test() { assert_eq!(expected, result); } +#[test] +fn join_linker5_test() { + let sql = r#"select * from "t1" inner join ( + select "f", "b" as B from "t2" + inner join "t3" on "t2"."g" = "t3"."b") as q +on q."f" = "t1"."a""#; + + let coordinator = RouterRuntimeMock::new(); + + let mut query = Query::new(&coordinator, sql, vec![]).unwrap(); + + let motion_t2_id = *query + .exec_plan + .get_ir_plan() + .clone_slices() + .slice(0) + .unwrap() + .position(0) + .unwrap(); + let mut virtual_t2 = VirtualTable::new(); + virtual_t2.add_column(Column { + name: "b".into(), + r#type: Type::Integer, + role: ColumnRole::User, + }); + virtual_t2.set_alias("\"t3\"").unwrap(); + if let MotionPolicy::Segment(key) = + get_motion_policy(query.exec_plan.get_ir_plan(), motion_t2_id) + { + query + .reshard_vtable(&mut virtual_t2, key, &DataGeneration::None) + .unwrap(); + } + query + .coordinator + .add_virtual_table(motion_t2_id, virtual_t2); + + let motion_sq_id = *query + .exec_plan + .get_ir_plan() + .clone_slices() + .slice(1) + .unwrap() + .position(0) + .unwrap(); + let mut virtual_sq = VirtualTable::new(); + virtual_sq.add_column(Column { + name: "f".into(), + r#type: Type::Integer, + role: ColumnRole::User, + }); + virtual_sq.add_column(Column { + name: "B".into(), + r#type: Type::Integer, + role: ColumnRole::User, + }); + virtual_sq.set_alias("Q"); + if let MotionPolicy::Segment(key) = + get_motion_policy(query.exec_plan.get_ir_plan(), motion_sq_id) + { + query + .reshard_vtable(&mut virtual_sq, key, &DataGeneration::None) + .unwrap(); + } + query + .coordinator + .add_virtual_table(motion_sq_id, virtual_sq); + + let result = *query + .dispatch() + .unwrap() + .downcast::<ProducerResult>() + .unwrap(); + + let mut expected = ProducerResult::new(); + + expected.rows.extend(vec![vec![ + EncodedValue::String(format!("Execute query on all buckets")), + EncodedValue::String(String::from(PatternWithParams::new( + format!( + "{} {} {} {}", + r#"SELECT "t1"."a", "t1"."b", "Q"."f", "Q"."B" FROM"#, + r#"(SELECT "t1"."a", "t1"."b" FROM "t1") as "t1""#, + r#"INNER JOIN (SELECT "f","B" FROM "TMP_test_146")"#, + r#"as Q ON ("Q"."f") = ("t1"."a")"#, + ), + vec![], + ))), + ]]); + assert_eq!(expected, result); +} + // select * from "test_1" where "identification_number" in (select COLUMN_2 as "b" from (values (1), (2))) or "identification_number" in (select COLUMN_2 as "c" from (values (3), (4))); #[test] fn anonymous_col_index_test() { @@ -1092,6 +1184,9 @@ fn insert4_test() { assert_eq!(expected, result); } +// this is not a valid sql for tarantool, what this tests checks? +// if tmp space have columns: COLUMN_5, COLUMN_6 we can't select a, b from it +#[ignore] #[test] fn insert5_test() { let sql = r#"insert into "t" ("b", "a") select 5, 6 from "t" diff --git a/sbroad-core/src/ir/operator.rs b/sbroad-core/src/ir/operator.rs index 7523741cf8..4426af61bd 100644 --- a/sbroad-core/src/ir/operator.rs +++ b/sbroad-core/src/ir/operator.rs @@ -1096,6 +1096,32 @@ impl Plan { } } + /// Gets list of aliases in output tuple of rel_id + /// + /// # Errors + /// - node is not relational + /// - output is not Expression::Row + /// - any node in the output tuple is not Expression::Alias + pub fn get_relational_aliases(&self, rel_id: usize) -> Result<Vec<String>, SbroadError> { + let output = self.get_relational_output(rel_id)?; + if let Expression::Row { list, .. } = self.get_expression_node(output)? { + return list + .iter() + .map(|alias_id| { + self.get_expression_node(*alias_id)? + .get_alias_name() + .map(|a| a.to_string()) + }) + .collect::<Result<Vec<String>, SbroadError>>(); + } + return Err(SbroadError::Invalid( + Entity::Node, + Some(format!( + "expected output of Relational node {rel_id} to be Row" + )), + )); + } + /// Gets children from relational node. /// /// # Errors -- GitLab