diff --git a/sbroad-cartridge/test_app/test/integration/arbitrary_expressions_test.lua b/sbroad-cartridge/test_app/test/integration/arbitrary_expressions_test.lua index a468edf2aca719c063aa2151a6876ba7f23e1e07..1890c7ad1e62a3abc9dd0da2d605491ae6f91f0e 100644 --- a/sbroad-cartridge/test_app/test/integration/arbitrary_expressions_test.lua +++ b/sbroad-cartridge/test_app/test/integration/arbitrary_expressions_test.lua @@ -66,6 +66,12 @@ arbitrary_projection.test_arbitrary_invalid = function() select cast("id" * 2 > 0 as boolean), cast("id" * 2 > 0 as boolean) as "cast" from "arithmetic_space" ]], {} }) t.assert_str_contains(tostring(err), "rule parsing error") + + -- selection from values without cast + local _, err = api:call("sbroad.execute", { [[ + SELECT "id" FROM "arithmetic_space" WHERE "id" IN (SELECT * FROM (VALUES (1))) + ]], {} }) + t.assert_str_contains(tostring(err), "Sbroad Error: type any not implemented") end arbitrary_projection.test_arbitrary_valid = function() @@ -75,12 +81,12 @@ arbitrary_projection.test_arbitrary_valid = function() t.assert_equals(err, nil) t.assert_not_equals(res_all.rows, {}) - -- array of {true,true} with lenght equals to rows amount + -- array of {true,true} with length equals to rows amount local all_true = fun.map(function() return { true, true } end, res_all.rows):totable() - -- array of {false,false} with lenght equals to rows amount + -- array of {false,false} with length equals to rows amount local all_false = fun.map(function() return { false, false } end, res_all.rows):totable() @@ -147,8 +153,39 @@ arbitrary_projection.test_arbitrary_valid = function() -- projection consisted of arithmetic and unary local r, err = api:call("sbroad.execute", { [[ - select "id" is not null, "id" is not null as "not_null" from "arithmetic_space" + select "id" is not null, ("id" + 2) is not null as "cmp" from "arithmetic_space" ]], {} }) t.assert_equals(err, nil) t.assert_equals(r.rows, all_true) + + -- column selection from values + -- 
results in type erasing + local r, err = api:call("sbroad.execute", { [[ + SELECT COLUMN_1 FROM (VALUES (1)) + ]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "COLUMN_1", type = "any"}, + }, + rows = { + {1} + }, + }) + + -- column selection from values with cast + r, err = api:call("sbroad.execute", { [[ + SELECT CAST(COLUMN_1 as int) FROM (VALUES (1)) + ]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "COL_1", type = "integer"}, + }, + rows = { + {1} + }, + }) end diff --git a/sbroad-cartridge/test_app/test/integration/arithmetic_test.lua b/sbroad-cartridge/test_app/test/integration/arithmetic_test.lua index 2d047257ac4c489bdbffbb17e8a4a84c97469c25..906c02bca83683fc0fb6ba19b2bc26fd639a843e 100644 --- a/sbroad-cartridge/test_app/test/integration/arithmetic_test.lua +++ b/sbroad-cartridge/test_app/test/integration/arithmetic_test.lua @@ -654,8 +654,8 @@ end g.test_arithmetic_in_subquery = function() local api = cluster:server("api-1").net_box - local res_all, err = api:call("sbroad.execute", { [[select "id" from "arithmetic_space"]], {} }) - t.assert_equals(err, nil) + local res_all, err_all = api:call("sbroad.execute", { [[select "id" from "arithmetic_space"]], {} }) + t.assert_equals(err_all, nil) t.assert_not_equals(res_all.rows, {}) -- test arithmetic expressions in subquery projection and condition @@ -667,7 +667,7 @@ g.test_arithmetic_in_subquery = function() t.assert_equals(res_all, r) -- test subquery with asterisk and multiplication - local r, err = api:call("sbroad.execute", { [[ + r, err = api:call("sbroad.execute", { [[ select "id" from "arithmetic_space" where exists (select * from "arithmetic_space" where 1 * 1 = 2) ]], {} }) @@ -680,7 +680,7 @@ g.test_arithmetic_in_subquery = function() }) -- test subquery with multiplication in projection - local r, err = api:call("sbroad.execute", { [[ + r, err = api:call("sbroad.execute", { [[ select "id" from "arithmetic_space" where 
"id" in (select 2 * 3 from "arithmetic_space") ]], {} }) @@ -695,7 +695,7 @@ g.test_arithmetic_in_subquery = function() }) -- test nested subquery with arithmetic - local r, err = api:call("sbroad.execute", { [[ + r, err = api:call("sbroad.execute", { [[ select "id" from "arithmetic_space" where "id" in ( select 1 + 0 from "arithmetic_space" where exists ( diff --git a/sbroad-cartridge/test_app/test/integration/operators_test.lua b/sbroad-cartridge/test_app/test/integration/operators_test.lua index 3225c6e25abff7eff3e3abb7badc3794a7dfb866..55966e5d96808bed949040318ecba5ad7f505c35 100644 --- a/sbroad-cartridge/test_app/test/integration/operators_test.lua +++ b/sbroad-cartridge/test_app/test/integration/operators_test.lua @@ -334,6 +334,245 @@ g.test_is_not_null_2 = function() }) end +g.test_in_subquery_select_from_table = function() + local api = cluster:server("api-1").net_box + + local r, err = api:call("sbroad.execute", { [[ + SELECT "id" FROM "space_simple_shard_key" WHERE "id" IN (SELECT "id" FROM "testing_space") + ]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "id", type = "integer"}, + }, + rows = { + {1} + }, + }) +end + +g.test_not_in_subquery_select_from_values = function() + local api = cluster:server("api-1").net_box + + local r, err = api:call("sbroad.execute", { [[ + SELECT "id" FROM "space_simple_shard_key" + WHERE "id" NOT IN (SELECT cast(COLUMN_2 as int) FROM (VALUES (1), (3))) + ]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "id", type = "integer"}, + }, + rows = { + {10} + }, + }) +end + +g.test_in_subquery_select_from_values = function() + local api = cluster:server("api-1").net_box + + local r, err = api:call("sbroad.execute", { [[ + SELECT "id" FROM "space_simple_shard_key_hist" WHERE "id" IN (SELECT cast(COLUMN_1 as int) FROM (VALUES (1))) + ]], {1} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "id", type = "integer"}, + }, + 
rows = { + {1} + }, + }) +end + +g.test_exists_subquery_select_from_values = function () + local api = cluster:server("api-1").net_box + + -- Exists condition should return true on each row from t + -- as soon as it's subquery always returns one row. + local r, err = api:call("sbroad.execute", { [[ + SELECT "id" FROM "t" WHERE EXISTS (SELECT 0 FROM (VALUES (1))) + ]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "id", type = "integer"}, + }, + rows = { + {1}, {2} + }, + }) +end + +g.test_not_exists_subquery_select_from_values = function() + local api = cluster:server("api-1").net_box + + -- NotExists condition should return false on each row from t + -- as soon as it's subquery always returns one row. + local r, err = api:call("sbroad.execute", { [[ + SELECT "id" FROM "t" WHERE NOT EXISTS (SELECT cast(COLUMN_1 as int) FROM (VALUES (1))) + ]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "id", type = "integer"}, + }, + rows = { }, + }) +end + +g.test_exists_subquery_with_several_rows = function () + local api = cluster:server("api-1").net_box + + -- Exists condition should return true on each row from testing_space + -- as soon as it's subquery always returns two rows. + local _, err = api:call("sbroad.execute", { [[ + SELECT * FROM "testing_space" WHERE EXISTS (SELECT 0 FROM "t" WHERE "t"."id" = 1 or "t"."a" = (?)) + ]], {require('decimal').new(6.66)} }) + + t.assert_str_contains( + tostring(err), + "Failed to execute SQL statement: Expression subquery returned more than 1 row" + ) +end + +g.test_not_exists_subquery_with_several_rows = function() + local api = cluster:server("api-1").net_box + + -- NotExists condition should return false on each row from testing_space + -- as soon as it's subquery always returns two rows. 
+ local _, err = api:call("sbroad.execute", { [[ + SELECT * FROM "testing_space" + WHERE NOT EXISTS (SELECT 0 FROM "t" WHERE "t"."id" = 1 or "t"."a" = (?)) + ]], {require('decimal').new(6.66)} }) + + t.assert_str_contains( + tostring(err), + "Failed to execute SQL statement: Expression subquery returned more than 1 row" + ) +end + +g.test_exists_nested = function() + local api = cluster:server("api-1").net_box + + local r, err = api:call("sbroad.execute", { [[ + SELECT * FROM "testing_space" WHERE EXISTS + (SELECT 0 FROM (VALUES (1)) WHERE EXISTS (SELECT 0 FROM "t" WHERE "t"."id" = 1)) + ]], {1} }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "id", type = "integer"}, + {name = "name", type = "string"}, + {name = "product_units", type = "integer"}, + }, + rows = { + {1, "123", 1} + }, + }) +end + +g.test_exists_partitioned_in_selection_condition = function() + -- make sure table is located on both storages, not only on one storage + local storage1 = cluster:server("storage-1-1").net_box + local r, err = storage1:call("box.execute", { + [[select * from "t"]], {} + }) + t.assert_equals(err, nil) + t.assert_equals(true, next(r.rows) ~= nil) + + local storage2 = cluster:server("storage-2-1").net_box + r, err = storage2:call("box.execute", { + [[select * from "t"]], {} + }) + t.assert_equals(err, nil) + t.assert_equals(true, next(r.rows) ~= nil) + + + local api = cluster:server("api-1").net_box + + local r_all, err_all = api:call("sbroad.execute", { [[ + SELECT * FROM "t" + ]], {} }) + + t.assert_equals(err_all, nil) + t.assert_equals(r_all, { + metadata = { + {name = "id", type = "integer"}, + {name = "a", type = "number"}, + }, + rows = { + {1, 4.2}, {2, 6.66} + }, + }) + + local r, err = api:call("sbroad.execute", { [[ + SELECT * FROM "t" WHERE EXISTS (SELECT * FROM "testing_space") + ]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r_all, r) +end + +g.test_exists_partitioned_in_join_filter = function() + -- make sure table is 
located on both storages, not only on one storage + local storage1 = cluster:server("storage-1-1").net_box + local r, err = storage1:call("box.execute", { + [[select * from "t"]], {} + }) + t.assert_equals(err, nil) + t.assert_equals(true, next(r.rows) ~= nil) + + local storage2 = cluster:server("storage-2-1").net_box + r, err = storage2:call("box.execute", { + [[select * from "t"]], {} + }) + t.assert_equals(err, nil) + t.assert_equals(true, next(r.rows) ~= nil) + + + local api = cluster:server("api-1").net_box + + -- Inner child would be broadcasted and join still will be located + -- on both storages + local r_all, err_all = api:call("sbroad.execute", { [[ + SELECT * FROM + (SELECT "id" as "tid" FROM "t") as "t" + INNER JOIN + (SELECT "id" as "sid" FROM "space_simple_shard_key") as "s" + ON true + ]], {} }) + + t.assert_equals(err_all, nil) + t.assert_equals(r_all, { + metadata = { + {name = "t.tid", type = "integer"}, + {name = "s.sid", type = "integer"}, + }, + rows = { + {1, 1}, {1, 10}, {2, 1}, {2, 10} + }, + }) + + local r, err = api:call("sbroad.execute", { [[ + SELECT * FROM + (SELECT "id" as "tid" FROM "t") as "t" + INNER JOIN + (SELECT "id" as "sid" FROM "space_simple_shard_key") as "s" + ON EXISTS (SELECT * FROM "testing_space") + ]], {} }) + + t.assert_equals(err, nil) + t.assert_equals(r_all, r) +end + g.test_between1 = function() local api = cluster:server("api-1").net_box @@ -371,4 +610,4 @@ g.test_between2 = function() {1} }, }) -end \ No newline at end of file +end diff --git a/sbroad-core/src/backend/sql/ir/tests.rs b/sbroad-core/src/backend/sql/ir/tests.rs index 87ce24b046c38f98c742fb7b7496787700b9eb91..1b361531ca6263e52d5439af24e0400558589382 100644 --- a/sbroad-core/src/backend/sql/ir/tests.rs +++ b/sbroad-core/src/backend/sql/ir/tests.rs @@ -6,6 +6,7 @@ use crate::executor::engine::mock::RouterConfigurationMock; use crate::executor::ir::ExecutionPlan; use crate::frontend::sql::ast::AbstractSyntaxTree; use crate::frontend::Ast; +use 
crate::ir::transformation::helpers::sql_to_ir; use crate::ir::tree::Snapshot; use super::*; @@ -17,10 +18,7 @@ fn check_sql_with_snapshot( expected: PatternWithParams, snapshot: Snapshot, ) { - let metadata = &RouterConfigurationMock::new(); - let ast = AbstractSyntaxTree::new(query).unwrap(); - let mut plan = ast.resolve_metadata(metadata).unwrap(); - plan.bind_params(params).unwrap(); + let mut plan = sql_to_ir(query, params); plan.replace_in_operator().unwrap(); plan.split_columns().unwrap(); plan.set_dnf().unwrap(); diff --git a/sbroad-core/src/backend/sql/ir/tests/selection.rs b/sbroad-core/src/backend/sql/ir/tests/selection.rs index 1d791249d82a407bda7872893b4bd521f224de3c..65e855825413c38ba920a2acf65a1f8ac493b34f 100644 --- a/sbroad-core/src/backend/sql/ir/tests/selection.rs +++ b/sbroad-core/src/backend/sql/ir/tests/selection.rs @@ -2,6 +2,20 @@ use super::*; use crate::ir::tree::Snapshot; use crate::ir::value::Value; +#[test] +fn selection_column_from_values() { + let query = r#" + SELECT COLUMN_1 FROM (VALUES (1)) + "#; + + let expected = PatternWithParams::new( + format!("{}", r#"SELECT "COLUMN_1" FROM (VALUES (?))"#,), + vec![Value::Unsigned(1)], + ); + check_sql_with_snapshot(query, vec![], expected.clone(), Snapshot::Oldest); + check_sql_with_snapshot(query, vec![], expected, Snapshot::Latest); +} + #[test] fn selection1_latest() { let query = r#"SELECT "product_code" FROM "hash_testing" diff --git a/sbroad-core/src/backend/sql/ir/tests/sub_query.rs b/sbroad-core/src/backend/sql/ir/tests/sub_query.rs index f1a88d0ea1cf2f7f9ce374a90d6bb98f6bdbd1c0..1338dce922cb80809003a3fb7cdd0bbb7289ef72 100644 --- a/sbroad-core/src/backend/sql/ir/tests/sub_query.rs +++ b/sbroad-core/src/backend/sql/ir/tests/sub_query.rs @@ -139,3 +139,20 @@ fn sub_query3_latest() { ); check_sql_with_snapshot(query, params, expected, Snapshot::Latest); } + +#[test] +fn sub_query_exists() { + let query = + r#"SELECT "FIRST_NAME" FROM "test_space" WHERE EXISTS (SELECT 0 FROM 
"hash_testing")"#; + + let expected = PatternWithParams::new( + format!( + "{} {}", + r#"SELECT "test_space"."FIRST_NAME" FROM "test_space""#, + r#"WHERE exists (SELECT ? as "COL_1" FROM "hash_testing")"# + ), + vec![Value::from(0_u64)], + ); + check_sql_with_snapshot(query, vec![], expected.clone(), Snapshot::Oldest); + check_sql_with_snapshot(query, vec![], expected, Snapshot::Latest); +} diff --git a/sbroad-core/src/backend/sql/tree.rs b/sbroad-core/src/backend/sql/tree.rs index 98f44be52457cecd13a119915dc6b051a2db4174..bfecec092790baad23e614c6b1fce57d64fe85cb 100644 --- a/sbroad-core/src/backend/sql/tree.rs +++ b/sbroad-core/src/backend/sql/tree.rs @@ -7,7 +7,7 @@ use std::mem::take; use crate::errors::{Action, Entity, SbroadError}; use crate::executor::ir::ExecutionPlan; use crate::ir::expression::Expression; -use crate::ir::operator::{Bool, Relational}; +use crate::ir::operator::{Bool, Relational, Unary}; use crate::ir::tree::traversal::PostOrder; use crate::ir::tree::Snapshot; use crate::ir::Node; @@ -47,6 +47,9 @@ pub enum SyntaxData { } /// A syntax tree node. +/// +/// In order to understand the process of `left` (and `right`) fields filling +/// see `add_plan_node` function. #[derive(Clone, Deserialize, Debug, PartialEq, Eq, Serialize)] pub struct SyntaxNode { /// Payload @@ -56,8 +59,17 @@ pub struct SyntaxNode { /// other nodes have values (all children should be on the right of the /// current node in a case of in-order traversal - row or sub-query as /// an example). + /// + /// Literally the left node if we look at SQL query representation. + /// It's `None` in case: + /// * It's a first token in an SQL query. + /// * `OrderedSyntaxNodes` `try_from` method made it so during traversal. pub(crate) left: Option<usize>, /// Pointers to the right children. + /// + /// Literally the right node if we look at SQL query representation. + /// Sometimes this field may contain the node itself but converted from `Node` to `SyntaxNode` representation. E.g. 
see how + /// `Expression::Bool` operator is added to `right` being transformed to `SyntaxNode::Operator` in `add_plan_node` function). pub(crate) right: Vec<usize>, } @@ -181,6 +193,7 @@ impl SyntaxNode { #[derive(Debug, Deserialize, PartialEq, Eq, Serialize)] pub struct SyntaxNodes { pub(crate) arena: Vec<SyntaxNode>, + /// Map of { node_id from `Plan` arena -> node_id from `SyntaxNodes`(Self) arena }. map: HashMap<usize, usize, RandomState>, } @@ -229,6 +242,7 @@ fn syntax_next<'nodes>(iter: &mut SyntaxIterator<'nodes>) -> Option<&'nodes usiz None => None, } } + impl SyntaxNodes { /// Add sub-query syntax node /// @@ -543,9 +557,26 @@ impl Select { /// A wrapper over original plan tree. /// We can modify it as we wish without any influence /// on the original plan tree. +/// +/// Example: +/// - Query: `SELECT "id" FROM "test_space"` +/// - `SyntaxPlan` (syntax node id -> plan node id): +/// 5 -> 11 (`ScanRelation` (`"test_space"`)) <- `top` = 5 +/// ├── 7 -> 15 (`Projection` (child = 11)) <- left +/// │ ├── None <- left +/// │ ├── 1 -> 13 (`Alias` (`"id"`)) +/// │ │ ├── None <- left +/// │ │ └── 0 -> 12 (`Reference` (target = 15, position = 0)) +/// │ │ ├── None <- left +/// │ │ └── [] +/// │ └── 6 -> From +/// │ ├── None <- left +/// │ └── [] +/// └── [] #[derive(Debug)] pub struct SyntaxPlan<'p> { pub(crate) nodes: SyntaxNodes, + /// Id of top `SyntaxNode`. top: Option<usize>, plan: &'p ExecutionPlan, snapshot: Snapshot, @@ -867,7 +898,7 @@ impl<'p> SyntaxPlan<'p> { Expression::Row { list, .. } => { // In projections with a huge amount of columns it can be // very expensive to retrieve corresponding relational nodes. - let rel_ids = ir_plan.get_relational_from_row_nodes(id)?; + let rel_ids = ir_plan.get_relational_nodes_from_row(id)?; if let Some(motion_id) = ir_plan.get_motion_among_rel_nodes(&rel_ids)? 
{ // Replace motion node to virtual table node @@ -972,13 +1003,15 @@ impl<'p> SyntaxPlan<'p> { Ok(self.nodes.push_syntax_node(sn)) } Expression::Unary { child, op, .. } => { - let sn = SyntaxNode::new_pointer( - id, - Some(self.nodes.get_syntax_node_id(*child)?), - vec![self - .nodes - .push_syntax_node(SyntaxNode::new_operator(&format!("{op}")))], - ); + let operator_node_id = self + .nodes + .push_syntax_node(SyntaxNode::new_operator(&format!("{op}"))); + let child_node_id = self.nodes.get_syntax_node_id(*child)?; + let (left, right) = match op { + Unary::IsNull | Unary::IsNotNull => (child_node_id, operator_node_id), + Unary::Exists | Unary::NotExists => (operator_node_id, child_node_id), + }; + let sn = SyntaxNode::new_pointer(id, Some(left), vec![right]); Ok(self.nodes.push_syntax_node(sn)) } Expression::StableFunction { @@ -1227,9 +1260,12 @@ impl<'p> SyntaxPlan<'p> { } } +/// Wrapper over `SyntaxNode` `arena` that is used for converting it to SQL. #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] pub struct OrderedSyntaxNodes { arena: Vec<SyntaxNode>, + /// Indices of nodes from `arena`. During the conversion to SQL the order of nodes from + /// `positions` is the order they will appear in SQL string representation. positions: Vec<usize>, } @@ -1271,6 +1307,7 @@ impl TryFrom<SyntaxPlan<'_>> for OrderedSyntaxNodes { stack.push(sp.get_top()?); while let Some(id) = stack.last() { let sn = sp.nodes.get_mut_syntax_node(*id)?; + // Note that in case `left` is a `Some(...)`, call of `take` will make it None. if let Some(left_id) = sn.left.take() { stack.push(left_id); } else if let Some(id) = stack.pop() { diff --git a/sbroad-core/src/executor/ir.rs b/sbroad-core/src/executor/ir.rs index 91c931082a49c9708ca7b70bb4a2c295fe384526..100689c718d3dd4aa930b5f252667d36345f7b20 100644 --- a/sbroad-core/src/executor/ir.rs +++ b/sbroad-core/src/executor/ir.rs @@ -39,9 +39,12 @@ impl ConnectionType { } } +/// Wrapper over `Plan` containing `vtables` map. 
#[derive(Debug, Default, Clone, Deserialize, Serialize, PartialEq, Eq)] pub struct ExecutionPlan { plan: Plan, + /// Virtual tables for `Motion` nodes. + /// Map of { `Motion` node_id -> its corresponding data } pub vtables: Option<VirtualTableMap>, } diff --git a/sbroad-core/src/executor/tests.rs b/sbroad-core/src/executor/tests.rs index e08d312fd3ff037ae17a283fb654aec40f5fa618..97915daeab0282e2ba8487c3fedba14b0cf7d9f2 100644 --- a/sbroad-core/src/executor/tests.rs +++ b/sbroad-core/src/executor/tests.rs @@ -1244,7 +1244,7 @@ fn insert6_test() { "{} {} {} {}", r#"INSERT INTO "t" ("a", "b", "bucket_id")"#, r#"SELECT COL_0, COL_1, bucket_id (coalesce (CAST (COL_0 as string), ?) || coalesce (CAST (COL_1 as string), ?)) FROM"#, - r#"(SELECT CAST (COLUMN_5 as unsigned) as COL_0, CAST (COLUMN_6 as unsigned) as COL_1 FROM"#, + r#"(SELECT CAST ("COLUMN_5" as unsigned) as COL_0, CAST ("COLUMN_6" as unsigned) as COL_1 FROM"#, r#"((SELECT "COLUMN_5","COLUMN_6" FROM "TMP_test_94") as "t"))"#, ), vec![Value::from("NULL"), Value::from("NULL")], @@ -1257,7 +1257,7 @@ fn insert6_test() { "{} {} {} {}", r#"INSERT INTO "t" ("a", "b", "bucket_id")"#, r#"SELECT COL_0, COL_1, bucket_id (coalesce (CAST (COL_0 as string), ?) || coalesce (CAST (COL_1 as string), ?)) FROM"#, - r#"(SELECT CAST (COLUMN_5 as unsigned) as COL_0, CAST (COLUMN_6 as unsigned) as COL_1 FROM"#, + r#"(SELECT CAST ("COLUMN_5" as unsigned) as COL_0, CAST ("COLUMN_6" as unsigned) as COL_1 FROM"#, r#"((SELECT "COLUMN_5","COLUMN_6" FROM "TMP_test_94") as "t"))"#, ), vec![Value::from("NULL"), Value::from("NULL")], @@ -1412,7 +1412,7 @@ fn insert9_test() { "{} {} {}", r#"INSERT INTO "t" ("a", "b", "bucket_id")"#, r#"SELECT COL_0, COL_1, bucket_id (coalesce (CAST (COL_0 as string), ?) 
|| coalesce (CAST (COL_1 as string), ?)) FROM"#, - r#"(SELECT CAST (COLUMN_1 as unsigned) as COL_0, CAST (COLUMN_2 as unsigned) as COL_1 FROM ((SELECT "COLUMN_1","COLUMN_2" FROM "TMP_test_82") as "t"))"#, + r#"(SELECT CAST ("COLUMN_1" as unsigned) as COL_0, CAST ("COLUMN_2" as unsigned) as COL_1 FROM ((SELECT "COLUMN_1","COLUMN_2" FROM "TMP_test_82") as "t"))"#, ), vec![Value::from("NULL"), Value::from("NULL")], ))), diff --git a/sbroad-core/src/frontend/sql.rs b/sbroad-core/src/frontend/sql.rs index ffba3931950ecc1466a2c9c59f755b23e8c8250a..e45a62dcbcb02b4c069c3f6b207fa47f927dc89c 100644 --- a/sbroad-core/src/frontend/sql.rs +++ b/sbroad-core/src/frontend/sql.rs @@ -21,7 +21,6 @@ use crate::ir::value::Value; use crate::ir::{Node, Plan}; use crate::otm::child_span; -use crate::ir::aggregates::SimpleAggregate; use sbroad_proc::otm_child_span; /// Helper structure to fix the double linking @@ -35,12 +34,6 @@ struct Between { less_eq_id: usize, } -pub struct AggregateInfo { - pub expression_top: usize, - pub aggregate: SimpleAggregate, - pub is_distinct: bool, -} - impl Between { fn new(left_id: usize, less_eq_id: usize) -> Self { Self { @@ -118,6 +111,7 @@ impl Ast for AbstractSyntaxTree { self.nodes.arena.is_empty() } + /// Function that transforms `AbstractSyntaxTree` into `Plan`. #[allow(dead_code)] #[allow(clippy::too_many_lines)] #[otm_child_span("ast.resolve")] @@ -132,8 +126,12 @@ impl Ast for AbstractSyntaxTree { }; let capacity = self.nodes.arena.len(); let mut dft_post = PostOrder::with_capacity(|node| self.nodes.ast_iter(node), capacity); + // Map of { `ParseNode` id -> `Node` id }. let mut map = Translation::with_capacity(self.nodes.next_id()); + // Set of all `Expression::Row` generated from AST. let mut rows: HashSet<usize> = HashSet::with_capacity(self.nodes.next_id()); + // Counter for `Expression::ValuesRow` output column name aliases ("COLUMN_<`col_idx`>"). + // Is it global for every `ValuesRow` met in the AST. 
let mut col_idx: usize = 0; let mut groupby_nodes: Vec<usize> = Vec::new(); @@ -141,17 +139,18 @@ impl Ast for AbstractSyntaxTree { let mut sq_nodes: Vec<usize> = Vec::new(); let mut betweens: Vec<Between> = Vec::new(); - // ids of arithmetic expressions that have parentheses - let mut arithmetic_expression_ids: Vec<usize> = Vec::new(); + // Ids of arithmetic expressions that have parentheses. + let mut arith_expr_with_parentheses_ids: Vec<usize> = Vec::new(); + // Closure to retrieve arithmetic expression under parenthesis. let get_arithmetic_plan_id = |plan: &mut Plan, map: &Translation, - arithmetic_expression_ids: &mut Vec<usize>, + arith_expr_with_parentheses_ids: &mut Vec<usize>, rows: &mut HashSet<usize>, ast_id: usize| -> Result<usize, SbroadError> { let plan_id; - // if child of current multiplication or addition is `(expr)` then + // If child of current multiplication or addition is `(expr)` then // we need to get expr that is child of `()` and add it to the plan // also we will mark this expr to add in the future `()` let arithmetic_parse_node = self.nodes.get_node(ast_id)?; @@ -162,7 +161,7 @@ impl Ast for AbstractSyntaxTree { ) })?; plan_id = plan.as_row(map.get(*arithmetic_id)?, rows)?; - arithmetic_expression_ids.push(plan_id); + arith_expr_with_parentheses_ids.push(plan_id); } else { plan_id = plan.as_row(map.get(ast_id)?, rows)?; } @@ -170,41 +169,41 @@ impl Ast for AbstractSyntaxTree { Ok(plan_id) }; - let get_arithmetic_cond_id = - |plan: &mut Plan, - current_node: &ParseNode, - map: &Translation, - arithmetic_expression_ids: &mut Vec<usize>, - rows: &mut HashSet<usize>| { - let ast_left_id = current_node.children.first().ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues( - "Multiplication or Addition has no children.".into(), - ) - })?; - let plan_left_id = get_arithmetic_plan_id( - plan, - map, - arithmetic_expression_ids, - rows, - *ast_left_id, - )?; + // Closure to add arithmetic expression operator to plan and get id of newly added 
node. + let get_arithmetic_op_id = |plan: &mut Plan, + current_node: &ParseNode, + map: &Translation, + arith_expr_with_parentheses_ids: &mut Vec<usize>, + rows: &mut HashSet<usize>| { + let ast_left_id = current_node.children.first().ok_or_else(|| { + SbroadError::UnexpectedNumberOfValues( + "Multiplication or Addition has no children.".into(), + ) + })?; + let plan_left_id = get_arithmetic_plan_id( + plan, + map, + arith_expr_with_parentheses_ids, + rows, + *ast_left_id, + )?; - let ast_right_id = current_node.children.get(2).ok_or_else(|| { - SbroadError::NotFound( - Entity::Node, - "that is right node with index 2 among Multiplication or Addition children" - .into(), - ) - })?; - let plan_right_id = get_arithmetic_plan_id( - plan, - map, - arithmetic_expression_ids, - rows, - *ast_right_id, - )?; + let ast_right_id = current_node.children.get(2).ok_or_else(|| { + SbroadError::NotFound( + Entity::Node, + "that is right node with index 2 among Multiplication or Addition children" + .into(), + ) + })?; + let plan_right_id = get_arithmetic_plan_id( + plan, + map, + arith_expr_with_parentheses_ids, + rows, + *ast_right_id, + )?; - let ast_op_id = current_node.children.get(1).ok_or_else(|| { + let ast_op_id = current_node.children.get(1).ok_or_else(|| { SbroadError::NotFound( Entity::Node, "that is center node (operator) with index 1 among Multiplication or Addition children" @@ -212,13 +211,12 @@ impl Ast for AbstractSyntaxTree { ) })?; - let op_node = self.nodes.get_node(*ast_op_id)?; - let op = Arithmetic::from_node_type(&op_node.rule)?; + let op_node = self.nodes.get_node(*ast_op_id)?; + let op = Arithmetic::from_node_type(&op_node.rule)?; - let cond_id = - plan.add_arithmetic_to_plan(plan_left_id, op, plan_right_id, false)?; - Ok(cond_id) - }; + let op_id = plan.add_arithmetic_to_plan(plan_left_id, op, plan_right_id, false)?; + Ok(op_id) + }; for (_, id) in dft_post.iter(top) { let node = self.nodes.get_node(id)?; @@ -298,6 +296,7 @@ impl Ast for 
AbstractSyntaxTree { plan_rel_list.push(plan_id); } + // Closure to get uppercase name from AST `ColumnName` node. let get_column_name = |ast_id: usize| -> Result<String, SbroadError> { let ast_col_name = self.nodes.get_node(ast_id)?; if let Type::ColumnName = ast_col_name.rule { @@ -317,6 +316,8 @@ impl Ast for AbstractSyntaxTree { } }; + // Closure to get the nearest name of relation the output column came from + // E.g. for `Scan` it would be its `relation`. let get_scan_name = |col_name: &str, plan_id: usize| -> Result<Option<String>, SbroadError> { let child = plan.get_relation_node(plan_id)?; @@ -333,10 +334,12 @@ impl Ast for AbstractSyntaxTree { } }; - // Reference to the join node. - if let (Some(plan_left_id), Some(plan_right_id)) = - (plan_rel_list.first(), plan_rel_list.get(1)) + let plan_left_id = plan_rel_list.first(); + let plan_right_id = plan_rel_list.get(1); + if let (Some(plan_left_id), Some(plan_right_id)) = (plan_left_id, plan_right_id) { + // Handling case of referencing join node. + if let (Some(ast_scan_id), Some(ast_col_name_id)) = (node.children.first(), node.children.get(1)) { @@ -440,13 +443,13 @@ impl Ast for AbstractSyntaxTree { "expected children nodes contain a column name.".into(), )); }; + } else if let (Some(plan_rel_id), None) = (plan_left_id, plan_right_id) { + // Handling case of referencing a single child node. - // Reference to a single child node. - } else if let (Some(plan_rel_id), None) = - (plan_rel_list.first(), plan_rel_list.get(1)) - { + let first_child_id = node.children.first(); + let second_child_id = node.children.get(1); let col_name: String = if let (Some(ast_scan_id), Some(ast_col_id)) = - (node.children.first(), node.children.get(1)) + (first_child_id, second_child_id) { // Get column name. 
let col_name = get_column_name(*ast_col_id)?; @@ -474,9 +477,7 @@ impl Ast for AbstractSyntaxTree { )); }; col_name - } else if let (Some(ast_col_id), None) = - (node.children.first(), node.children.get(1)) - { + } else if let (Some(ast_col_id), None) = (first_child_id, second_child_id) { // Get the column name. get_column_name(*ast_col_id)? } else { @@ -615,7 +616,7 @@ impl Ast for AbstractSyntaxTree { let cond_id = plan.add_cond(plan_left_id, op, plan_right_id)?; map.add(id, cond_id); } - Type::IsNull | Type::IsNotNull => { + Type::IsNull | Type::IsNotNull | Type::Exists | Type::NotExists => { let ast_child_id = node.children.first().ok_or_else(|| { SbroadError::UnexpectedNumberOfValues(format!( "{:?} has no children.", @@ -907,11 +908,11 @@ impl Ast for AbstractSyntaxTree { map.add(id, projection_id); } Type::Multiplication | Type::Addition => { - let cond_id = get_arithmetic_cond_id( + let cond_id = get_arithmetic_op_id( &mut plan, node, &map, - &mut arithmetic_expression_ids, + &mut arith_expr_with_parentheses_ids, &mut rows, )?; map.add(id, cond_id); @@ -924,7 +925,7 @@ impl Ast for AbstractSyntaxTree { ) })?; let plan_child_id = map.get(ast_child_id)?; - arithmetic_expression_ids.push(plan_child_id); + arith_expr_with_parentheses_ids.push(plan_child_id); map.add(id, plan_child_id); } Type::Except => { @@ -963,12 +964,13 @@ impl Ast for AbstractSyntaxTree { map.add(id, values_row_id); } Type::Values => { - let mut plan_children_ids: Vec<usize> = Vec::with_capacity(node.children.len()); + let mut plan_value_row_ids: Vec<usize> = + Vec::with_capacity(node.children.len()); for ast_child_id in &node.children { let plan_child_id = map.get(*ast_child_id)?; - plan_children_ids.push(plan_child_id); + plan_value_row_ids.push(plan_child_id); } - let plan_values_id = plan.add_values(plan_children_ids)?; + let plan_values_id = plan.add_values(plan_value_row_ids)?; map.add(id, plan_values_id); } Type::Insert => { @@ -1080,7 +1082,7 @@ impl Ast for AbstractSyntaxTree { 
plan.set_top(plan_top_id)?; let replaces = plan.replace_sq_with_references()?; plan.fix_betweens(&betweens, &replaces)?; - plan.fix_arithmetic_parentheses(&arithmetic_expression_ids)?; + plan.fix_arithmetic_parentheses(&arith_expr_with_parentheses_ids)?; Ok(plan) } } diff --git a/sbroad-core/src/frontend/sql/ast.rs b/sbroad-core/src/frontend/sql/ast.rs index 54b320dab90cc7a56053d23be8d3cb64f02e6ae0..c3a35f3e8be7cba0b0c234c1f2b5bd1e51a2eac5 100644 --- a/sbroad-core/src/frontend/sql/ast.rs +++ b/sbroad-core/src/frontend/sql/ast.rs @@ -45,6 +45,7 @@ pub enum Type { Double, Eq, Except, + Exists, Explain, False, Function, @@ -68,6 +69,7 @@ pub enum Type { Multiply, Name, NotEq, + NotExists, NotIn, Null, Or, @@ -129,6 +131,7 @@ impl Type { Rule::Double => Ok(Type::Double), Rule::Eq => Ok(Type::Eq), Rule::Except => Ok(Type::Except), + Rule::Exists => Ok(Type::Exists), Rule::Explain => Ok(Type::Explain), Rule::False => Ok(Type::False), Rule::Function => Ok(Type::Function), @@ -153,6 +156,7 @@ impl Type { Rule::Multiply => Ok(Type::Multiply), Rule::Name => Ok(Type::Name), Rule::NotEq => Ok(Type::NotEq), + Rule::NotExists => Ok(Type::NotExists), Rule::NotIn => Ok(Type::NotIn), Rule::Null => Ok(Type::Null), Rule::Or => Ok(Type::Or), @@ -219,6 +223,7 @@ impl fmt::Display for Type { Type::Double => "Double".to_string(), Type::Eq => "Eq".to_string(), Type::Except => "Except".to_string(), + Type::Exists => "Exists".to_string(), Type::Explain => "Explain".to_string(), Type::False => "False".to_string(), Type::Function => "Function".to_string(), @@ -240,6 +245,7 @@ impl fmt::Display for Type { Type::Multiply => "Multiply".to_string(), Type::Name => "Name".to_string(), Type::NotEq => "NotEq".to_string(), + Type::NotExists => "NotExists".to_string(), Type::NotIn => "NotIn".to_string(), Type::Null => "Null".to_string(), Type::Or => "Or".to_string(), @@ -446,7 +452,10 @@ impl<'n> StackParseNode<'n> { #[derive(Clone, Debug, Deserialize, Serialize)] pub struct AbstractSyntaxTree { pub(in 
crate::frontend::sql) nodes: ParseNodes, + /// Index of top `ParseNode` in `nodes.arena`. pub(in crate::frontend::sql) top: Option<usize>, + /// Map of { reference node_id -> relation node_id it refers to }. + /// See `build_ref_to_relation_map` to understand how it is filled. pub(super) map: HashMap<usize, Vec<usize>>, } diff --git a/sbroad-core/src/frontend/sql/ir.rs b/sbroad-core/src/frontend/sql/ir.rs index 2f28fff89ece50ec7c9784f29292d738aa5c3b91..192102b124bafffa6ed0ac1d276ce340b3ad8aef 100644 --- a/sbroad-core/src/frontend/sql/ir.rs +++ b/sbroad-core/src/frontend/sql/ir.rs @@ -71,6 +71,8 @@ impl Unary { match s { Type::IsNull => Ok(Unary::IsNull), Type::IsNotNull => Ok(Unary::IsNotNull), + Type::Exists => Ok(Unary::Exists), + Type::NotExists => Ok(Unary::NotExists), _ => Err(SbroadError::Invalid( Entity::Operator, Some(format!("unary operator: {s:?}")), @@ -129,6 +131,7 @@ impl Value { } #[derive(Debug)] +/// Helper struct representing map of { `ParseNode` id -> `Node` id } pub(super) struct Translation { map: HashMap<usize, usize>, } @@ -191,8 +194,9 @@ impl Plan { capacity, ); for (_, op_id) in expr_post.iter(*tree) { + let expression_node = self.get_node(op_id)?; if let Node::Expression(Expression::Bool { left, right, .. }) = - self.get_node(op_id)? + expression_node { let children = &[*left, *right]; for child in children { @@ -202,6 +206,14 @@ impl Plan { set.insert(SubQuery::new(id, op_id, *child)); } } + } else if let Node::Expression(Expression::Unary { child, .. }) = + expression_node + { + if let Node::Relational(Relational::ScanSubQuery { .. }) = + self.get_node(*child)? + { + set.insert(SubQuery::new(id, op_id, *child)); + } } } } @@ -296,6 +308,9 @@ impl Plan { )); } replaces.insert(sq.sq, row_id); + } else if let Expression::Unary { child, .. 
} = op { + *child = row_id; + replaces.insert(sq.sq, row_id); } else { return Err(SbroadError::Invalid( Entity::Expression, diff --git a/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad-core/src/frontend/sql/ir/tests.rs index 0a6629775ec72c644496cab70858c9f3a2a2263e..f06a48ae18bfaa16d40fd4e21946931a5153d883 100644 --- a/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad-core/src/frontend/sql/ir/tests.rs @@ -234,9 +234,9 @@ fn front_sql10() { r#"insert "t" projection (COL_0 -> COL_0, COL_1 -> COL_1, COL_2 -> COL_2, COL_3 -> COL_3, bucket_id((coalesce(('NULL', COL_0::string)) || coalesce(('NULL', COL_1::string))))) scan - projection (COLUMN_1::unsigned -> COL_0, COLUMN_2::unsigned -> COL_1, COLUMN_3::unsigned -> COL_2, COLUMN_4::unsigned -> COL_3) + projection ("COLUMN_1"::unsigned -> COL_0, "COLUMN_2"::unsigned -> COL_1, "COLUMN_3"::unsigned -> COL_2, "COLUMN_4"::unsigned -> COL_3) scan - motion [policy: segment([ref(COLUMN_1), ref(COLUMN_2)])] + motion [policy: segment([ref("COLUMN_1"), ref("COLUMN_2")])] values value row (data=ROW(1, 2, 3, 4)) "#, @@ -255,9 +255,9 @@ fn front_sql11() { r#"insert "t" projection (COL_0 -> COL_0, COL_1 -> COL_1, bucket_id((coalesce(('NULL', COL_0::string)) || coalesce(('NULL', NULL::string))))) scan - projection (COLUMN_1::unsigned -> COL_0, COLUMN_2::unsigned -> COL_1) + projection ("COLUMN_1"::unsigned -> COL_0, "COLUMN_2"::unsigned -> COL_1) scan - motion [policy: segment([ref(COLUMN_1), value(NULL)])] + motion [policy: segment([ref("COLUMN_1"), value(NULL)])] values value row (data=ROW(1, 2)) "#, @@ -374,6 +374,70 @@ fn front_sql20() { assert_eq!(expected_explain, plan.as_explain().unwrap()); } +#[test] +fn front_sql_exists_subquery_select_from_table() { + let input = r#"SELECT "id" FROM "test_space" WHERE EXISTS (SELECT 0 FROM "hash_testing")"#; + + let mut plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("test_space"."id" -> "id") + selection exists ROW($0) + scan 
"test_space" +subquery $0: +motion [policy: full] + scan + projection (0 -> "COL_1") + scan "hash_testing" +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + +#[test] +fn front_sql_not_exists_subquery_select_from_table() { + let input = r#"SELECT "id" FROM "test_space" WHERE NOT EXISTS (SELECT 0 FROM "hash_testing")"#; + + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("test_space"."id" -> "id") + selection not exists ROW($0) + scan "test_space" +subquery $0: +motion [policy: full] + scan + projection (0 -> "COL_1") + scan "hash_testing" +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + +#[test] +fn front_sql_exists_subquery_select_from_table_with_condition() { + let input = r#"SELECT "id" FROM "test_space" WHERE EXISTS (SELECT 0 FROM "hash_testing" WHERE "identification_number" != 42)"#; + + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("test_space"."id" -> "id") + selection exists ROW($0) + scan "test_space" +subquery $0: +motion [policy: full] + scan + projection (0 -> "COL_1") + selection ROW("hash_testing"."identification_number") <> ROW(42) + scan "hash_testing" +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + #[test] fn front_sql_groupby() { let input = r#"SELECT "identification_number", "product_code" FROM "hash_testing" group by "identification_number", "product_code""#; diff --git a/sbroad-core/src/frontend/sql/query.pest b/sbroad-core/src/frontend/sql/query.pest index 7c1f3518c9878b2c90cafa03319d79777bc7ad5c..475829712733acbd738fd76db102747f6efb4401 100644 --- a/sbroad-core/src/frontend/sql/query.pest +++ b/sbroad-core/src/frontend/sql/query.pest @@ -10,9 +10,9 @@ Query = _{ Except | UnionAll | Select | Values | Insert } Join? ~ (^"where" ~ Selection)? ~ (^"group" ~ ^"by" ~ GroupBy)? } - Projection = { (Asterisk | Column) ~ ("," ~ (Asterisk | Column))*? 
} - Column = { Alias | Expr | ArithmeticExpr | Value } - Alias = {(Expr | Expr | ArithmeticExpr | Value) ~ ^"as" ~ AliasName } + Projection = { (Asterisk | Column) ~ ("," ~ (Asterisk | Column))* } + Column = { Alias | Expr | Value } + Alias = {(Expr | Value) ~ ^"as" ~ AliasName } AliasName = @{ Name } Reference = { (ScanName ~ "." ~ ColumnName) | ColumnName } ColumnName = @{ Name } @@ -33,26 +33,27 @@ Query = _{ Except | UnionAll | Select | Values | Insert } SubQuery = { "(" ~ (Except | UnionAll | Select | Values) ~ ")" } Insert = { ^"insert" ~ ^"into" ~ Table ~ ("(" ~ TargetColumns ~ ")")? ~ (Values | Select) } TargetColumns = { ColumnName ~ ("," ~ ColumnName)* } - Values = { ^"values" ~ ValuesRow ~ ("," ~ ValuesRow)*? } + Values = { ^"values" ~ ValuesRow ~ ("," ~ ValuesRow)* } ValuesRow = { Row } -ArithmeticExpr = _{ Addition | Multiplication | ArithParentheses } - ArithParentheses = { "(" ~ ArithmeticExpr ~ ")" } - ArithELeft = _{ ArithParentheses | Value } - Multiplication = { ArithELeft ~ (Multiply | Divide) ~ MultiplicationRight } - MultiplicationRight = _{ Multiplication | ArithELeft } - Addition = { AdditionLeft ~ (Add | Subtract) ~ AdditionRight } - AdditionLeft = _{ MultiplicationRight } - AdditionRight = _{ Addition | Multiplication | ArithELeft } - Multiply = { "*" } - Divide = { "/" } - Add = { "+" } - Subtract = { "-" } - Expr = _{ Or | And | Unary | Between | Cmp | Primary | Parentheses } Parentheses = _{ "(" ~ Expr ~ ")" } - Primary = _{ ArithmeticExpr | SubQuery | Value } - Unary = _{ IsNull | IsNotNull} + Primary = _{ SubQuery | ArithmeticExpr | Value } + ArithmeticExpr = _{ Addition | Multiplication | ArithParentheses } + ArithParentheses = { "(" ~ ArithmeticExpr ~ ")" } + ArithELeft = _{ ArithParentheses | Value } + Multiplication = { ArithELeft ~ (Multiply | Divide) ~ MultiplicationRight } + MultiplicationRight = _{ Multiplication | ArithELeft } + Addition = { AdditionLeft ~ (Add | Subtract) ~ AdditionRight } + AdditionLeft = _{ 
MultiplicationRight } + AdditionRight = _{ Addition | Multiplication | ArithELeft } + Multiply = { "*" } + Divide = { "/" } + Add = { "+" } + Subtract = { "-" } + Unary = _{ IsNull | IsNotNull | Exists | NotExists } + Exists = { ^"exists" ~ SubQuery } + NotExists = { ^"not" ~ ^"exists" ~ SubQuery } IsNull = { Primary ~ ^"is" ~ ^"null" } IsNotNull = { Primary ~ ^"is" ~ ^"not" ~ ^"null" } Cmp = _{ Eq | In | Gt | GtEq | Lt | LtEq | NotEq | NotIn } diff --git a/sbroad-core/src/ir.rs b/sbroad-core/src/ir.rs index b64819ba4db5343a086caddaf7fb74de139313a1..5545e9a1933aaf7411966623780809c098a2d283 100644 --- a/sbroad-core/src/ir.rs +++ b/sbroad-core/src/ir.rs @@ -407,7 +407,7 @@ impl Plan { self.nodes.add_bool(left, op, right) } - /// Add rithmetic node to the plan. + /// Add arithmetic node to the plan. /// /// # Errors /// Returns `SbroadError` when the condition node can't append'. diff --git a/sbroad-core/src/ir/explain.rs b/sbroad-core/src/ir/explain.rs index 202f9fb7ff07c136864a3f165d382fa4b560e826..741d3316274fc95f99e5594d32fc1a41d13f2166 100644 --- a/sbroad-core/src/ir/explain.rs +++ b/sbroad-core/src/ir/explain.rs @@ -575,6 +575,7 @@ impl Display for Selection { } Selection::UnaryOp { op, child } => match op { Unary::IsNull | Unary::IsNotNull => format!("{child} {op}"), + Unary::Exists | Unary::NotExists => format!("{op} {child}"), }, }; diff --git a/sbroad-core/src/ir/explain/tests.rs b/sbroad-core/src/ir/explain/tests.rs index 82332d22d6a19a7f2ef4fc83938a2324c4b7f11b..eab32652dc0e450e0d58aff3a9c16266f0eeb632 100644 --- a/sbroad-core/src/ir/explain/tests.rs +++ b/sbroad-core/src/ir/explain/tests.rs @@ -317,9 +317,9 @@ fn insert_plan() { r#"insert "test_space" projection (COL_0 -> COL_0, COL_1 -> COL_1, bucket_id((coalesce(('NULL', COL_0::string))))) scan - projection (COLUMN_1::unsigned -> COL_0, COLUMN_2::string -> COL_1) + projection ("COLUMN_1"::unsigned -> COL_0, "COLUMN_2"::string -> COL_1) scan - motion [policy: segment([ref(COLUMN_1)])] + motion [policy: 
segment([ref("COLUMN_1")])] values value row (data=ROW(1, '123')) "#, @@ -342,9 +342,9 @@ fn multiply_insert_plan() { r#"insert "test_space" projection (COL_0 -> COL_0, COL_1 -> COL_1, bucket_id((coalesce(('NULL', COL_0::string))))) scan - projection (COLUMN_5::unsigned -> COL_0, COLUMN_6::string -> COL_1) + projection ("COLUMN_5"::unsigned -> COL_0, "COLUMN_6"::string -> COL_1) scan - motion [policy: segment([ref(COLUMN_5)])] + motion [policy: segment([ref("COLUMN_5")])] values value row (data=ROW(1, '123')) value row (data=ROW(2, '456')) @@ -392,7 +392,7 @@ fn select_value_plan() { let mut actual_explain = String::new(); actual_explain.push_str( - r#"projection (COLUMN_1 -> COLUMN_1) + r#"projection ("COLUMN_1" -> "COLUMN_1") scan values value row (data=ROW(1)) diff --git a/sbroad-core/src/ir/expression.rs b/sbroad-core/src/ir/expression.rs index e224624ddd25addb1e3909f97231861c3a58538e..e47cd832f9fe959c7fa45ce86a299995a10a9350 100644 --- a/sbroad-core/src/ir/expression.rs +++ b/sbroad-core/src/ir/expression.rs @@ -668,6 +668,7 @@ impl Plan { let relational_op = self.get_relation_node(child_node)?; let output_id = relational_op.output(); let output = self.get_expression_node(output_id)?; + // Map of { column name (aliased) from child output -> its index in output } let map: HashMap<&str, usize, RandomState> = if let Expression::Row { list, .. 
} = output { let state = RandomState::new(); let mut map: HashMap<&str, usize, RandomState> = @@ -692,6 +693,7 @@ impl Plan { )); }; + // Vec of { `map` key, targets, `map` value } let mut refs: Vec<(&str, Vec<usize>, usize)> = Vec::with_capacity(col_names.len()); let all_found = col_names.iter().all(|col| { map.get(col).map_or(false, |pos| { @@ -898,7 +900,7 @@ impl Plan { /// - node is not a row /// - row is invalid /// - `relational_map` is not initialized - pub fn get_relational_from_row_nodes( + pub fn get_relational_nodes_from_row( &self, row_id: usize, ) -> Result<HashSet<usize, RandomState>, SbroadError> { diff --git a/sbroad-core/src/ir/expression/tests.rs b/sbroad-core/src/ir/expression/tests.rs index c875f2995d109b9db192fc93783be322ec403b87..f46c7b3fb752cc07deac0b289e5b3253194948d4 100644 --- a/sbroad-core/src/ir/expression/tests.rs +++ b/sbroad-core/src/ir/expression/tests.rs @@ -37,7 +37,7 @@ fn rel_nodes_from_reference_in_scan() { let scan_id = plan.add_scan("t", None).unwrap(); let output = plan.get_relational_output(scan_id).unwrap(); - let rel_set = plan.get_relational_from_row_nodes(output).unwrap(); + let rel_set = plan.get_relational_nodes_from_row(output).unwrap(); assert_eq!(true, rel_set.is_empty()); } @@ -59,7 +59,7 @@ fn rel_nodes_from_reference_in_proj() { let proj_id = plan.add_proj(scan_id, &["a"]).unwrap(); let output = plan.get_relational_output(proj_id).unwrap(); - let rel_set = plan.get_relational_from_row_nodes(output).unwrap(); + let rel_set = plan.get_relational_nodes_from_row(output).unwrap(); assert_eq!(1, rel_set.len()); assert_eq!(Some(&scan_id), rel_set.get(&scan_id)); } diff --git a/sbroad-core/src/ir/helpers.rs b/sbroad-core/src/ir/helpers.rs index b59f042423a2a43055aaf8be014ba7d9ea69a2bc..74b6d22bf35085f3c39c6e18c78edfbf39f8b1d4 100644 --- a/sbroad-core/src/ir/helpers.rs +++ b/sbroad-core/src/ir/helpers.rs @@ -86,7 +86,9 @@ impl Plan { writeln!(buf, "Constant [value = {value}]")?; } Expression::Reference { - targets, 
position, .. + targets, + position, + parent, } => { let alias_name = self.get_alias_from_reference_node(expr).unwrap(); @@ -107,7 +109,7 @@ impl Plan { } formatted_tabulate(buf, tabulation_number + 1)?; - writeln!(buf, "Parent: (current relational node)")?; + writeln!(buf, "Parent: {parent:?}")?; if let Some(targets) = targets { for target_id in targets { @@ -129,7 +131,12 @@ impl Plan { Expression::Cast { .. } => writeln!(buf, "Cast")?, Expression::Concat { .. } => writeln!(buf, "Concat")?, Expression::StableFunction { .. } => writeln!(buf, "StableFunction")?, - Expression::Unary { .. } => writeln!(buf, "Unary")?, + Expression::Unary { op, child } => { + writeln!(buf, "Unary [op: {op}]")?; + formatted_tabulate(buf, tabulation_number + 1)?; + writeln!(buf, "Child")?; + self.formatted_arena_node(buf, tabulation_number + 1, *child)?; + } Expression::Arithmetic { .. } => writeln!(buf, "Arithmetic")?, }; } diff --git a/sbroad-core/src/ir/helpers/tests.rs b/sbroad-core/src/ir/helpers/tests.rs index cdc867c8dceb6806d2d72e7178f55db358018fb9..bf2ee40b4063a54b6835c56812a44419da8cbf93 100644 --- a/sbroad-core/src/ir/helpers/tests.rs +++ b/sbroad-core/src/ir/helpers/tests.rs @@ -131,7 +131,7 @@ fn simple_join() { [id: 40] expression: Reference Alias: "id" Referenced table name (or alias): "t1" - Parent: (current relational node) + Parent: Some(50) target_id: 0 Right child [id: 56] expression: Row [distribution = Some(Any)] @@ -139,7 +139,7 @@ fn simple_join() { [id: 42] expression: Reference Alias: "identification_number" Referenced table name (or alias): "t2" - Parent: (current relational node) + Parent: Some(50) target_id: 1 Children: Child_id = 19 diff --git a/sbroad-core/src/ir/operator.rs b/sbroad-core/src/ir/operator.rs index bbba4ed3c0ecd3bee7143047be727ba5074d4c6d..2c70cbe824869010c9b8e74ef52621de2540cfcc 100644 --- a/sbroad-core/src/ir/operator.rs +++ b/sbroad-core/src/ir/operator.rs @@ -137,6 +137,10 @@ pub enum Unary { IsNull, /// `is not null` IsNotNull, + /// 
`exists` + Exists, + /// `not exists` + NotExists, } impl Unary { @@ -148,6 +152,8 @@ impl Unary { match s.to_lowercase().as_str() { "is null" => Ok(Unary::IsNull), "is not null" => Ok(Unary::IsNotNull), + "exists" => Ok(Unary::Exists), + "not exists" => Ok(Unary::NotExists), _ => Err(SbroadError::Invalid( Entity::Operator, Some(format!( @@ -163,6 +169,8 @@ impl Display for Unary { let op = match &self { Unary::IsNull => "is null", Unary::IsNotNull => "is not null", + Unary::Exists => "exists", + Unary::NotExists => "not exists", }; write!(f, "{op}") @@ -1056,7 +1064,7 @@ impl Plan { Ok(union_all_id) } - /// Adds a values row node + /// Adds a values row node. /// /// # Errors /// - Row node is not of a row type @@ -1072,7 +1080,7 @@ impl Plan { // Generate a row of aliases for the incoming row. *col_idx += 1; // The column names are generated according to tarantool naming of anonymous columns - let name = format!("COLUMN_{col_idx}"); + let name = format!("\"COLUMN_{col_idx}\""); let alias_id = self.nodes.add_alias(&name, col_id)?; aliases.push(alias_id); } @@ -1093,18 +1101,23 @@ impl Plan { /// # Errors /// - No child nodes /// - Child node is not relational - pub fn add_values(&mut self, children: Vec<usize>) -> Result<usize, SbroadError> { - // Get the last row of the children list. We need it to - // get the correct anonymous column names. - let last_id = if let Some(last_id) = children.last() { + pub fn add_values(&mut self, value_rows: Vec<usize>) -> Result<usize, SbroadError> { + // In case we have several `ValuesRow` under `Values` + // (e.g. VALUES (1, "test_1"), (2, "test_2")), + // the list of alias column names for it will look like: + // (COLUMN_1, COLUMN_2), (COLUMN_3, COLUMN_4). + // Since we want to assign a name to the column and not to a specific value, + // we choose the names of the last `ValuesRow` and set them as names of all the columns of `Values`. + // The assumption is always that every child `ValuesRow` has the same number of elements. 
+ let last_id = if let Some(last_id) = value_rows.last() { *last_id } else { return Err(SbroadError::UnexpectedNumberOfValues( "Values node has no children, expected at least one child.".into(), )); }; - let child_last = self.get_relation_node(last_id)?; - let last_output_id = if let Relational::ValuesRow { output, .. } = child_last { + let value_row_last = self.get_relation_node(last_id)?; + let last_output_id = if let Relational::ValuesRow { output, .. } = value_row_last { *output } else { return Err(SbroadError::UnexpectedNumberOfValues( @@ -1135,15 +1148,20 @@ impl Plan { // Generate a row of aliases referencing all the children. let mut aliases: Vec<usize> = Vec::with_capacity(names.len()); for (pos, name) in names.iter().enumerate() { - let ref_id = - self.nodes - .add_ref(None, Some((0..children.len()).collect::<Vec<usize>>()), pos); + let ref_id = self.nodes.add_ref( + None, + Some((0..value_rows.len()).collect::<Vec<usize>>()), + pos, + ); let alias_id = self.nodes.add_alias(name, ref_id)?; aliases.push(alias_id); } let output = self.nodes.add_row(aliases, None); - let values = Relational::Values { output, children }; + let values = Relational::Values { + output, + children: value_rows, + }; let values_id = self.nodes.push(Node::Relational(values)); self.replace_parent_in_subtree(output, None, Some(values_id))?; Ok(values_id) diff --git a/sbroad-core/src/ir/transformation/redistribution.rs b/sbroad-core/src/ir/transformation/redistribution.rs index 567a611e2ede65844c00ced22bcec31f3420aec3..69cb8974976c9bd9fa2ffa397705bdbd743cd033 100644 --- a/sbroad-core/src/ir/transformation/redistribution.rs +++ b/sbroad-core/src/ir/transformation/redistribution.rs @@ -8,7 +8,7 @@ use std::collections::{hash_map::Entry, HashMap, HashSet}; use crate::errors::{Action, Entity, SbroadError}; use crate::ir::distribution::{Distribution, Key, KeySet}; use crate::ir::expression::Expression; -use crate::ir::operator::{Bool, JoinKind, Relational}; +use crate::ir::operator::{Bool, 
JoinKind, Relational, Unary}; use crate::ir::transformation::redistribution::eq_cols::EqualityCols; use crate::ir::tree::traversal::{BreadthFirst, PostOrder, EXPR_CAPACITY, REL_CAPACITY}; @@ -81,6 +81,7 @@ pub enum MotionPolicy { Local, } +/// Helper struct that unwraps `Expression::Bool` fields. struct BoolOp { left: usize, op: Bool, @@ -106,6 +107,8 @@ impl BoolOp { type ChildId = usize; +/// Helper struct to store motion policy for every child of +/// relational node with `parent_id`. #[derive(Debug)] struct Strategy { parent_id: usize, @@ -120,6 +123,8 @@ impl Strategy { } } + /// Add motion policy for child node. + /// Update policy in case `child_id` key is already in the `children_policy` map. fn add_child(&mut self, child_id: usize, policy: MotionPolicy) { self.children_policy.insert(child_id, policy); } @@ -170,7 +175,10 @@ impl Plan { Ok(nodes) } - /// Get boolean expressions with row children in the sub-tree. + /// Get boolean expressions with both row children in the sub-tree. + /// It's a helper function for resolving subquery conflicts. + /// E.g. boolean `In` operator will have both row children where + /// right `Row` is a transformed subquery. /// /// # Errors /// - some of the expression nodes are invalid @@ -201,13 +209,43 @@ impl Plan { Ok(nodes) } + /// Get unary expressions with a row child in the sub-tree. + /// It's a helper function for resolving subquery conflicts. + /// E.g. unary `Exists` operator will have `Row` child that + /// is a transformed subquery. + /// + /// # Errors + /// - some of the expression nodes are invalid + pub(crate) fn get_unary_nodes_with_row_children( + &self, + top: usize, + ) -> Result<Vec<usize>, SbroadError> { + let mut nodes: Vec<usize> = Vec::new(); + + let mut post_tree = + PostOrder::with_capacity(|node| self.nodes.expr_iter(node, false), EXPR_CAPACITY); + for (_, id) in post_tree.iter(top) { + // Append only unaries with row children. + if let Node::Expression(Expression::Unary { child, .. 
}) = self.get_node(id)? { + let child_is_row = matches!( + self.get_node(*child)?, + Node::Expression(Expression::Row { .. }) + ); + if child_is_row { + nodes.push(id); + } + } + } + Ok(nodes) + } + /// Get a single sub-query from the row node. /// /// # Errors /// - Row node is not of a row type /// There are more than one sub-queries in the row node. pub fn get_sub_query_from_row_node(&self, row_id: usize) -> Result<Option<usize>, SbroadError> { - let rel_ids = self.get_relational_from_row_nodes(row_id)?; + let rel_ids = self.get_relational_nodes_from_row(row_id)?; self.get_sub_query_among_rel_nodes(&rel_ids) } @@ -245,7 +283,7 @@ impl Plan { /// - Row node is not of a row type /// - There are more than one motion nodes in the row node pub fn get_motion_from_row(&self, node_id: usize) -> Result<Option<usize>, SbroadError> { - let rel_nodes = self.get_relational_from_row_nodes(node_id)?; + let rel_nodes = self.get_relational_nodes_from_row(node_id)?; self.get_motion_among_rel_nodes(&rel_nodes) } @@ -283,7 +321,7 @@ impl Plan { /// # Errors /// - nodes are not rows /// - uninitialized distribution for some row - fn choose_strategy_for_inner_sq( + fn choose_strategy_for_bool_op_inner_sq( &self, outer_id: usize, inner_id: usize, @@ -364,6 +402,8 @@ impl Plan { Ok(()) } + /// Get `Relational::SubQuery` node that is referenced by passed `row_id`. + /// Only returns `SubQuery` that is an additional child of passed `rel_id` node. fn get_additional_sq( &self, rel_id: usize, @@ -379,13 +419,14 @@ impl Plan { Ok(None) } - fn get_sq_node_strategies( + /// Get `SubQuery`s from passed boolean `op_id` node (e.g. `In`). 
+ fn get_sq_node_strategies_for_bool_op( &self, rel_id: usize, - node_id: usize, + op_id: usize, ) -> Result<Vec<(usize, MotionPolicy)>, SbroadError> { let mut strategies: Vec<(usize, MotionPolicy)> = Vec::new(); - let bool_op = BoolOp::from_expr(self, node_id)?; + let bool_op = BoolOp::from_expr(self, op_id)?; let left = self.get_additional_sq(rel_id, bool_op.left)?; let right = self.get_additional_sq(rel_id, bool_op.right)?; match left { @@ -400,7 +441,7 @@ impl Plan { // Left side is sub-query, right is an outer tuple. strategies.push(( left_sq, - self.choose_strategy_for_inner_sq( + self.choose_strategy_for_bool_op_inner_sq( bool_op.right, self.get_relational_output(left_sq)?, &bool_op.op, @@ -414,7 +455,7 @@ impl Plan { // Left side is an outer tuple, right is sub-query. strategies.push(( right_sq, - self.choose_strategy_for_inner_sq( + self.choose_strategy_for_bool_op_inner_sq( bool_op.left, self.get_relational_output(right_sq)?, &bool_op.op, @@ -426,26 +467,59 @@ impl Plan { Ok(strategies) } + /// Get `SubQuery`s from passed unary `op_id` node (e.g. `Exists`). + fn get_sq_node_strategy_for_unary_op( + &self, + rel_id: usize, + op_id: usize, + ) -> Result<Option<(usize, MotionPolicy)>, SbroadError> { + let unary_op_expr = self.get_expression_node(op_id)?; + let Expression::Unary { child, op } = unary_op_expr else { + return Err(SbroadError::Invalid( + Entity::Expression, + Some(format!("Expected Unary expression, got {unary_op_expr:?}")), + )); + }; + + if let Unary::Exists | Unary::NotExists = op { + let child_sq = self.get_additional_sq(rel_id, *child)?; + if let Some(child_sq) = child_sq { + return Ok(Some((child_sq, MotionPolicy::Full))); + } + } + + Ok(None) + } + /// Resolve sub-query conflicts with motion policies. 
fn resolve_sub_query_conflicts( &mut self, - rel_id: usize, - expr_id: usize, + select_id: usize, + filter_id: usize, ) -> Result<Strategy, SbroadError> { - let nodes = self.get_bool_nodes_with_row_children(expr_id)?; - for node in &nodes { - let bool_op = BoolOp::from_expr(self, *node)?; + let mut strategy = Strategy::new(select_id); + + let bool_nodes = self.get_bool_nodes_with_row_children(filter_id)?; + for bool_node in &bool_nodes { + let bool_op = BoolOp::from_expr(self, *bool_node)?; self.set_distribution(bool_op.left)?; self.set_distribution(bool_op.right)?; } - - let mut strategy = Strategy::new(rel_id); - for node in &nodes { - let strategies = self.get_sq_node_strategies(rel_id, *node)?; + for bool_node in &bool_nodes { + let strategies = self.get_sq_node_strategies_for_bool_op(select_id, *bool_node)?; for (id, policy) in strategies { strategy.add_child(id, policy); } } + + let unary_nodes = self.get_unary_nodes_with_row_children(filter_id)?; + for unary_node in &unary_nodes { + let unary_strategy = self.get_sq_node_strategy_for_unary_op(select_id, *unary_node)?; + if let Some((id, policy)) = unary_strategy { + strategy.add_child(id, policy); + } + } + Ok(strategy) } @@ -593,8 +667,8 @@ impl Plan { Ok(inner_positions) } - // Take the positions of the columns in the join condition row - // and return the positions of the columns in the inner child row. + /// Take the positions of the columns in the join condition row + /// and return the positions of the columns in the inner child row. 
fn get_referred_inner_child_column_positions( &self, column_positions: &[usize], @@ -761,17 +835,17 @@ impl Plan { fn resolve_join_conflicts( &mut self, rel_id: usize, - expr_id: usize, + cond_id: usize, join_kind: &JoinKind, ) -> Result<(), SbroadError> { // If one of the children has Distribution::Single, then we can't compute Distribution of // Rows in condition, because in case of Single it depends on join condition, and computing // distribution of Row in condition makes no sense, so we handle the single distribution separately if let Some(strategy) = - self.calculate_strategy_for_single_distribution(rel_id, expr_id, join_kind)? + self.calculate_strategy_for_single_distribution(rel_id, cond_id, join_kind)? { self.create_motion_nodes(&strategy)?; - let nodes = self.get_bool_nodes_with_row_children(expr_id)?; + let nodes = self.get_bool_nodes_with_row_children(cond_id)?; for node in &nodes { let bool_op = BoolOp::from_expr(self, *node)?; self.set_distribution(bool_op.left)?; @@ -782,7 +856,7 @@ impl Plan { // First, we need to set the motion policy for each boolean expression in the join condition. { - let nodes = self.get_bool_nodes_with_row_children(expr_id)?; + let nodes = self.get_bool_nodes_with_row_children(cond_id)?; for node in &nodes { let bool_op = BoolOp::from_expr(self, *node)?; self.set_distribution(bool_op.left)?; @@ -814,7 +888,7 @@ impl Plan { let mut new_inner_policy = MotionPolicy::Full; let mut expr_tree = PostOrder::with_capacity(|node| self.nodes.expr_iter(node, true), EXPR_CAPACITY); - for (_, node_id) in expr_tree.iter(expr_id) { + for (_, node_id) in expr_tree.iter(cond_id) { let expr = self.get_expression_node(node_id)?; let bool_op = if let Expression::Bool { .. } = expr { BoolOp::from_expr(self, node_id)? @@ -825,7 +899,12 @@ impl Plan { // Try to improve full motion policy in the sub-queries. // We don't influence the inner child here, so the inner map is empty // for the current node id. 
- let sq_strategies = self.get_sq_node_strategies(rel_id, node_id)?; + // `get_sq_node_strategies_for_bool_op` will be triggered only in case `node_id` is a + // boolean operator with both `Row` children. + // Note, that we don't have to call `get_sq_node_strategy_for_unary_op` here, because + // the only strategy it can return is `Motion::Full` for its child and all subqueries + // are covered with `Motion::Full` by default. + let sq_strategies = self.get_sq_node_strategies_for_bool_op(rel_id, node_id)?; let sq_strategies_len = sq_strategies.len(); for (id, policy) in sq_strategies { strategy.add_child(id, policy); diff --git a/sbroad-core/src/ir/transformation/redistribution/groupby.rs b/sbroad-core/src/ir/transformation/redistribution/groupby.rs index 90466218d7fdc60dd16bf3594bd7f8f0896018f9..296a1bce8483f4ff63b007755b06661c3801fafa 100644 --- a/sbroad-core/src/ir/transformation/redistribution/groupby.rs +++ b/sbroad-core/src/ir/transformation/redistribution/groupby.rs @@ -377,6 +377,12 @@ impl<'plan> ExpressionMapper<'plan> { } } +pub struct AggregateInfo { + pub expression_top: usize, + pub aggregate: SimpleAggregate, + pub is_distinct: bool, +} + impl Plan { #[allow(unreachable_code)] fn generate_local_alias(id: usize) -> String { diff --git a/sbroad-core/src/ir/transformation/redistribution/tests/segment.rs b/sbroad-core/src/ir/transformation/redistribution/tests/segment.rs index 28dd2dc6b6f152343d01b6252cd9fe84404b186a..1ad0ffc37d3f0a03df5d94d57cc0325c9a21ca30 100644 --- a/sbroad-core/src/ir/transformation/redistribution/tests/segment.rs +++ b/sbroad-core/src/ir/transformation/redistribution/tests/segment.rs @@ -63,7 +63,7 @@ fn sub_query1() { expected_rel_set.insert(sq_id); assert_eq!( expected_rel_set, - plan.get_relational_from_row_nodes(b_id).unwrap() + plan.get_relational_nodes_from_row(b_id).unwrap() ); assert_eq!(Some(sq_id), plan.get_sub_query_from_row_node(b_id).unwrap());