diff --git a/doc/sql/query.ebnf b/doc/sql/query.ebnf index 9f19aa39b026b741ac88647e10c110b32019c993..949451b40543b55335c69808e4010113c825a748 100644 --- a/doc/sql/query.ebnf +++ b/doc/sql/query.ebnf @@ -13,7 +13,7 @@ select ::= 'SELECT' 'DISTINCT'? ((projection (',' projection)*)) ((table | ('(' (select | values) ')')) (('AS')? name)?) ) 'ON' expression - )? + )* ('WHERE' expression)? ('GROUP' 'BY' expression (',' expression)*)? ('HAVING' expression)? diff --git a/sbroad-core/src/executor/engine/mock.rs b/sbroad-core/src/executor/engine/mock.rs index a427271786d5dbdb11b1a08a3993547240145f55..8203c6ce7cb1ac7618da9c6668b9ce6899b5b283 100644 --- a/sbroad-core/src/executor/engine/mock.rs +++ b/sbroad-core/src/executor/engine/mock.rs @@ -301,6 +301,25 @@ impl RouterConfigurationMock { .unwrap(), ); + let columns = vec![ + Column::new("\"bucket_id\"", Type::Unsigned, ColumnRole::Sharding, true), + Column::new("\"c\"", Type::String, ColumnRole::User, false), + Column::new("\"d\"", Type::Integer, ColumnRole::User, false), + ]; + let sharding_key: &[&str] = &["\"c\""]; + let primary_key: &[&str] = &["\"d\""]; + tables.insert( + "\"t4\"".to_string(), + Table::new_sharded( + "\"t4\"", + columns, + sharding_key, + primary_key, + SpaceEngine::Memtx, + ) + .unwrap(), + ); + let columns = vec![ Column::new("\"a\"", Type::Integer, ColumnRole::User, false), Column::new("\"b\"", Type::Integer, ColumnRole::User, false), diff --git a/sbroad-core/src/frontend/sql/ast.rs b/sbroad-core/src/frontend/sql/ast.rs index c70377031d2e39efc10ae22b6985c7b6288aaa20..668076ac209bc72bdb06d272f5dc267930d6c394 100644 --- a/sbroad-core/src/frontend/sql/ast.rs +++ b/sbroad-core/src/frontend/sql/ast.rs @@ -197,22 +197,6 @@ impl PartialEq for AbstractSyntaxTree { } } -/// Helper function to extract i-th element of array, when we sure it is safe -/// But we don't want to panic if future changes break something, so we -/// bubble out with error. 
-/// -/// Supposed to be used only in `transform_select_X` methods! -#[inline] -fn get_or_err(arr: &[usize], idx: usize) -> Result<usize, SbroadError> { - arr.get(idx) - .ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format!( - "AST children array: {arr:?}. Requested index: {idx}" - )) - }) - .map(|v| *v) -} - #[allow(dead_code)] impl AbstractSyntaxTree { /// Set the top of AST. @@ -427,27 +411,11 @@ impl AbstractSyntaxTree { Ok(()) } - /// `Select` node is not IR-friendly as it can have up to six children. - /// Transform this node in IR-way (to a binary sub-tree). - /// - /// The task of `transform_select_i` functions is to build nested structure of rules. - /// E.g. if we work with query `select ... from t ... where expr`, the initial structure like - /// `Select [ - /// children = [Projection, Scan(t), WhereClause(expr)] - /// ]` - /// will be transformed into - /// `Select [ - /// children = [Projection [ - /// children = [Scan(t) [ - /// children = [WhereClause(expr)] - /// ] - /// ] - /// ]` - /// - /// At the end of `transform_select` work all `Select` nodes are replaced with - /// their first child (always Projection): - /// * If some node contained Select as a child, we replace that child with Select child - /// * In case Select is a top node, it's replaced itself + /// Transform select AST to IR friendly one. At the end of transformation + /// all `Select` nodes are replaced with their first children (always `Projection`). + /// - When some node contains `Select` as a child, that child is replaced with + /// `Projection` (`Select`'s first child). + /// - When `Select` is a top node, its `Projection` (first child) becomes a new top. 
pub(super) fn transform_select(&mut self) -> Result<(), SbroadError> { let mut selects: HashSet<usize> = HashSet::new(); for id in 0..self.nodes.arena.len() { @@ -462,14 +430,7 @@ impl AbstractSyntaxTree { for node in &selects { let select = self.nodes.get_node(*node)?; let children: Vec<usize> = select.children.clone(); - match children.len() { - 2 => self.transform_select_2(*node, &children)?, - 3 => self.transform_select_3(*node, &children)?, - 4 => self.transform_select_4(*node, &children)?, - 5 => self.transform_select_5(*node, &children)?, - 6 => self.transform_select_6(*node, &children)?, - _ => return Err(SbroadError::Invalid(Entity::AST, None)), - } + self.reorder_select_children(*node, &children)?; } // Collect select nodes' parents. @@ -508,191 +469,81 @@ impl AbstractSyntaxTree { Ok(()) } - fn check<const N: usize, const M: usize>( - &self, - allowed: &[[Rule; N]; M], - select_children: &[usize], + fn reorder_select_children( + &mut self, + select_id: usize, + children: &[usize], ) -> Result<(), SbroadError> { - let allowed_len = if let Some(seq) = allowed.first() { - seq.len() + // SQL grammar produces a defined order of children in select node: + // 1. Projection: required + // 2. Scan: required (bind with Projection) + // 3. Join: optional (can be repeated multiple times) + // 4. Selection: optional + // 5. GroupBy: optional + // 6. Having: optional + // + // We need to reorder this sequence to the following: + // 1. Projection: required + // 2. Having: optional + // 3. GroupBy: optional + // 4. Selection: optional + // 5. Join: optional (can be repeated multiple times) + // 6. 
Scan: required + let mut proj_id: Option<usize> = None; + let mut scan_id: Option<usize> = None; + let mut join_ids = if children.len() > 2 { + Vec::with_capacity(children.len() - 2) } else { - return Err(SbroadError::UnexpectedNumberOfValues( - "Expected at least one sequence to check select children".into(), - )); + Vec::new() }; - if select_children.len() != allowed_len { - return Err(SbroadError::UnexpectedNumberOfValues(format!( - "Expected select {allowed_len} children, got {}", - select_children.len() - ))); - } - let mut is_match = false; - for seq in allowed { - let mut all_types_matched = true; - for (child, expected_type) in select_children.iter().zip(seq) { - let node = self.nodes.get_node(*child)?; - if node.rule != *expected_type { - all_types_matched = false; - break; - } - } - if all_types_matched { - is_match = true; - break; + let mut filter_id: Option<usize> = None; + let mut group_id: Option<usize> = None; + let mut having_id: Option<usize> = None; + + for child_id in children { + let child = self.nodes.get_node(*child_id)?; + match child.rule { + Rule::Projection => proj_id = Some(*child_id), + Rule::Scan => scan_id = Some(*child_id), + Rule::Join => join_ids.push(*child_id), + Rule::Selection => filter_id = Some(*child_id), + Rule::GroupBy => group_id = Some(*child_id), + Rule::Having => having_id = Some(*child_id), + _ => panic!("{} {:?}", "Unexpected rule in select children:", child.rule), } } - if !is_match { - return Err(SbroadError::Invalid( - Entity::AST, - Some("Could not match select children to any expected sequence".into()), - )); - } - Ok(()) - } - - fn transform_select_2( - &mut self, - select_id: usize, - children: &[usize], - ) -> Result<(), SbroadError> { - let allowed = [[Rule::Projection, Rule::Scan]]; - self.check(&allowed, children)?; - self.nodes - .push_front_child(get_or_err(children, 0)?, get_or_err(children, 1)?)?; - self.nodes.set_children(select_id, vec![children[0]])?; - Ok(()) - } - fn transform_select_3( - &mut 
self, - select_id: usize, - children: &[usize], - ) -> Result<(), SbroadError> { - let allowed = [ - [Rule::Projection, Rule::Scan, Rule::Join], - [Rule::Projection, Rule::Scan, Rule::GroupBy], - [Rule::Projection, Rule::Scan, Rule::Selection], - [Rule::Projection, Rule::Scan, Rule::Having], - ]; - self.check(&allowed, children)?; - self.nodes - .push_front_child(get_or_err(children, 2)?, get_or_err(children, 1)?)?; - self.nodes - .push_front_child(get_or_err(children, 0)?, get_or_err(children, 2)?)?; - self.nodes - .set_children(select_id, vec![get_or_err(children, 0)?])?; - Ok(()) - } - - fn transform_select_4( - &mut self, - select_id: usize, - children: &[usize], - ) -> Result<(), SbroadError> { - let allowed = [ - [Rule::Projection, Rule::Scan, Rule::Selection, Rule::GroupBy], - [Rule::Projection, Rule::Scan, Rule::Selection, Rule::Having], - [Rule::Projection, Rule::Scan, Rule::GroupBy, Rule::Having], - [Rule::Projection, Rule::Scan, Rule::Join, Rule::Selection], - [Rule::Projection, Rule::Scan, Rule::Join, Rule::GroupBy], - [Rule::Projection, Rule::Scan, Rule::Join, Rule::Having], - ]; - self.check(&allowed, children)?; - // insert Selection | InnerJoin as first child of GroupBy - self.nodes - .push_front_child(get_or_err(children, 3)?, get_or_err(children, 2)?)?; - // insert Scan as first child of Selection | InnerJoin - self.nodes - .push_front_child(get_or_err(children, 2)?, get_or_err(children, 1)?)?; - // insert GroupBy as first child of Projection - self.nodes - .push_front_child(get_or_err(children, 0)?, get_or_err(children, 3)?)?; - self.nodes.set_children(select_id, vec![children[0]])?; - Ok(()) - } - - fn transform_select_5( - &mut self, - select_id: usize, - children: &[usize], - ) -> Result<(), SbroadError> { - let allowed = [ - [ - Rule::Projection, - Rule::Scan, - Rule::Join, - Rule::Selection, - Rule::GroupBy, - ], - [ - Rule::Projection, - Rule::Scan, - Rule::Join, - Rule::Selection, - Rule::Having, - ], - [ - Rule::Projection, - Rule::Scan, 
- Rule::Join, - Rule::GroupBy, - Rule::Having, - ], - [ - Rule::Projection, - Rule::Scan, - Rule::Selection, - Rule::GroupBy, - Rule::Having, - ], - ]; - self.check(&allowed, children)?; - // insert Selection as first child of GroupBy - self.nodes - .push_front_child(get_or_err(children, 4)?, get_or_err(children, 3)?)?; - // insert InnerJoin as first child of Selection - self.nodes - .push_front_child(get_or_err(children, 3)?, get_or_err(children, 2)?)?; - // insert Scan as first child of InnerJoin - self.nodes - .push_front_child(get_or_err(children, 2)?, get_or_err(children, 1)?)?; - // insert GroupBy as first child of Projection - self.nodes - .push_front_child(get_or_err(children, 0)?, get_or_err(children, 4)?)?; - self.nodes.set_children(select_id, vec![children[0]])?; - Ok(()) - } + // Projection and Scan are required. If they are not present, there is an error + // in the SQL grammar. + let proj_id = proj_id.expect("Projection node is required in select node"); + let scan_id = scan_id.expect("Scan node is required in select node"); + let mut child_id = scan_id; + + // The order of the nodes in the chain is partially reversed. + // Original nodes from grammar: + // Projection -> Scan -> Join1 -> ... -> JoinK -> Selection -> GroupBy -> Having. + // We need to change the order of the chain to: + // Projection -> Having -> GroupBy -> Selection -> JoinK -> ... 
-> Join1 + let mut chain = Vec::with_capacity(children.len() - 1); + chain.push(proj_id); + if let Some(having_id) = having_id { + chain.push(having_id); + } + if let Some(group_id) = group_id { + chain.push(group_id); + } + if let Some(filter_id) = filter_id { + chain.push(filter_id); + } + while let Some(join_id) = join_ids.pop() { + chain.push(join_id); + } + while let Some(id) = chain.pop() { + self.nodes.push_front_child(id, child_id)?; + child_id = id; + } + self.nodes.set_children(select_id, vec![child_id])?; - fn transform_select_6( - &mut self, - select_id: usize, - children: &[usize], - ) -> Result<(), SbroadError> { - let allowed = [[ - Rule::Projection, - Rule::Scan, - Rule::Join, - Rule::Selection, - Rule::GroupBy, - Rule::Having, - ]]; - self.check(&allowed, children)?; - // insert GroupBy as first child of Having - self.nodes - .push_front_child(get_or_err(children, 5)?, get_or_err(children, 4)?)?; - // insert Selection as first child of GroupBy - self.nodes - .push_front_child(get_or_err(children, 4)?, get_or_err(children, 3)?)?; - // insert InnerJoin as first child of Selection - self.nodes - .push_front_child(get_or_err(children, 3)?, get_or_err(children, 2)?)?; - // insert Scan as first child of InnerJoin - self.nodes - .push_front_child(get_or_err(children, 2)?, get_or_err(children, 1)?)?; - // insert Having as first child of Projection - self.nodes - .push_front_child(get_or_err(children, 0)?, get_or_err(children, 5)?)?; - self.nodes.set_children(select_id, vec![children[0]])?; Ok(()) } } diff --git a/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad-core/src/frontend/sql/ir/tests.rs index bfd477151295ddacfdc0aa7cf389a7400bf11591..06e71a049e91388eaf5cc21e1e69be2dc9c9c05c 100644 --- a/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad-core/src/frontend/sql/ir/tests.rs @@ -2897,6 +2897,8 @@ mod global; #[cfg(test)] mod insert; #[cfg(test)] +mod join; +#[cfg(test)] mod params; #[cfg(test)] mod single; diff --git 
a/sbroad-core/src/frontend/sql/ir/tests/join.rs b/sbroad-core/src/frontend/sql/ir/tests/join.rs new file mode 100644 index 0000000000000000000000000000000000000000..70c30a1a1b50b724fc04cb711d37e7d87c6ca654 --- /dev/null +++ b/sbroad-core/src/frontend/sql/ir/tests/join.rs @@ -0,0 +1,110 @@ +use crate::ir::transformation::helpers::sql_to_optimized_ir; +use crate::ir::value::Value; +use pretty_assertions::assert_eq; + +/// An inner join followed by a left join over sub-queries with a `WHERE` filter. +#[test] +fn multi_join1() { + let input = r#"SELECT * FROM ( + SELECT "identification_number", "product_code" FROM "hash_testing" + ) as t1 + INNER JOIN (SELECT "id" FROM "test_space") as t2 + ON t1."identification_number" = t2."id" + LEFT JOIN (SELECT "id" FROM "test_space") as t3 + ON t1."identification_number" = t3."id" + WHERE t1."identification_number" = 5 and t1."product_code" = '123'"#; + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("T1"."identification_number"::integer -> "identification_number", "T1"."product_code"::string -> "product_code", "T2"."id"::unsigned -> "id", "T3"."id"::unsigned -> "id") + selection ROW("T1"."identification_number"::integer) = ROW(5::unsigned) and ROW("T1"."product_code"::string) = ROW('123'::string) + left join on ROW("T1"."identification_number"::integer) = ROW("T3"."id"::unsigned) + join on ROW("T1"."identification_number"::integer) = ROW("T2"."id"::unsigned) + scan "T1" + projection ("hash_testing"."identification_number"::integer -> "identification_number", "hash_testing"."product_code"::string -> "product_code") + scan "hash_testing" + motion [policy: full] + scan "T2" + projection ("test_space"."id"::unsigned -> "id") + scan "test_space" + motion [policy: full] + scan "T3" + projection ("test_space"."id"::unsigned -> "id") + scan "test_space" +execution options: +sql_vdbe_max_steps = 45000 +vtable_max_rows = 5000 +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + +/// Two chained left joins, the second one with a constant `true` condition. +#[test] +fn multi_join2() { + let input = r#"SELECT * FROM 
"t1" LEFT JOIN "t2" ON "t1"."a" = "t2"."e" + LEFT JOIN "t4" ON true +"#; + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("t1"."a"::string -> "a", "t1"."b"::integer -> "b", "t2"."e"::unsigned -> "e", "t2"."f"::unsigned -> "f", "t2"."g"::unsigned -> "g", "t2"."h"::unsigned -> "h", "t4"."c"::string -> "c", "t4"."d"::integer -> "d") + left join on true::boolean + left join on ROW("t1"."a"::string) = ROW("t2"."e"::unsigned) + scan "t1" + projection ("t1"."a"::string -> "a", "t1"."b"::integer -> "b") + scan "t1" + motion [policy: full] + scan "t2" + projection ("t2"."e"::unsigned -> "e", "t2"."f"::unsigned -> "f", "t2"."g"::unsigned -> "g", "t2"."h"::unsigned -> "h") + scan "t2" + motion [policy: full] + scan "t4" + projection ("t4"."c"::string -> "c", "t4"."d"::integer -> "d") + scan "t4" +execution options: +sql_vdbe_max_steps = 45000 +vtable_max_rows = 5000 +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + +/// A left join followed by two inner joins over four tables. +#[test] +fn multi_join3() { + let input = r#"SELECT * FROM "t1" LEFT JOIN "t2" ON "t1"."a" = "t2"."e" + JOIN "t3" ON "t1"."a" = "t3"."a" JOIN "t4" ON "t2"."f" = "t4"."c" +"#; + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("t1"."a"::string -> "a", "t1"."b"::integer -> "b", "t2"."e"::unsigned -> "e", "t2"."f"::unsigned -> "f", "t2"."g"::unsigned -> "g", "t2"."h"::unsigned -> "h", "t3"."a"::string -> "a", "t3"."b"::integer -> "b", "t4"."c"::string -> "c", "t4"."d"::integer -> "d") + join on ROW("t2"."f"::unsigned) = ROW("t4"."c"::string) + join on ROW("t1"."a"::string) = ROW("t3"."a"::string) + left join on ROW("t1"."a"::string) = ROW("t2"."e"::unsigned) + scan "t1" + projection ("t1"."a"::string -> "a", "t1"."b"::integer -> "b") + scan "t1" + motion [policy: full] + scan "t2" + projection ("t2"."e"::unsigned -> "e", "t2"."f"::unsigned -> "f", "t2"."g"::unsigned -> "g", "t2"."h"::unsigned -> "h") + scan "t2" + motion 
[policy: full] + scan "t3" + projection ("t3"."a"::string -> "a", "t3"."b"::integer -> "b") + scan "t3" + motion [policy: full] + scan "t4" + projection ("t4"."c"::string -> "c", "t4"."d"::integer -> "d") + scan "t4" +execution options: +sql_vdbe_max_steps = 45000 +vtable_max_rows = 5000 +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} diff --git a/sbroad-core/src/frontend/sql/query.pest b/sbroad-core/src/frontend/sql/query.pest index 6ad8fb4555f237897c33f43ec8902a7afe22988e..f8248af145ea733e7cef624afa6614346daf6f61 100644 --- a/sbroad-core/src/frontend/sql/query.pest +++ b/sbroad-core/src/frontend/sql/query.pest @@ -108,7 +108,7 @@ Query = { (SelectWithOptionalContinuation | Values | Insert | Update | Delete) ~ UnionAllContinuation = { ^"union" ~ ^"all" ~ Select } Select = { ^"select" ~ Projection ~ ^"from" ~ Scan ~ - Join? ~ WhereClause? ~ + Join* ~ WhereClause? ~ (^"group" ~ ^"by" ~ GroupBy)? ~ (^"having" ~ Having)? }