From 27651004067fdeb0d5458c6debb291d432918008 Mon Sep 17 00:00:00 2001 From: Denis Smirnov <sd@picodata.io> Date: Wed, 23 Mar 2022 15:17:59 +0700 Subject: [PATCH] feat: merge tuples from the AND-ed boolean expressions --- src/frontend/sql/ast.rs | 7 +- src/frontend/sql/ast/tests.rs | 11 +- src/ir/expression.rs | 18 ++ src/ir/operator.rs | 14 +- src/ir/operator/tests.rs | 4 +- src/ir/transformation.rs | 1 + src/ir/transformation/merge_tuples.rs | 313 ++++++++++++++++++++ src/ir/transformation/merge_tuples/tests.rs | 112 +++++++ src/ir/tree.rs | 46 +++ test_app/test/integration/api_test.lua | 2 +- 10 files changed, 517 insertions(+), 11 deletions(-) create mode 100644 src/ir/transformation/merge_tuples.rs create mode 100644 src/ir/transformation/merge_tuples/tests.rs diff --git a/src/frontend/sql/ast.rs b/src/frontend/sql/ast.rs index 506457fab7..d96a97ca5a 100644 --- a/src/frontend/sql/ast.rs +++ b/src/frontend/sql/ast.rs @@ -251,7 +251,12 @@ impl AbstractSyntaxTree { let mut command_pair = match ParseTree::parse(Rule::Command, query) { Ok(p) => p, - Err(_) => return Err(QueryPlannerError::CustomError("Invalid command.".into())), + Err(e) => { + return Err(QueryPlannerError::CustomError(format!( + "Parsing error: {:?}", + e + ))) + } }; let top_pair = command_pair.next().ok_or_else(|| { QueryPlannerError::CustomError("No query found in the parse tree.".to_string()) diff --git a/src/frontend/sql/ast/tests.rs b/src/frontend/sql/ast/tests.rs index 1becbf31a4..2968c6d355 100644 --- a/src/frontend/sql/ast/tests.rs +++ b/src/frontend/sql/ast/tests.rs @@ -144,5 +144,14 @@ fn traversal() { fn invalid_query() { let query = r#"select a frAm t"#; let ast = AbstractSyntaxTree::new(query).unwrap_err(); - assert_eq!("Invalid command.", format!("{}", ast)); + assert_eq!( + format!( + "{} {} {} {}", + r#"Parsing error: Error { variant: ParsingError { positives:"#, + r#"[Alias, Asterisk, Number, True, False, Null, Row], negatives: [] },"#, + r#"location: Pos(7), line_col: Pos((1, 8)), path: None, line: "select a frAm t","#, + r#"continued_line: None }"#, + ), + format!("{}", ast), + ); } diff --git a/src/ir/expression.rs b/src/ir/expression.rs index a72c865751..88a18c2bba 100644 --- a/src/ir/expression.rs +++ b/src/ir/expression.rs @@ -658,6 +658,24 @@ impl Plan { Ok(false) } + /// The node is a reference (or a row of a single reference column). + /// + /// # Errors + /// - If node is not an expression. + pub fn is_ref(&self, expr_id: usize) -> Result<bool, QueryPlannerError> { + let expr = self.get_expression_node(expr_id)?; + match expr { + Expression::Reference { .. } => return Ok(true), + Expression::Row { list, .. } => { + if let (Some(inner_id), None) = (list.first(), list.get(1)) { + return self.is_ref(*inner_id); + } + } + _ => {} + } + Ok(false) + } + /// Extract `Const` value from `Row` by index /// /// # Errors diff --git a/src/ir/operator.rs b/src/ir/operator.rs index a016a794fd..c10c68563b 100644 --- a/src/ir/operator.rs +++ b/src/ir/operator.rs @@ -372,9 +372,10 @@ impl Plan { right: usize, condition: usize, ) -> Result<usize, QueryPlannerError> { - if let Node::Expression(Expression::Bool { .. }) = self.get_node(condition)? { - } else { - return Err(QueryPlannerError::InvalidBool); + if !self.is_trivalent(condition)? { + return Err(QueryPlannerError::CustomError(String::from( + "Condition is not a trivalent expression", + ))); } let output = self.add_row_for_join(left, right)?; @@ -479,9 +480,10 @@ impl Plan { _ => children[0], }; - if let Node::Expression(Expression::Bool { .. }) = self.get_node(filter)? { - } else { - return Err(QueryPlannerError::InvalidBool); + if !self.is_trivalent(filter)? { + return Err(QueryPlannerError::CustomError( + "Filter expression is not a trivalent expression.".into(), + )); } for child in children { diff --git a/src/ir/operator/tests.rs b/src/ir/operator/tests.rs index 3cd20e0b18..5a9865d2bd 100644 --- a/src/ir/operator/tests.rs +++ b/src/ir/operator/tests.rs @@ -165,9 +165,9 @@ fn selection() { // Correct Selection operator plan.add_select(&[scan_id], gt_id).unwrap(); - // Non-boolean filter + // Non-trivalent filter assert_eq!( - QueryPlannerError::InvalidBool, + QueryPlannerError::CustomError("Filter expression is not a trivalent expression.".into()), plan.add_select(&[scan_id], const_row).unwrap_err() ); diff --git a/src/ir/transformation.rs b/src/ir/transformation.rs index 3935fadc31..9334f8ea96 100644 --- a/src/ir/transformation.rs +++ b/src/ir/transformation.rs @@ -5,6 +5,7 @@ pub mod bool_in; pub mod dnf; pub mod equality_propagation; +pub mod merge_tuples; pub mod redistribution; pub mod split_columns; diff --git a/src/ir/transformation/merge_tuples.rs b/src/ir/transformation/merge_tuples.rs new file mode 100644 index 0000000000..15ef271807 --- /dev/null +++ b/src/ir/transformation/merge_tuples.rs @@ -0,0 +1,313 @@ +use crate::errors::QueryPlannerError; +use crate::ir::expression::Expression; +use crate::ir::operator::Bool; +use crate::ir::Plan; +use std::collections::{hash_map::Entry, HashMap, HashSet}; +use traversal::Bft; + +fn call_expr_tree_merge_tuples(plan: &mut Plan, top_id: usize) -> Result<usize, QueryPlannerError> { + plan.expr_tree_merge_tuples(top_id) +} + +#[derive(Debug)] +struct Chain { + // Left and right sides of the boolean expression + // grouped by the operator. + grouped: HashMap<Bool, (Vec<usize>, Vec<usize>)>, + // Non-boolean expressions in the AND chain (true, false, null). + other: Vec<usize>, +} + +impl Chain { + fn new() -> Self { + Self { + grouped: HashMap::new(), + other: Vec::new(), + } + } + + fn insert(&mut self, plan: &mut Plan, expr_id: usize) -> Result<(), QueryPlannerError> { + let bool_expr = plan.get_expression_node(expr_id)?; + if let Expression::Bool { left, op, right } = bool_expr { + let (left_id, right_id, group_op) = match *op { + Bool::And | Bool::Or => { + // We can only merge tuples in non AND/OR expressions. + return Err(QueryPlannerError::CustomError(format!( + "AND/OR expressions are not supported: {:?}", + bool_expr + ))); + } + Bool::Eq | Bool::NotEq => { + // Try to put expressions with references to the left side. + match (plan.is_ref(*left)?, plan.is_ref(*right)?) { + (false, true) => (*right, *left, op.clone()), + _ => (*left, *right, op.clone()), + } + } + Bool::Gt | Bool::GtEq | Bool::In => (*left, *right, op.clone()), + // Invert operator to unite expressions with Gt and GtEq. + Bool::Lt => (*right, *left, Bool::Gt), + Bool::LtEq => (*right, *left, Bool::GtEq), + }; + match self.grouped.entry(group_op) { + Entry::Occupied(mut entry) => { + let (left, right) = entry.get_mut(); + let new_left_id = plan.expr_clone(left_id)?; + let new_right_id = plan.expr_clone(right_id)?; + plan.get_columns_or_self(new_left_id)? + .iter() + .for_each(|id| { + left.push(*id); + }); + plan.get_columns_or_self(new_right_id)? + .iter() + .for_each(|id| { + right.push(*id); + }); + } + Entry::Vacant(entry) => { + let new_left_id = plan.expr_clone(left_id)?; + let new_right_id = plan.expr_clone(right_id)?; + entry.insert(( + plan.get_columns_or_self(new_left_id)?, + plan.get_columns_or_self(new_right_id)?, + )); + } + } + } else { + let new_expr_id = plan.expr_clone(expr_id)?; + self.other.push(new_expr_id); + } + Ok(()) + } + + fn as_plan(&self, plan: &mut Plan) -> Result<usize, QueryPlannerError> { + let other_top_id = match self.other.split_first() { + Some((first, other)) => { + let mut top_id = *first; + for id in other { + top_id = plan.add_cond(*id, Bool::And, top_id)?; + } + Some(top_id) + } + None => None, + }; + + // Chain is grouped by the operators in the hash map. + // To make serialization non-flaky, we extract operators + // in a deterministic order. + let mut grouped_top_id: Option<usize> = None; + // No need for "And" and "Or" operators. + let ordered_ops = &[ + Bool::Eq, + Bool::Gt, + Bool::GtEq, + Bool::In, + Bool::Lt, + Bool::LtEq, + Bool::NotEq, + ]; + for op in ordered_ops { + if let Some((left, right)) = self.grouped.get(op) { + let left_row_id = plan.nodes.add_row(left.clone(), None); + let right_row_id = plan.nodes.add_row(right.clone(), None); + let cond_id = plan.add_cond(left_row_id, op.clone(), right_row_id)?; + match grouped_top_id { + None => { + grouped_top_id = Some(cond_id); + } + Some(top_id) => { + grouped_top_id = Some(plan.add_cond(top_id, Bool::And, cond_id)?); + } + } + } + } + match (grouped_top_id, other_top_id) { + (Some(grouped_top_id), Some(other_top_id)) => { + Ok(plan.add_cond(grouped_top_id, Bool::And, other_top_id)?) + } + (Some(grouped_top_id), None) => Ok(grouped_top_id), + (None, Some(other_top_id)) => Ok(other_top_id), + (None, None) => Err(QueryPlannerError::CustomError( + "No expressions to merge".to_string(), + )), + } + } + + fn is_empty(&self) -> bool { + self.grouped.is_empty() && self.other.is_empty() + } +} + +impl Plan { + fn get_columns_or_self(&self, expr_id: usize) -> Result<Vec<usize>, QueryPlannerError> { + let expr = self.get_expression_node(expr_id)?; + match expr { + Expression::Row { list, .. } => Ok(list.clone()), + _ => Ok(vec![expr_id]), + } + } + + fn populate_and_chains( + &mut self, + nodes: &[usize], + ) -> Result<HashMap<usize, Chain>, QueryPlannerError> { + let mut visited: HashSet<usize> = HashSet::new(); + let mut chains: HashMap<usize, Chain> = HashMap::new(); + + for id in nodes { + if visited.contains(id) { + continue; + } + visited.insert(*id); + + let tree_and = Bft::new(id, |node| self.nodes.and_iter(node)); + let mut nodes_and: Vec<usize> = Vec::new(); + for (_, and_id) in tree_and { + nodes_and.push(*and_id); + } + let mut chain = Chain::new(); + let mut nodes_for_chain: Vec<usize> = Vec::new(); + for and_id in nodes_and { + let expr = self.get_expression_node(and_id)?; + if let Expression::Bool { + left, + op: Bool::And, + right, + .. + } = expr + { + let children = vec![*left, *right]; + for child_id in children { + visited.insert(child_id); + let child_expr = self.get_expression_node(child_id)?; + if let Expression::Bool { + op: Bool::And | Bool::Or, + .. + } = child_expr + { + continue; + } + nodes_for_chain.push(child_id); + } + } + } + for node_id in nodes_for_chain { + chain.insert(self, node_id)?; + } + + if !chain.is_empty() { + chains.insert(*id, chain); + } + } + Ok(chains) + } + + fn expr_tree_merge_tuples(&mut self, expr_id: usize) -> Result<usize, QueryPlannerError> { + let mut nodes: Vec<usize> = Vec::new(); + let tree = Bft::new(&expr_id, |node| self.nodes.expr_iter(node, false)); + for (_, id) in tree { + nodes.push(*id); + } + let chains = self.populate_and_chains(&nodes)?; + + // Replace nodes' children with the merged tuples. + for id in nodes { + let expr = self.get_expression_node(id)?; + match expr { + Expression::Alias { child, .. } => { + let chain = chains.get(child); + if let Some(chain) = chain { + let new_child_id = chain.as_plan(self)?; + let expr_mut = self.get_mut_expression_node(id)?; + if let Expression::Alias { + child: ref mut child_id, + .. + } = expr_mut + { + *child_id = new_child_id; + } else { + return Err(QueryPlannerError::CustomError(format!( + "Expected alias expression: {:?}", + expr_mut + ))); + } + } + } + Expression::Bool { left, right, .. } => { + let children = vec![*left, *right]; + for (pos, child) in children.iter().enumerate() { + let chain = chains.get(child); + if let Some(chain) = chain { + let new_child_id = chain.as_plan(self)?; + let expr_mut = self.get_mut_expression_node(id)?; + if let Expression::Bool { + left: ref mut left_id, + right: ref mut right_id, + .. + } = expr_mut + { + if pos == 0 { + *left_id = new_child_id; + } else { + *right_id = new_child_id; + } + } else { + return Err(QueryPlannerError::CustomError(format!( + "Expected boolean expression: {:?}", + expr_mut + ))); + } + } + } + } + Expression::Row { list, .. } => { + let children = list.clone(); + for (pos, child) in children.iter().enumerate() { + let chain = chains.get(child); + if let Some(chain) = chain { + let new_child_id = chain.as_plan(self)?; + let expr_mut = self.get_mut_expression_node(id)?; + if let Expression::Row { ref mut list, .. } = expr_mut { + if let Some(child_id) = list.get_mut(pos) { + *child_id = new_child_id; + } else { + return Err(QueryPlannerError::CustomError(format!( + "Expected a column at position {} in the row {:?}", + pos, expr_mut + ))); + } + } else { + return Err(QueryPlannerError::CustomError(format!( + "Expected row expression: {:?}", + expr_mut + ))); + } + } + } + } + _ => continue, + } + } + + // Try to replace the subtree top node (if it is also AND). + if let Some(top_chain) = chains.get(&expr_id) { + let new_expr_id = top_chain.as_plan(self)?; + return Ok(new_expr_id); + } + + Ok(expr_id) + } + + /// Group boolean operators in the AND-ed chain by operator type and merge + /// them into a single boolean operator. + /// + /// # Errors + /// - If the plan tree is invalid (doesn't contain correct nodes where we expect it to). + pub fn merge_tuples(&mut self) -> Result<(), QueryPlannerError> { + self.transform_expr_trees(&call_expr_tree_merge_tuples) + } +} + +#[cfg(test)] +mod tests; diff --git a/src/ir/transformation/merge_tuples/tests.rs b/src/ir/transformation/merge_tuples/tests.rs new file mode 100644 index 0000000000..8e8e1d8a2f --- /dev/null +++ b/src/ir/transformation/merge_tuples/tests.rs @@ -0,0 +1,112 @@ +use pretty_assertions::assert_eq; + +use crate::executor::engine::mock::MetadataMock; +use crate::executor::ir::ExecutionPlan; +use crate::frontend::sql::ast::AbstractSyntaxTree; + +#[test] +fn merge_tuples1() { + let query = r#"SELECT "a" FROM "t" WHERE "a" = 1 and "b" = 2 and "c" < 3 and 4 < "a""#; + + let metadata = &MetadataMock::new(); + let ast = AbstractSyntaxTree::new(query).unwrap(); + let mut plan = ast.to_ir(metadata).unwrap(); + plan.merge_tuples().unwrap(); + let ex_plan = ExecutionPlan::from(&plan); + + let top_id = plan.get_top().unwrap(); + let sql = ex_plan.subtree_as_sql(top_id).unwrap(); + assert_eq!( + format!( + "{} {}", + r#"SELECT "t"."a" as "a" FROM "t""#, + r#"WHERE ("t"."a", "t"."b") = (1, 2) and (3, "t"."a") > ("t"."c", 4)"#, + ), + sql + ); +} + +#[test] +fn merge_tuples2() { + let query = r#"SELECT "a" FROM "t" + WHERE "a" = 1 and null and "b" = 2 + or true and "c" >= 3 and 4 <= "a""#; + + let metadata = &MetadataMock::new(); + let ast = AbstractSyntaxTree::new(query).unwrap(); + let mut plan = ast.to_ir(metadata).unwrap(); + plan.merge_tuples().unwrap(); + let ex_plan = ExecutionPlan::from(&plan); + + let top_id = plan.get_top().unwrap(); + let sql = ex_plan.subtree_as_sql(top_id).unwrap(); + assert_eq!( + format!( + "{} {} {}", + r#"SELECT "t"."a" as "a" FROM "t""#, + r#"WHERE (("t"."a", "t"."b") = (1, 2) and (NULL)"#, + r#"or ("t"."c", "t"."a") >= (3, 4) and (true))"#, + ), + sql + ); +} + +#[test] +fn merge_tuples3() { + let query = r#"SELECT "a" FROM "t" WHERE true"#; + + let metadata = &MetadataMock::new(); + let ast = AbstractSyntaxTree::new(query).unwrap(); + let mut plan = ast.to_ir(metadata).unwrap(); + plan.merge_tuples().unwrap(); + let ex_plan = ExecutionPlan::from(&plan); + + let top_id = plan.get_top().unwrap(); + let sql = ex_plan.subtree_as_sql(top_id).unwrap(); + assert_eq!( + format!("{}", r#"SELECT "t"."a" as "a" FROM "t" WHERE true"#,), + sql + ); +} + +#[test] +fn merge_tuples4() { + let query = r#"SELECT "a" FROM "t" WHERE ("a", "b") = (1, 2) and 3 = "c""#; + + let metadata = &MetadataMock::new(); + let ast = AbstractSyntaxTree::new(query).unwrap(); + let mut plan = ast.to_ir(metadata).unwrap(); + plan.merge_tuples().unwrap(); + let ex_plan = ExecutionPlan::from(&plan); + + let top_id = plan.get_top().unwrap(); + let sql = ex_plan.subtree_as_sql(top_id).unwrap(); + assert_eq!( + format!( + "{} {}", + r#"SELECT "t"."a" as "a" FROM "t""#, r#"WHERE ("t"."a", "t"."b", "t"."c") = (1, 2, 3)"#, + ), + sql + ); +} + +#[test] +fn merge_tuples5() { + let query = r#"SELECT "a" FROM "t" WHERE 3 < "c" and ("a", "b") > (1, 2)"#; + + let metadata = &MetadataMock::new(); + let ast = AbstractSyntaxTree::new(query).unwrap(); + let mut plan = ast.to_ir(metadata).unwrap(); + plan.merge_tuples().unwrap(); + let ex_plan = ExecutionPlan::from(&plan); + + let top_id = plan.get_top().unwrap(); + let sql = ex_plan.subtree_as_sql(top_id).unwrap(); + assert_eq!( + format!( + "{} {}", + r#"SELECT "t"."a" as "a" FROM "t""#, r#"WHERE ("t"."c", "t"."a", "t"."b") > (3, 1, 2)"#, + ), + sql + ); +} diff --git a/src/ir/tree.rs b/src/ir/tree.rs index fe5451a43b..4330e44a50 100644 --- a/src/ir/tree.rs +++ b/src/ir/tree.rs @@ -45,6 +45,16 @@ pub struct EqClassIterator<'n> { nodes: &'n Nodes, } +/// Children iterator for "and"-ed expression chains. +/// +/// Iterator returns the next child for the chained `Bool::And` nodes. +#[derive(Debug)] +pub struct AndIterator<'n> { + current: &'n usize, + child: RefCell<usize>, + nodes: &'n Nodes, +} + impl<'n> Nodes { #[must_use] pub fn expr_iter(&'n self, current: &'n usize, make_row_leaf: bool) -> ExpressionIterator<'n> { @@ -65,6 +75,15 @@ impl<'n> Nodes { } } + #[must_use] + pub fn and_iter(&'n self, current: &'n usize) -> AndIterator<'n> { + AndIterator { + current, + child: RefCell::new(0), + nodes: self, + } + } + #[must_use] pub fn rel_iter(&'n self, current: &'n usize) -> RelationalIterator<'n> { RelationalIterator { @@ -167,6 +186,33 @@ impl<'n> Iterator for EqClassIterator<'n> { } } +impl<'n> Iterator for AndIterator<'n> { + type Item = &'n usize; + + fn next(&mut self) -> Option<Self::Item> { + let node = self.nodes.arena.get(*self.current); + if let Some(Node::Expression(Expression::Bool { + left, op, right, .. + })) = node + { + if *op != Bool::And { + return None; + } + let child_step = *self.child.borrow(); + if child_step == 0 { + *self.child.borrow_mut() += 1; + return Some(left); + } else if child_step == 1 { + *self.child.borrow_mut() += 1; + return Some(right); + } + None + } else { + None + } + } +} + impl<'n> Iterator for RelationalIterator<'n> { type Item = &'n usize; diff --git a/test_app/test/integration/api_test.lua b/test_app/test/integration/api_test.lua index 06cd42f839..1d9e92d692 100644 --- a/test_app/test/integration/api_test.lua +++ b/test_app/test/integration/api_test.lua @@ -46,7 +46,7 @@ g.test_incorrect_query = function() local api = cluster:server("api-1").net_box local _, err = api:call("query", { [[SELECT * FROM "testing_space" INNER JOIN "testing_space"]] }) - t.assert_equals(err, "CustomError(\"Invalid command.\")") + t.assert_equals(err, "CustomError(\"Parsing error: Error { variant: ParsingError { positives: [SubQuery], negatives: [] }, location: Pos(41), line_col: Pos((1, 42)), path: None, line: \\\"SELECT * FROM \\\\\\\"testing_space\\\\\\\" INNER JOIN \\\\\\\"testing_space\\\\\\\"\\\", continued_line: None }\")") end g.test_join_query_is_valid = function() -- GitLab