From 185d20ef0323a36aae8dbe95497e0d11b35a1710 Mon Sep 17 00:00:00 2001 From: Denis Smirnov <sd@picodata.io> Date: Fri, 1 Apr 2022 18:08:44 +0700 Subject: [PATCH] fix: do not merge sub-query into a single tuple during transformation --- .../engine/cartridge/backend/sql/ir.rs | 30 -------- .../engine/cartridge/backend/sql/tree.rs | 2 +- src/executor/tests.rs | 71 +++++++++++++++++++ src/ir/operator.rs | 53 ++++++++++++++ src/ir/transformation/merge_tuples.rs | 20 ++++++ src/ir/transformation/redistribution.rs | 38 +++------- 6 files changed, 154 insertions(+), 60 deletions(-) diff --git a/src/executor/engine/cartridge/backend/sql/ir.rs b/src/executor/engine/cartridge/backend/sql/ir.rs index d0ce88ab76..d83bb383dd 100644 --- a/src/executor/engine/cartridge/backend/sql/ir.rs +++ b/src/executor/engine/cartridge/backend/sql/ir.rs @@ -46,36 +46,6 @@ impl ExecutionPlan { Ok(result) } - /// Check that node is an additional child of some relational operator. - /// - /// # Errors - /// - node is invalid - pub fn is_additional_child(&self, node_id: usize) -> Result<bool, QueryPlannerError> { - let ir_plan = self.get_ir_plan(); - for id in 0..ir_plan.nodes.next_id() { - let node = ir_plan.get_node(id)?; - match node { - Node::Relational(rel) => match rel { - Relational::Projection { children, .. } - | Relational::Selection { children, .. } => { - if children.first() == Some(&node_id) { - return Ok(false); - } - } - Relational::InnerJoin { children, .. } - | Relational::UnionAll { children, .. } => { - if children.first() == Some(&node_id) || children.get(1) == Some(&node_id) { - return Ok(false); - } - } - _ => continue, - }, - Node::Expression(_) => continue, - } - } - Ok(true) - } - /// Transform plan sub-tree (pointed by top) to sql string /// /// # Errors diff --git a/src/executor/engine/cartridge/backend/sql/tree.rs b/src/executor/engine/cartridge/backend/sql/tree.rs index 2cdeb65da7..13aad6aca4 100644 --- a/src/executor/engine/cartridge/backend/sql/tree.rs +++ b/src/executor/engine/cartridge/backend/sql/tree.rs @@ -543,7 +543,7 @@ impl<'p> SyntaxPlan<'p> { if let Some(sq_id) = ir_plan.get_sub_query_from_row_node(id)? { // Replace current row with the referred sub-query // (except the case when sub-query is located in the FROM clause). - if self.plan.is_additional_child(sq_id)? { + if ir_plan.is_additional_child(sq_id)? { let rel = ir_plan.get_relation_node(sq_id)?; return self.nodes.add_sq(rel, id); } diff --git a/src/executor/tests.rs b/src/executor/tests.rs index 11615df387..ec3c247776 100644 --- a/src/executor/tests.rs +++ b/src/executor/tests.rs @@ -383,6 +383,77 @@ fn join_linker3_test() { assert_eq!(expected, result) } +#[test] +fn join_linker4_test() { + let sql = r#"SELECT t1."id" FROM "test_space" as t1 JOIN + (SELECT "FIRST_NAME" as "r_id" FROM "test_space") as t2 + on t1."id" = t2."r_id" and + t1."FIRST_NAME" = (SELECT "FIRST_NAME" as "fn" FROM "test_space" WHERE "id" = 1)"#; + + let engine = EngineMock::new(); + + let mut query = Query::new(engine, sql).unwrap(); + + let motion_t2_id = query.exec_plan.get_ir_plan().get_slices().unwrap()[0][0]; + let mut virtual_t2 = VirtualTable::new(); + virtual_t2.add_column(Column { + name: "r_id".into(), + r#type: Type::Integer, + }); + virtual_t2.add_values_tuple(vec![IrValue::number_from_str("1").unwrap()]); + virtual_t2.add_values_tuple(vec![IrValue::number_from_str("2").unwrap()]); + virtual_t2.set_alias("t2").unwrap(); + query + .engine + .add_virtual_table(motion_t2_id, virtual_t2) + .unwrap(); + + let motion_sq_id = query.exec_plan.get_ir_plan().get_slices().unwrap()[0][1]; + let mut virtual_sq = VirtualTable::new(); + virtual_sq.add_column(Column { + name: "fn".into(), + r#type: Type::Integer, + }); + virtual_sq.add_values_tuple(vec![IrValue::number_from_str("2").unwrap()]); + virtual_sq.add_values_tuple(vec![IrValue::number_from_str("3").unwrap()]); + query + .engine + .add_virtual_table(motion_sq_id, virtual_sq) + .unwrap(); + + let result = query.exec().unwrap(); + + let mut expected = BoxExecuteFormat::new(); + let bucket1 = query.engine.determine_bucket_id("1"); + let bucket2 = query.engine.determine_bucket_id("2"); + + expected.rows.extend(vec![ + vec![ + Value::String(format!("Execute query on a bucket [{}]", bucket2)), + Value::String(format!( + "{} {} {} {} {}", + r#"SELECT t1."id" as "id" FROM "test_space" as t1"#, + r#"INNER JOIN"#, + r#"(SELECT COLUMN_2 as "r_id" FROM (VALUES (1),(2))) as t2"#, + r#"ON (t1."id") = (t2."r_id")"#, + r#"and (t1."FIRST_NAME") = (SELECT COLUMN_4 as "fn" FROM (VALUES (2),(3)))"#, + )), + ], + vec![ + Value::String(format!("Execute query on a bucket [{}]", bucket1)), + Value::String(format!( + "{} {} {} {} {}", + r#"SELECT t1."id" as "id" FROM "test_space" as t1"#, + r#"INNER JOIN"#, + r#"(SELECT COLUMN_2 as "r_id" FROM (VALUES (1),(2))) as t2"#, + r#"ON (t1."id") = (t2."r_id")"#, + r#"and (t1."FIRST_NAME") = (SELECT COLUMN_4 as "fn" FROM (VALUES (2),(3)))"#, + )), + ], + ]); + assert_eq!(expected, result) +} + // select * from "test_1" where "identification_number" in (select COLUMN_2 as "b" from (values (1), (2))) or "identification_number" in (select COLUMN_2 as "c" from (values (3), (4))); #[test] fn anonymous_col_index_test() { diff --git a/src/ir/operator.rs b/src/ir/operator.rs index b952be3ce7..22060d829a 100644 --- a/src/ir/operator.rs +++ b/src/ir/operator.rs @@ -10,6 +10,7 @@ use crate::errors::QueryPlannerError; use super::expression::Expression; use super::transformation::redistribution::MotionPolicy; use super::{Node, Nodes, Plan}; +use traversal::Bft; /// Binary operator returning Bool expression. #[derive(Serialize, Deserialize, PartialEq, Debug, Eq, Hash, Clone)] @@ -622,6 +623,58 @@ impl Plan { Err(QueryPlannerError::InvalidRelation) } } + + /// Check if the node is an additional child of some relational node. + /// + /// # Errors + /// - Failed to get plan top + /// - Node returned by the relational iterator is not relational (bug) + pub fn is_additional_child(&self, node_id: usize) -> Result<bool, QueryPlannerError> { + let top_id = self.get_top()?; + let rel_tree = Bft::new(&top_id, |node| self.nodes.rel_iter(node)); + for (_, id) in rel_tree { + let rel = self.get_relation_node(*id)?; + match rel { + Relational::Selection { children, .. } => { + if children.iter().skip(1).any(|&c| c == node_id) { + return Ok(true); + } + } + Relational::InnerJoin { children, .. } => { + if children.iter().skip(2).any(|&c| c == node_id) { + return Ok(true); + } + } + _ => continue, + } + } + Ok(false) + } + + /// Check that the sub-query is an additional child of the parent relational node. + /// + /// # Errors + /// - If relational node is not relational. + pub fn is_additional_child_of_rel( + &self, + rel_id: usize, + sq_id: usize, + ) -> Result<bool, QueryPlannerError> { + let children = if let Some(children) = self.get_relational_children(rel_id)? { + children + } else { + return Ok(false); + }; + match self.get_relation_node(rel_id)? { + Relational::Selection { .. } | Relational::Projection { .. } => { + Ok(children.get(0) != Some(&sq_id)) + } + Relational::InnerJoin { .. } => { + Ok(children.get(0) != Some(&sq_id) && children.get(1) != Some(&sq_id)) + } + _ => Ok(false), + } + } } #[cfg(test)] diff --git a/src/ir/transformation/merge_tuples.rs b/src/ir/transformation/merge_tuples.rs index 15ef271807..345250bf33 100644 --- a/src/ir/transformation/merge_tuples.rs +++ b/src/ir/transformation/merge_tuples.rs @@ -49,11 +49,27 @@ impl Chain { Bool::Lt => (*right, *left, Bool::Gt), Bool::LtEq => (*right, *left, Bool::GtEq), }; + + // If boolean expression contains a reference to an additional + // sub-query, it should be added to the "other" list. + let left_sq = plan.get_sub_query_from_row_node(left_id)?; + let right_sq = plan.get_sub_query_from_row_node(right_id)?; + for sq_id in [left_sq, right_sq].iter().flatten() { + if plan.is_additional_child(*sq_id)? { + self.other.push(expr_id); + return Ok(()); + } + } + match self.grouped.entry(group_op) { Entry::Occupied(mut entry) => { let (left, right) = entry.get_mut(); let new_left_id = plan.expr_clone(left_id)?; let new_right_id = plan.expr_clone(right_id)?; + println!( + "occupied new_left_id {}, new_right_id {}", + new_left_id, new_right_id + ); plan.get_columns_or_self(new_left_id)? .iter() .for_each(|id| { @@ -68,6 +84,10 @@ impl Chain { Entry::Vacant(entry) => { let new_left_id = plan.expr_clone(left_id)?; let new_right_id = plan.expr_clone(right_id)?; + println!( + "vacant new_left_id {}, new_right_id {}", + new_left_id, new_right_id + ); entry.insert(( plan.get_columns_or_self(new_left_id)?, plan.get_columns_or_self(new_right_id)?, diff --git a/src/ir/transformation/redistribution.rs b/src/ir/transformation/redistribution.rs index 6f788d7980..754753bfea 100644 --- a/src/ir/transformation/redistribution.rs +++ b/src/ir/transformation/redistribution.rs @@ -142,7 +142,7 @@ impl Plan { /// Get a sub-query from the row node (if it points to sub-query) /// /// # Errors - /// - reference points to multiple sub-queries (invalid SQL) + /// - row contains multiple sub-queries (we don't support this at the moment) /// - `relational_map` is not initialized pub fn get_sub_query_from_row_node( &self, @@ -158,14 +158,18 @@ impl Plan { match sq_set.len().cmp(&1) { Ordering::Equal => sq_set.iter().next().map_or_else( || { - Err(QueryPlannerError::CustomError(String::from( - "Failed to get the first sub-query node from the set.", + Err(QueryPlannerError::CustomError(format!( + "Failed to get the first sub-query node from the row: {:?}.", + self.get_expression_node(row_id)? ))) }, |sq_id| Ok(Some(*sq_id)), ), Ordering::Less => Ok(None), - Ordering::Greater => Err(QueryPlannerError::InvalidSubQuery), + Ordering::Greater => Err(QueryPlannerError::CustomError(format!( + "Found multiple sub-queries in a single row: {:?}.", + self.get_expression_node(row_id)? + ))), } } @@ -282,30 +286,6 @@ impl Plan { Ok(()) } - /// Check that the sub-query is an additional child of the parent relational node. - fn sub_query_is_additional_child( - &self, - rel_id: usize, - sq_id: usize, - ) -> Result<bool, QueryPlannerError> { - let children = if let Some(children) = self.get_relational_children(rel_id)? { - children - } else { - return Ok(false); - }; - match self.get_relation_node(rel_id)? { - Relational::Selection { .. } | Relational::Projection { .. } => { - Ok(children.get(0) != Some(&sq_id)) - } - Relational::InnerJoin { .. } => { - Ok(children.get(0) != Some(&sq_id) && children.get(1) != Some(&sq_id)) - } - _ => Err(QueryPlannerError::CustomError(String::from( - "Trying to check if sub-query is an additional child of the wrong node.", - ))), - } - } - fn get_additional_sq( &self, rel_id: usize, @@ -313,7 +293,7 @@ impl Plan { ) -> Result<Option<usize>, QueryPlannerError> { if self.get_expression_node(row_id)?.is_row() { if let Some(sq_id) = self.get_sub_query_from_row_node(row_id)? { - if self.sub_query_is_additional_child(rel_id, sq_id)? { + if self.is_additional_child_of_rel(rel_id, sq_id)? { return Ok(Some(sq_id)); } } -- GitLab