From 185d20ef0323a36aae8dbe95497e0d11b35a1710 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <sd@picodata.io>
Date: Fri, 1 Apr 2022 18:08:44 +0700
Subject: [PATCH] fix: do not merge sub-query into a single tuple during
 transformation

---
 .../engine/cartridge/backend/sql/ir.rs        | 30 --------
 .../engine/cartridge/backend/sql/tree.rs      |  2 +-
 src/executor/tests.rs                         | 71 +++++++++++++++++++
 src/ir/operator.rs                            | 53 ++++++++++++++
 src/ir/transformation/merge_tuples.rs         | 20 ++++++
 src/ir/transformation/redistribution.rs       | 38 +++-------
 6 files changed, 154 insertions(+), 60 deletions(-)

diff --git a/src/executor/engine/cartridge/backend/sql/ir.rs b/src/executor/engine/cartridge/backend/sql/ir.rs
index d0ce88ab76..d83bb383dd 100644
--- a/src/executor/engine/cartridge/backend/sql/ir.rs
+++ b/src/executor/engine/cartridge/backend/sql/ir.rs
@@ -46,36 +46,6 @@ impl ExecutionPlan {
         Ok(result)
     }
 
-    /// Check that node is an additional child of some relational operator.
-    ///
-    /// # Errors
-    /// - node is invalid
-    pub fn is_additional_child(&self, node_id: usize) -> Result<bool, QueryPlannerError> {
-        let ir_plan = self.get_ir_plan();
-        for id in 0..ir_plan.nodes.next_id() {
-            let node = ir_plan.get_node(id)?;
-            match node {
-                Node::Relational(rel) => match rel {
-                    Relational::Projection { children, .. }
-                    | Relational::Selection { children, .. } => {
-                        if children.first() == Some(&node_id) {
-                            return Ok(false);
-                        }
-                    }
-                    Relational::InnerJoin { children, .. }
-                    | Relational::UnionAll { children, .. } => {
-                        if children.first() == Some(&node_id) || children.get(1) == Some(&node_id) {
-                            return Ok(false);
-                        }
-                    }
-                    _ => continue,
-                },
-                Node::Expression(_) => continue,
-            }
-        }
-        Ok(true)
-    }
-
     /// Transform plan sub-tree (pointed by top) to sql string
     ///
     /// # Errors
diff --git a/src/executor/engine/cartridge/backend/sql/tree.rs b/src/executor/engine/cartridge/backend/sql/tree.rs
index 2cdeb65da7..13aad6aca4 100644
--- a/src/executor/engine/cartridge/backend/sql/tree.rs
+++ b/src/executor/engine/cartridge/backend/sql/tree.rs
@@ -543,7 +543,7 @@ impl<'p> SyntaxPlan<'p> {
                     if let Some(sq_id) = ir_plan.get_sub_query_from_row_node(id)? {
                         // Replace current row with the referred sub-query
                         // (except the case when sub-query is located in the FROM clause).
-                        if self.plan.is_additional_child(sq_id)? {
+                        if ir_plan.is_additional_child(sq_id)? {
                             let rel = ir_plan.get_relation_node(sq_id)?;
                             return self.nodes.add_sq(rel, id);
                         }
diff --git a/src/executor/tests.rs b/src/executor/tests.rs
index 11615df387..ec3c247776 100644
--- a/src/executor/tests.rs
+++ b/src/executor/tests.rs
@@ -383,6 +383,77 @@ fn join_linker3_test() {
     assert_eq!(expected, result)
 }
 
+#[test]
+fn join_linker4_test() {
+    let sql = r#"SELECT t1."id" FROM "test_space" as t1 JOIN
+    (SELECT "FIRST_NAME" as "r_id" FROM "test_space") as t2
+    on t1."id" = t2."r_id" and
+    t1."FIRST_NAME" = (SELECT "FIRST_NAME" as "fn" FROM "test_space" WHERE "id" = 1)"#;
+
+    let engine = EngineMock::new();
+
+    let mut query = Query::new(engine, sql).unwrap();
+
+    let motion_t2_id = query.exec_plan.get_ir_plan().get_slices().unwrap()[0][0];
+    let mut virtual_t2 = VirtualTable::new();
+    virtual_t2.add_column(Column {
+        name: "r_id".into(),
+        r#type: Type::Integer,
+    });
+    virtual_t2.add_values_tuple(vec![IrValue::number_from_str("1").unwrap()]);
+    virtual_t2.add_values_tuple(vec![IrValue::number_from_str("2").unwrap()]);
+    virtual_t2.set_alias("t2").unwrap();
+    query
+        .engine
+        .add_virtual_table(motion_t2_id, virtual_t2)
+        .unwrap();
+
+    let motion_sq_id = query.exec_plan.get_ir_plan().get_slices().unwrap()[0][1];
+    let mut virtual_sq = VirtualTable::new();
+    virtual_sq.add_column(Column {
+        name: "fn".into(),
+        r#type: Type::Integer,
+    });
+    virtual_sq.add_values_tuple(vec![IrValue::number_from_str("2").unwrap()]);
+    virtual_sq.add_values_tuple(vec![IrValue::number_from_str("3").unwrap()]);
+    query
+        .engine
+        .add_virtual_table(motion_sq_id, virtual_sq)
+        .unwrap();
+
+    let result = query.exec().unwrap();
+
+    let mut expected = BoxExecuteFormat::new();
+    let bucket1 = query.engine.determine_bucket_id("1");
+    let bucket2 = query.engine.determine_bucket_id("2");
+
+    expected.rows.extend(vec![
+        vec![
+            Value::String(format!("Execute query on a bucket [{}]", bucket2)),
+            Value::String(format!(
+                "{} {} {} {} {}",
+                r#"SELECT t1."id" as "id" FROM "test_space" as t1"#,
+                r#"INNER JOIN"#,
+                r#"(SELECT COLUMN_2 as "r_id" FROM (VALUES (1),(2))) as t2"#,
+                r#"ON (t1."id") = (t2."r_id")"#,
+                r#"and (t1."FIRST_NAME") = (SELECT COLUMN_4 as "fn" FROM (VALUES (2),(3)))"#,
+            )),
+        ],
+        vec![
+            Value::String(format!("Execute query on a bucket [{}]", bucket1)),
+            Value::String(format!(
+                "{} {} {} {} {}",
+                r#"SELECT t1."id" as "id" FROM "test_space" as t1"#,
+                r#"INNER JOIN"#,
+                r#"(SELECT COLUMN_2 as "r_id" FROM (VALUES (1),(2))) as t2"#,
+                r#"ON (t1."id") = (t2."r_id")"#,
+                r#"and (t1."FIRST_NAME") = (SELECT COLUMN_4 as "fn" FROM (VALUES (2),(3)))"#,
+            )),
+        ],
+    ]);
+    assert_eq!(expected, result)
+}
+
 // select * from "test_1" where "identification_number" in (select COLUMN_2 as "b" from (values (1), (2))) or "identification_number" in (select COLUMN_2 as "c" from (values (3), (4)));
 #[test]
 fn anonymous_col_index_test() {
diff --git a/src/ir/operator.rs b/src/ir/operator.rs
index b952be3ce7..22060d829a 100644
--- a/src/ir/operator.rs
+++ b/src/ir/operator.rs
@@ -10,6 +10,7 @@ use crate::errors::QueryPlannerError;
 use super::expression::Expression;
 use super::transformation::redistribution::MotionPolicy;
 use super::{Node, Nodes, Plan};
+use traversal::Bft;
 
 /// Binary operator returning Bool expression.
 #[derive(Serialize, Deserialize, PartialEq, Debug, Eq, Hash, Clone)]
@@ -622,6 +623,58 @@ impl Plan {
             Err(QueryPlannerError::InvalidRelation)
         }
     }
+
+    /// Check if the node is an additional child of some relational node.
+    ///
+    /// # Errors
+    /// - Failed to get plan top
+    /// - Node returned by the relational iterator is not relational (bug)
+    pub fn is_additional_child(&self, node_id: usize) -> Result<bool, QueryPlannerError> {
+        let top_id = self.get_top()?;
+        let rel_tree = Bft::new(&top_id, |node| self.nodes.rel_iter(node));
+        for (_, id) in rel_tree {
+            let rel = self.get_relation_node(*id)?;
+            match rel {
+                Relational::Selection { children, .. } => {
+                    if children.iter().skip(1).any(|&c| c == node_id) {
+                        return Ok(true);
+                    }
+                }
+                Relational::InnerJoin { children, .. } => {
+                    if children.iter().skip(2).any(|&c| c == node_id) {
+                        return Ok(true);
+                    }
+                }
+                _ => continue,
+            }
+        }
+        Ok(false)
+    }
+
+    /// Check that the sub-query is an additional child of the parent relational node.
+    ///
+    /// # Errors
+    /// - If relational node is not relational.
+    pub fn is_additional_child_of_rel(
+        &self,
+        rel_id: usize,
+        sq_id: usize,
+    ) -> Result<bool, QueryPlannerError> {
+        let children = if let Some(children) = self.get_relational_children(rel_id)? {
+            children
+        } else {
+            return Ok(false);
+        };
+        match self.get_relation_node(rel_id)? {
+            Relational::Selection { .. } | Relational::Projection { .. } => {
+                Ok(children.get(0) != Some(&sq_id))
+            }
+            Relational::InnerJoin { .. } => {
+                Ok(children.get(0) != Some(&sq_id) && children.get(1) != Some(&sq_id))
+            }
+            _ => Ok(false),
+        }
+    }
 }
 
 #[cfg(test)]
diff --git a/src/ir/transformation/merge_tuples.rs b/src/ir/transformation/merge_tuples.rs
index 15ef271807..345250bf33 100644
--- a/src/ir/transformation/merge_tuples.rs
+++ b/src/ir/transformation/merge_tuples.rs
@@ -49,11 +49,27 @@ impl Chain {
                 Bool::Lt => (*right, *left, Bool::Gt),
                 Bool::LtEq => (*right, *left, Bool::GtEq),
             };
+
+            // If boolean expression contains a reference to an additional
+            //  sub-query, it should be added to the "other" list.
+            let left_sq = plan.get_sub_query_from_row_node(left_id)?;
+            let right_sq = plan.get_sub_query_from_row_node(right_id)?;
+            for sq_id in [left_sq, right_sq].iter().flatten() {
+                if plan.is_additional_child(*sq_id)? {
+                    self.other.push(expr_id);
+                    return Ok(());
+                }
+            }
+
             match self.grouped.entry(group_op) {
                 Entry::Occupied(mut entry) => {
                     let (left, right) = entry.get_mut();
                     let new_left_id = plan.expr_clone(left_id)?;
                     let new_right_id = plan.expr_clone(right_id)?;
+                    println!(
+                        "occupied new_left_id {}, new_right_id {}",
+                        new_left_id, new_right_id
+                    );
                     plan.get_columns_or_self(new_left_id)?
                         .iter()
                         .for_each(|id| {
@@ -68,6 +84,10 @@ impl Chain {
                 Entry::Vacant(entry) => {
                     let new_left_id = plan.expr_clone(left_id)?;
                     let new_right_id = plan.expr_clone(right_id)?;
+                    println!(
+                        "vacant new_left_id {}, new_right_id {}",
+                        new_left_id, new_right_id
+                    );
                     entry.insert((
                         plan.get_columns_or_self(new_left_id)?,
                         plan.get_columns_or_self(new_right_id)?,
diff --git a/src/ir/transformation/redistribution.rs b/src/ir/transformation/redistribution.rs
index 6f788d7980..754753bfea 100644
--- a/src/ir/transformation/redistribution.rs
+++ b/src/ir/transformation/redistribution.rs
@@ -142,7 +142,7 @@ impl Plan {
     /// Get a sub-query from the row node (if it points to sub-query)
     ///
     /// # Errors
-    /// - reference points to multiple sub-queries (invalid SQL)
+    /// - row contains multiple sub-queries (we don't support this at the moment)
     /// - `relational_map` is not initialized
     pub fn get_sub_query_from_row_node(
         &self,
@@ -158,14 +158,18 @@ impl Plan {
         match sq_set.len().cmp(&1) {
             Ordering::Equal => sq_set.iter().next().map_or_else(
                 || {
-                    Err(QueryPlannerError::CustomError(String::from(
-                        "Failed to get the first sub-query node from the set.",
+                    Err(QueryPlannerError::CustomError(format!(
+                        "Failed to get the first sub-query node from the row: {:?}.",
+                        self.get_expression_node(row_id)?
                     )))
                 },
                 |sq_id| Ok(Some(*sq_id)),
             ),
             Ordering::Less => Ok(None),
-            Ordering::Greater => Err(QueryPlannerError::InvalidSubQuery),
+            Ordering::Greater => Err(QueryPlannerError::CustomError(format!(
+                "Found multiple sub-queries in a single row: {:?}.",
+                self.get_expression_node(row_id)?
+            ))),
         }
     }
 
@@ -282,30 +286,6 @@ impl Plan {
         Ok(())
     }
 
-    /// Check that the sub-query is an additional child of the parent relational node.
-    fn sub_query_is_additional_child(
-        &self,
-        rel_id: usize,
-        sq_id: usize,
-    ) -> Result<bool, QueryPlannerError> {
-        let children = if let Some(children) = self.get_relational_children(rel_id)? {
-            children
-        } else {
-            return Ok(false);
-        };
-        match self.get_relation_node(rel_id)? {
-            Relational::Selection { .. } | Relational::Projection { .. } => {
-                Ok(children.get(0) != Some(&sq_id))
-            }
-            Relational::InnerJoin { .. } => {
-                Ok(children.get(0) != Some(&sq_id) && children.get(1) != Some(&sq_id))
-            }
-            _ => Err(QueryPlannerError::CustomError(String::from(
-                "Trying to check if sub-query is an additional child of the wrong node.",
-            ))),
-        }
-    }
-
     fn get_additional_sq(
         &self,
         rel_id: usize,
@@ -313,7 +293,7 @@ impl Plan {
     ) -> Result<Option<usize>, QueryPlannerError> {
         if self.get_expression_node(row_id)?.is_row() {
             if let Some(sq_id) = self.get_sub_query_from_row_node(row_id)? {
-                if self.sub_query_is_additional_child(rel_id, sq_id)? {
+                if self.is_additional_child_of_rel(rel_id, sq_id)? {
                     return Ok(Some(sq_id));
                 }
             }
-- 
GitLab