From 17b8781263c687c05d2210adabd04f73522ca219 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <sd@picodata.io>
Date: Mon, 21 Mar 2022 20:36:01 +0700
Subject: [PATCH] feat: split boolean operator's tuples into AND-ed chains

---
 src/ir/transformation.rs                     |   1 +
 src/ir/transformation/split_columns.rs       | 117 +++++++++++++++++++
 src/ir/transformation/split_columns/tests.rs | 109 +++++++++++++++++
 3 files changed, 227 insertions(+)
 create mode 100644 src/ir/transformation/split_columns.rs
 create mode 100644 src/ir/transformation/split_columns/tests.rs

diff --git a/src/ir/transformation.rs b/src/ir/transformation.rs
index b025a6bddf..3935fadc31 100644
--- a/src/ir/transformation.rs
+++ b/src/ir/transformation.rs
@@ -6,6 +6,7 @@ pub mod bool_in;
 pub mod dnf;
 pub mod equality_propagation;
 pub mod redistribution;
+pub mod split_columns;
 
 use crate::errors::QueryPlannerError;
 use crate::ir::expression::Expression;
diff --git a/src/ir/transformation/split_columns.rs b/src/ir/transformation/split_columns.rs
new file mode 100644
index 0000000000..f8997503f7
--- /dev/null
+++ b/src/ir/transformation/split_columns.rs
@@ -0,0 +1,117 @@
+//! Split tuples into columns in the expression trees of the plan.
+//!
+//! The transformation rewrites a boolean operator over multi-column tuples
+//! into an AND-ed chain of the same operator over single-column tuples.
+//!
+//! For example:
+//! ```sql
+//!    select a from t where (a, 2) = (1, b)
+//! ```
+//! is transformed to:
+//! ```sql
+//!   select a from t where (a) = (1) and (2) = (b)
+//! ```
+
+use crate::errors::QueryPlannerError;
+use crate::ir::expression::Expression;
+use crate::ir::operator::Bool;
+use crate::ir::Plan;
+/// Calls `expr_tree_replace_bool` on `top_id` with `call_split_bool` and comparison operators.
+fn call_expr_tree_split_columns(
+    plan: &mut Plan,
+    top_id: usize,
+) -> Result<usize, QueryPlannerError> {
+    plan.expr_tree_replace_bool(
+        top_id,
+        &call_split_bool,
+        &[
+            Bool::Eq,
+            Bool::Gt,
+            Bool::GtEq,
+            Bool::Lt,
+            Bool::LtEq,
+            Bool::NotEq, // NOTE(review): AND-ed `<>` is stricter than SQL row `<>` — confirm intent
+        ],
+    )
+}
+/// Free-function adapter so that `Plan::split_bool` can be passed by reference as a callback.
+fn call_split_bool(plan: &mut Plan, top_id: usize) -> Result<usize, QueryPlannerError> {
+    plan.split_bool(top_id)
+}
+
+impl Plan {
+    /// Replace a boolean operator over two rows with an AND-ed chain of the same
+    /// operator over each column pair; `expr_id` is returned if an operand is not a row.
+    ///
+    /// # Errors
+    /// - If the node is not a boolean expression.
+    /// - If left and right rows have different number of columns.
+    /// - If fetching, cloning or building a plan node fails.
+    fn split_bool(&mut self, expr_id: usize) -> Result<usize, QueryPlannerError> {
+        let expr = self.get_expression_node(expr_id)?;
+        let (left_id, right_id, op) = match expr {
+            Expression::Bool {
+                left, op, right, ..
+            } => (*left, *right, op.clone()),
+            _ => {
+                return Err(QueryPlannerError::CustomError(format!(
+                    "Node is not a boolean expression: {:?}",
+                    expr
+                )));
+            }
+        };
+        let left_expr = self.get_expression_node(left_id)?;
+        let right_expr = self.get_expression_node(right_id)?;
+        if let (
+            Expression::Row {
+                list: left_list, ..
+            },
+            Expression::Row {
+                list: right_list, ..
+            },
+        ) = (left_expr, right_expr)
+        {
+            if left_list.len() != right_list.len() {
+                return Err(QueryPlannerError::CustomError(format!(
+                    "Left and right rows have different number of columns: {:?}, {:?}",
+                    left_expr, right_expr
+                )));
+            }
+            let pairs = left_list
+                .iter()
+                .zip(right_list.iter())
+                .map(|(l, r)| (*l, *r))
+                .collect::<Vec<_>>(); // owned copy of the (left, right) column id pairs
+            if let Some((first, other)) = pairs.split_first() {
+                let left_col_id = self.expr_clone(first.0)?;
+                let right_col_id = self.expr_clone(first.1)?;
+                let left_row_id = self.nodes.add_row(vec![left_col_id], None);
+                let right_row_id = self.nodes.add_row(vec![right_col_id], None);
+                let mut top_id = self.add_cond(left_row_id, op.clone(), right_row_id)?;
+                // Append the comparison of each remaining column pair to the chain via `concat_and`.
+                for (left_col_id, right_col_id) in other {
+                    let left_col_id = self.expr_clone(*left_col_id)?;
+                    let right_col_id = self.expr_clone(*right_col_id)?;
+                    let left_row_id = self.nodes.add_row(vec![left_col_id], None);
+                    let right_row_id = self.nodes.add_row(vec![right_col_id], None);
+                    let new_top_id = self.add_cond(left_row_id, op.clone(), right_row_id)?;
+                    top_id = self.concat_and(top_id, new_top_id)?;
+                }
+
+                return Ok(top_id);
+            }
+        }
+        Ok(expr_id) // operands are not both rows, or the rows are empty: nothing to split
+    }
+
+    /// Run the column split over the plan by handing it to `transform_expr_trees`.
+    ///
+    /// # Errors
+    /// - If the plan tree is invalid (doesn't contain correct nodes where we expect it to).
+    pub fn split_columns(&mut self) -> Result<(), QueryPlannerError> {
+        self.transform_expr_trees(&call_expr_tree_split_columns)
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/ir/transformation/split_columns/tests.rs b/src/ir/transformation/split_columns/tests.rs
new file mode 100644
index 0000000000..26787ab279
--- /dev/null
+++ b/src/ir/transformation/split_columns/tests.rs
@@ -0,0 +1,109 @@
+use pretty_assertions::assert_eq;
+
+use crate::executor::engine::mock::MetadataMock;
+use crate::executor::ir::ExecutionPlan;
+use crate::frontend::sql::ast::AbstractSyntaxTree;
+
+#[test]
+fn split_columns1() {
+    let query = r#"SELECT "a" FROM "t" WHERE ("a", 2) = (1, "b")"#;
+    // Rows of equal length: one `=` per column pair, joined by `and`, is expected in the SQL.
+    let metadata = &MetadataMock::new();
+    let ast = AbstractSyntaxTree::new(query).unwrap();
+    let mut plan = ast.to_ir(metadata).unwrap();
+    plan.split_columns().unwrap();
+    let ex_plan = ExecutionPlan::from(&plan);
+
+    let top_id = plan.get_top().unwrap();
+    let sql = ex_plan.subtree_as_sql(top_id).unwrap();
+    assert_eq!(
+        format!(
+            "{}",
+            r#"SELECT "t"."a" as "a" FROM "t" WHERE ("t"."a") = (1) and (2) = ("t"."b")"#,
+        ),
+        sql
+    );
+}
+
+#[test]
+fn split_columns2() {
+    let query = r#"SELECT "a" FROM "t" WHERE "a" = 1"#;
+    // Single-column comparison: the expected SQL still holds exactly one `=` condition.
+    let metadata = &MetadataMock::new();
+    let ast = AbstractSyntaxTree::new(query).unwrap();
+    let mut plan = ast.to_ir(metadata).unwrap();
+    plan.split_columns().unwrap();
+    let ex_plan = ExecutionPlan::from(&plan);
+
+    let top_id = plan.get_top().unwrap();
+    let sql = ex_plan.subtree_as_sql(top_id).unwrap();
+    assert_eq!(
+        format!(
+            "{}",
+            r#"SELECT "t"."a" as "a" FROM "t" WHERE ("t"."a") = (1)"#,
+        ),
+        sql
+    );
+}
+
+#[test]
+fn split_columns3() {
+    let query = r#"SELECT "a" FROM "t" WHERE ("a", 2, "b") = (1, "b")"#;
+    // Rows of different lengths (3 vs 2 columns): `split_columns` is expected to return an error.
+    let metadata = &MetadataMock::new();
+    let ast = AbstractSyntaxTree::new(query).unwrap();
+    let mut plan = ast.to_ir(metadata).unwrap();
+    let plan_err = plan.split_columns().unwrap_err();
+    assert_eq!(
+        format!(
+            "{} {} {}",
+            r#"Left and right rows have different number of columns:"#,
+            r#"Row { list: [8, 9, 10], distribution: None },"#,
+            r#"Row { list: [12, 13], distribution: None }"#,
+        ),
+        format!("{}", plan_err)
+    );
+}
+
+#[test]
+fn split_columns4() {
+    let query = r#"SELECT "a" FROM "t" WHERE "a" in (1, 2)"#;
+    // `in` is not in the operator list given to the split: a single `in` condition is expected.
+    let metadata = &MetadataMock::new();
+    let ast = AbstractSyntaxTree::new(query).unwrap();
+    let mut plan = ast.to_ir(metadata).unwrap();
+    plan.split_columns().unwrap();
+    let ex_plan = ExecutionPlan::from(&plan);
+
+    let top_id = plan.get_top().unwrap();
+    let sql = ex_plan.subtree_as_sql(top_id).unwrap();
+    assert_eq!(
+        format!(
+            "{}",
+            r#"SELECT "t"."a" as "a" FROM "t" WHERE ("t"."a") in (1, 2)"#,
+        ),
+        sql
+    );
+}
+
+#[test]
+fn split_columns5() {
+    let query = r#"SELECT "a" FROM "t" WHERE ("a", 2) < (1, "b") and "a" > 2"#;
+    // `<` over rows is split per column; the chain stays AND-ed with the `"a" > 2` condition.
+    let metadata = &MetadataMock::new();
+    let ast = AbstractSyntaxTree::new(query).unwrap();
+    let mut plan = ast.to_ir(metadata).unwrap();
+    plan.split_columns().unwrap();
+    let ex_plan = ExecutionPlan::from(&plan);
+
+    let top_id = plan.get_top().unwrap();
+    let sql = ex_plan.subtree_as_sql(top_id).unwrap();
+    assert_eq!(
+        format!(
+            "{} {}",
+            r#"SELECT "t"."a" as "a" FROM "t" WHERE ("t"."a") < (1) and (2) < ("t"."b")"#,
+            r#"and ("t"."a") > (2)"#,
+        ),
+        sql
+    );
+}
-- 
GitLab