From f678f7cc06c48d535910ead499a72ab37f377a15 Mon Sep 17 00:00:00 2001
From: EmirVildanov <reddog201030@gmail.com>
Date: Tue, 4 Jun 2024 10:34:30 +0300
Subject: [PATCH] fix: solve the problem of wrong parsing priorities of
 BETWEEN, AND, NOT operators

---
 sbroad-core/src/executor/tests/between.rs |   6 +-
 sbroad-core/src/frontend/sql.rs           | 234 ++++++++++++++--------
 sbroad-core/src/frontend/sql/ir/tests.rs  | 142 ++++++++++++-
 3 files changed, 296 insertions(+), 86 deletions(-)

diff --git a/sbroad-core/src/executor/tests/between.rs b/sbroad-core/src/executor/tests/between.rs
index c1c755abb..2678106ee 100644
--- a/sbroad-core/src/executor/tests/between.rs
+++ b/sbroad-core/src/executor/tests/between.rs
@@ -55,7 +55,7 @@ fn between1_test() {
                 "{} {} {}",
                 r#"SELECT "t"."identification_number" FROM "hash_testing" as "t""#,
                 r#"WHERE ("t"."identification_number") >= (?)"#,
-                r#"and ("t"."identification_number") <= (SELECT "id" FROM "TMP_test_103")"#,
+                r#"and ("t"."identification_number") <= (SELECT "id" FROM "TMP_test_102")"#,
             ),
             vec![Value::from(1_u64)],
         ))),
@@ -108,8 +108,8 @@ fn between2_test() {
             format!(
                 "{} {} {}",
                 r#"SELECT "t"."identification_number" FROM "hash_testing" as "t""#,
-                r#"WHERE (SELECT "id" FROM "TMP_test_105") >= (?)"#,
-                r#"and (SELECT "id" FROM "TMP_test_105") <= (?)"#,
+                r#"WHERE (SELECT "id" FROM "TMP_test_104") >= (?)"#,
+                r#"and (SELECT "id" FROM "TMP_test_104") <= (?)"#,
             ),
             vec![Value::from(1_u64), Value::from(3_u64)],
         ))),
diff --git a/sbroad-core/src/frontend/sql.rs b/sbroad-core/src/frontend/sql.rs
index 642a38b2a..55a72d854 100644
--- a/sbroad-core/src/frontend/sql.rs
+++ b/sbroad-core/src/frontend/sql.rs
@@ -1312,9 +1312,9 @@ lazy_static::lazy_static! {
         // Precedence is defined lowest to highest.
         PrattParser::new()
             .op(Op::infix(Or, Left))
-            .op(Op::infix(Between, Left))
             .op(Op::infix(And, Left))
             .op(Op::prefix(UnaryNot))
+            .op(Op::infix(Between, Left))
             .op(
                 Op::infix(Eq, Right) | Op::infix(NotEq, Right) | Op::infix(NotEq, Right)
                 | Op::infix(Gt, Right) | Op::infix(GtEq, Right) | Op::infix(Lt, Right)
@@ -1436,8 +1436,8 @@ enum ParseExpression {
         child: Box<ParseExpression>,
     },
     Infix {
-        op: ParseExpressionInfixOperator,
         is_not: bool,
+        op: ParseExpressionInfixOperator,
         left: Box<ParseExpression>,
         right: Box<ParseExpression>,
     },
@@ -1475,9 +1475,28 @@ enum ParseExpression {
         pattern: Option<Box<ParseExpression>>,
         target: Box<ParseExpression>,
     },
-    Between {
+    /// Workaround for the mixfix BETWEEN operator breaking the logic of
+    /// pratt parsing for infix operators.
+    /// For expression `expr_1 BETWEEN expr_2 AND expr_3` we would create ParseExpression tree of
+    /// And
+    ///   - left  = InterimBetween
+    ///               - left  = expr_1
+    ///               - right = expr_2
+    ///   - right = expr_3
+    /// because priority of BETWEEN is higher than of AND.
+    ///
+    /// When we face such a tree, we transform it into Expression::Between. So during parsing AND
+    /// operator we have to check whether we have to transform it into BETWEEN.
+    InterimBetween {
+        is_not: bool,
+        left: Box<ParseExpression>,
+        right: Box<ParseExpression>,
+    },
+    /// Fixed version of `InterimBetween`.
+    FinalBetween {
         is_not: bool,
         left: Box<ParseExpression>,
+        center: Box<ParseExpression>,
         right: Box<ParseExpression>,
     },
 }
@@ -1523,64 +1542,6 @@ impl SelectExpr {
     }
 }
 
-impl Plan {
-    /// Find a pair of:
-    /// * AND operator that must actually represent BETWEEN right part
-    ///   (presented as it's left and right children)
-    /// * This ^ operator optional AND parent which child we should replace with BETWEEN operator
-    fn find_leftmost_and(
-        &self,
-        current_node_plan_id: usize,
-        parent: Option<usize>,
-    ) -> Result<(usize, usize, Option<usize>), SbroadError> {
-        let root_expr = self.get_expression_node(current_node_plan_id)?;
-        match root_expr {
-            Expression::Alias { .. }
-            | Expression::ExprInParentheses { .. }
-            | Expression::Arithmetic { .. }
-            | Expression::Cast { .. }
-            | Expression::Case { .. }
-            | Expression::Concat { .. }
-            | Expression::Constant { .. }
-            | Expression::Reference { .. }
-            | Expression::Row { .. }
-            | Expression::StableFunction { .. }
-            | Expression::Trim { .. }
-            | Expression::Unary { .. }
-            | Expression::CountAsterisk => Err(SbroadError::Invalid(
-                Entity::Expression,
-                Some(SmolStr::from(
-                    "Expected to see bool node during leftmost AND search",
-                )),
-            )),
-            Expression::Bool { left, op, right } => match op {
-                Bool::And => {
-                    let left_child = self.get_expression_node(*left)?;
-                    if let Expression::Bool { op: Bool::And, .. } = left_child {
-                        self.find_leftmost_and(*left, Some(current_node_plan_id))
-                    } else {
-                        Ok((*left, *right, parent))
-                    }
-                }
-                Bool::Eq
-                | Bool::In
-                | Bool::Gt
-                | Bool::GtEq
-                | Bool::Lt
-                | Bool::LtEq
-                | Bool::NotEq
-                | Bool::Or
-                | Bool::Between => Err(SbroadError::Invalid(
-                    Entity::Expression,
-                    Some(SmolStr::from(
-                        "Expected to see AND bool node during leftmost AND search",
-                    )),
-                )),
-            },
-        }
-    }
-}
-
 impl ParseExpression {
     #[allow(clippy::too_many_lines)]
     fn populate_plan<M>(
@@ -1663,20 +1624,20 @@ impl ParseExpression {
                 };
                 plan.nodes.push(Node::Expression(trim_expr))
             }
-            ParseExpression::Between {
+            ParseExpression::FinalBetween {
                 is_not,
                 left,
+                center,
                 right,
             } => {
                 let plan_left_id = left.populate_plan(plan, worker)?;
                 let left_covered_with_row = plan.row(plan_left_id)?;
 
-                let plan_right_id = right.populate_plan(plan, worker)?;
-                let (left_and_child, right_and_child, parent) =
-                    plan.find_leftmost_and(plan_right_id, None)?;
+                let plan_center_id = center.populate_plan(plan, worker)?;
+                let center_covered_with_row = plan.row(plan_center_id)?;
 
-                let center_covered_with_row = plan.row(left_and_child)?;
-                let right_covered_with_row = plan.row(right_and_child)?;
+                let plan_right_id = right.populate_plan(plan, worker)?;
+                let right_covered_with_row = plan.row(plan_right_id)?;
 
                 let greater_eq_id =
                     plan.add_cond(left_covered_with_row, Bool::GtEq, center_covered_with_row)?;
@@ -1693,16 +1654,7 @@ impl ParseExpression {
                     .betweens
                     .push(Between::new(left_covered_with_row, less_eq_id));
 
-                if let Some(leftmost_and_parent) = parent {
-                    let and_to_replace_child = plan.get_mut_expression_node(leftmost_and_parent)?;
-                    let Expression::Bool { ref mut left, .. } = and_to_replace_child else {
-                        unreachable!("Expected to see AND node as leftmost parent")
-                    };
-                    *left = between_id;
-                    plan_right_id
-                } else {
-                    between_id
-                }
+                between_id
             }
             ParseExpression::Infix {
                 op,
@@ -1729,6 +1681,55 @@ impl ParseExpression {
                 } else {
                     right.populate_plan(plan, worker)?
                 };
+
+                // In case:
+                // * `op` = AND (left = `left_plan_id`, right = `right_plan_id`)
+                // * `right_plan_id` = AND (left = expr_1, right = expr_2) (resulted from BETWEEN
+                //   transformation)
+                //                And
+                //  left_plan_id        And
+                //                expr_1    expr_2
+                //
+                // we'll end up in a situation when AND is a right child of another AND (We don't
+                // expect it later as soon as AND is a left-associative operator).
+                // We have to fix it the following way:
+                //                      And
+                //               And        expr_2
+                //  left_plan_id   expr_1
+                let right_plan_is_and = {
+                    let right_expr = plan.get_node(right_plan_id)?;
+                    matches!(
+                        right_expr,
+                        Node::Expression(Expression::Bool { op: Bool::And, .. })
+                    )
+                };
+                if matches!(op, ParseExpressionInfixOperator::InfixBool(Bool::And))
+                    && right_plan_is_and
+                {
+                    let right_expr = plan.get_expression_node(right_plan_id)?;
+                    let fixed_left_and_id = if let Expression::Bool {
+                        op: Bool::And,
+                        left,
+                        ..
+                    } = right_expr
+                    {
+                        plan.add_cond(left_row_id, Bool::And, *left)?
+                    } else {
+                        panic!("Expected to see AND operator as right child.");
+                    };
+
+                    let right_expr_mut = plan.get_mut_expression_node(right_plan_id)?;
+                    if let Expression::Bool {
+                        op: Bool::And,
+                        left,
+                        ..
+                    } = right_expr_mut
+                    {
+                        *left = fixed_left_and_id;
+                        return Ok(right_plan_id);
+                    }
+                }
+
                 let right_row_id = plan.row(right_plan_id)?;
 
                 let op_plan_id = match op {
@@ -1835,11 +1836,55 @@ impl ParseExpression {
                     op_id
                 }
             }
+            ParseExpression::InterimBetween { .. } => return Err(SbroadError::Invalid(
+                Entity::Expression,
+                Some(SmolStr::from(
+                    "BETWEEN operator should have a view of `expr_1 BETWEEN expr_2 AND expr_3`.",
+                )),
+            )),
         };
         Ok(plan_id)
     }
 }
 
+/// Workaround for the following expressions:
+///     * `expr_1 AND expr_2 BETWEEN expr_3 AND expr_4`
+///     * `NOT expr_1 BETWEEN expr_2 AND expr_3`
+/// which are parsed as:
+///     * `(expr_1 AND (expr_2 BETWEEN expr_3)) AND expr_4`
+///     * `(NOT (expr_1 BETWEEN expr_2)) AND expr_3`
+/// but we should transform them into:
+///     * `expr_1 AND (expr_2 BETWEEN expr_3 AND expr_4)`
+///     * `NOT (expr_1 BETWEEN expr_2 AND expr_3)`
+///
+/// Returns:
+/// * None in case `expr` doesn't have `InterimBetween` as a child
+/// * Expression whose right child is an `InterimBetween` (or `InterimBetween` itself)
+fn find_interim_between(mut expr: &mut ParseExpression) -> Option<(&mut ParseExpression, bool)> {
+    let mut is_exact_match = true;
+    loop {
+        match expr {
+            ParseExpression::Infix {
+                op: ParseExpressionInfixOperator::InfixBool(Bool::And),
+                right,
+                ..
+            } => {
+                expr = right;
+                is_exact_match = false;
+            }
+            ParseExpression::Prefix {
+                op: Unary::Not,
+                child,
+            } => {
+                expr = child;
+                is_exact_match = false;
+            }
+            ParseExpression::InterimBetween { .. } => break Some((expr, is_exact_match)),
+            _ => break None,
+        }
+    }
+}
+
 /// Function responsible for parsing expressions using Pratt parser.
 ///
 /// Parameters:
@@ -2207,6 +2252,8 @@ where
             Ok(parse_expr)
         })
         .map_infix(|lhs, op, rhs| {
+            let mut lhs = lhs?;
+            let rhs = rhs?;
             let mut is_not = false;
             let op = match op.as_rule() {
                 Rule::And => ParseExpressionInfixOperator::InfixBool(Bool::And),
@@ -2214,10 +2261,10 @@ where
                 Rule::Between => {
                     let mut op_inner = op.into_inner();
                     is_not = op_inner.next().is_some();
-                    return Ok(ParseExpression::Between {
+                    return Ok(ParseExpression::InterimBetween {
                         is_not,
-                        left: Box::new(lhs?),
-                        right: Box::new(rhs?),
+                        left: Box::new(lhs),
+                        right: Box::new(rhs),
                     })
                 },
                 Rule::Eq => ParseExpressionInfixOperator::InfixBool(Bool::Eq),
@@ -2238,11 +2285,34 @@ where
                 Rule::ConcatInfixOp => ParseExpressionInfixOperator::Concat,
                 rule           => unreachable!("Expr::parse expected infix operation, found {:?}", rule),
             };
+
+            // HACK: InterimBetween(e1, e2) AND e3 => FinalBetween(e1, e2, e3).
+            if matches!(op, ParseExpressionInfixOperator::InfixBool(Bool::And)) {
+                if let Some((expr, is_exact_match)) = find_interim_between(&mut lhs) {
+                    let ParseExpression::InterimBetween { is_not, left, right } = expr else {
+                        panic!("expected ParseExpression::InterimBetween");
+                    };
+
+                    let fb = ParseExpression::FinalBetween {
+                        is_not: *is_not,
+                        left: left.clone(),
+                        center: right.clone(),
+                        right: Box::new(rhs),
+                    };
+
+                    if is_exact_match {
+                        return Ok(fb);
+                    }
+                    *expr = fb;
+                    return Ok(lhs);
+                }
+            }
+
             Ok(ParseExpression::Infix {
                 op,
                 is_not,
-                left: Box::new(lhs?),
-                right: Box::new(rhs?),
+                left: Box::new(lhs),
+                right: Box::new(rhs),
             })
         })
         .map_prefix(|op, child| {
diff --git a/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad-core/src/frontend/sql/ir/tests.rs
index 2137d5c3b..7d752c31b 100644
--- a/sbroad-core/src/frontend/sql/ir/tests.rs
+++ b/sbroad-core/src/frontend/sql/ir/tests.rs
@@ -2,7 +2,7 @@ use crate::errors::SbroadError;
 use crate::frontend::sql::ast::AbstractSyntaxTree;
 use crate::frontend::Ast;
 use crate::ir::operator::Relational;
-use crate::ir::transformation::helpers::sql_to_optimized_ir;
+use crate::ir::transformation::helpers::{sql_to_ir, sql_to_optimized_ir};
 use crate::ir::tree::traversal::PostOrder;
 use crate::ir::value::Value;
 use crate::ir::Plan;
@@ -407,6 +407,146 @@ vtable_max_rows = 5000
     assert_eq!(expected_explain, plan.as_explain().unwrap());
 }
 
+#[test]
+fn front_sql_between_with_additional_non_bool_value_from_left() {
+    let input = r#"SELECT * FROM "test_space" WHERE 42 and 1 between 2 and 3"#;
+    let mut plan = sql_to_ir(input, vec![]);
+    let err = plan.optimize().unwrap_err();
+
+    assert_eq!(
+        true,
+        err.to_string()
+            .contains("Left expression is not a boolean expression or NULL")
+    );
+}
+
+#[test]
+fn front_sql_between_with_additional_and_from_left() {
+    // Was previously misinterpreted as
+    //                  SELECT "id" FROM "test_space" as "t" WHERE ("t"."id" > 1 AND "t"."id") BETWEEN "t"."id" AND "t"."id" + 10
+    let input = r#"SELECT "id" FROM "test_space" as "t" WHERE "t"."id" > 1 AND "t"."id" BETWEEN "t"."id" AND "t"."id" + 10"#;
+
+    let plan = sql_to_optimized_ir(input, vec![]);
+
+    let expected_explain = String::from(
+        r#"projection ("t"."id"::unsigned -> "id")
+    selection ROW("t"."id"::unsigned) > ROW(1::unsigned) and ROW("t"."id"::unsigned) >= ROW("t"."id"::unsigned) and ROW("t"."id"::unsigned) <= ROW("t"."id"::unsigned) + ROW(10::unsigned)
+        scan "test_space" -> "t"
+execution options:
+sql_vdbe_max_steps = 45000
+vtable_max_rows = 5000
+"#,
+    );
+
+    assert_eq!(expected_explain, plan.as_explain().unwrap());
+}
+
+#[test]
+fn front_sql_between_with_additional_not_from_left() {
+    // Was previously misinterpreted as
+    //                  SELECT "id" FROM "test_space" as "t" WHERE (not "t"."id") BETWEEN "t"."id" AND "t"."id" + 10 and true
+    let input = r#"SELECT "id" FROM "test_space" as "t" WHERE not "t"."id" BETWEEN "t"."id" AND "t"."id" + 10 and true"#;
+
+    let plan = sql_to_optimized_ir(input, vec![]);
+
+    let expected_explain = String::from(
+        r#"projection ("t"."id"::unsigned -> "id")
+    selection not (ROW("t"."id"::unsigned) >= ROW("t"."id"::unsigned) and ROW("t"."id"::unsigned) <= ROW("t"."id"::unsigned) + ROW(10::unsigned)) and ROW(true::boolean)
+        scan "test_space" -> "t"
+execution options:
+sql_vdbe_max_steps = 45000
+vtable_max_rows = 5000
+"#,
+    );
+
+    assert_eq!(expected_explain, plan.as_explain().unwrap());
+}
+
+#[test]
+fn front_sql_between_with_additional_and_from_left_and_right() {
+    // Was previously misinterpreted as
+    //                  SELECT "id" FROM "test_space" as "t" WHERE ("t"."id" > 1 AND "t"."id") BETWEEN "t"."id" AND "t"."id" + 10 AND true
+    let input = r#"SELECT "id" FROM "test_space" as "t" WHERE "t"."id" > 1 AND "t"."id" BETWEEN "t"."id" AND "t"."id" + 10 AND true"#;
+
+    let plan = sql_to_optimized_ir(input, vec![]);
+
+    let expected_explain = String::from(
+        r#"projection ("t"."id"::unsigned -> "id")
+    selection ROW("t"."id"::unsigned) > ROW(1::unsigned) and ROW("t"."id"::unsigned) >= ROW("t"."id"::unsigned) and ROW("t"."id"::unsigned) <= ROW("t"."id"::unsigned) + ROW(10::unsigned) and ROW(true::boolean)
+        scan "test_space" -> "t"
+execution options:
+sql_vdbe_max_steps = 45000
+vtable_max_rows = 5000
+"#,
+    );
+
+    assert_eq!(expected_explain, plan.as_explain().unwrap());
+}
+
+#[test]
+fn front_sql_between_with_nested_not_from_the_left() {
+    // `not not false between false and true` should be interpreted as
+    // `not (not (false between false and true))`
+    let input =
+        r#"SELECT "id" FROM "test_space" as "t" WHERE not not false between false and true"#;
+
+    let plan = sql_to_optimized_ir(input, vec![]);
+
+    let expected_explain = String::from(
+        r#"projection ("t"."id"::unsigned -> "id")
+    selection not not (ROW(false::boolean) >= ROW(false::boolean) and ROW(false::boolean) <= ROW(true::boolean))
+        scan "test_space" -> "t"
+execution options:
+sql_vdbe_max_steps = 45000
+vtable_max_rows = 5000
+"#,
+    );
+
+    assert_eq!(expected_explain, plan.as_explain().unwrap());
+}
+
+#[test]
+fn front_sql_between_with_nested_and_from_the_left() {
+    // `false and true and false between false and true` should be interpreted as
+    // `(false and true) and (false between false and true)`
+    let input = r#"SELECT "id" FROM "test_space" as "t" WHERE false and true and false between false and true"#;
+
+    let plan = sql_to_optimized_ir(input, vec![]);
+
+    let expected_explain = String::from(
+        r#"projection ("t"."id"::unsigned -> "id")
+    selection ROW(false::boolean) and ROW(true::boolean) and ROW(false::boolean) >= ROW(false::boolean) and ROW(false::boolean) <= ROW(true::boolean)
+        scan "test_space" -> "t"
+execution options:
+sql_vdbe_max_steps = 45000
+vtable_max_rows = 5000
+"#,
+    );
+
+    assert_eq!(expected_explain, plan.as_explain().unwrap());
+}
+
+#[test]
+fn front_sql_between_invalid() {
+    let invalid_between_expressions = vec![
+        r#"SELECT * FROM "test_space" WHERE 1 BETWEEN 2"#,
+        r#"SELECT * FROM "test_space" WHERE 1 BETWEEN 2 OR 3"#,
+    ];
+
+    for invalid_expr in invalid_between_expressions {
+        let metadata = &RouterConfigurationMock::new();
+        let plan = AbstractSyntaxTree::transform_into_plan(invalid_expr, metadata);
+        let err = plan.unwrap_err();
+
+        assert_eq!(
+            true,
+            err.to_string().contains(
+                "BETWEEN operator should have a view of `expr_1 BETWEEN expr_2 AND expr_3`."
+            )
+        );
+    }
+}
+
 #[test]
 fn front_sql_check_arbitrary_utf_in_single_quote_strings() {
     let input = r#"SELECT "identification_number" FROM "hash_testing"
-- 
GitLab