From e7999cee6c4445cc1eb8c7fd3e886b4b592e8af9 Mon Sep 17 00:00:00 2001 From: Arseniy Volynets <a.volynets@picodata.io> Date: Thu, 29 Aug 2024 20:42:42 +0300 Subject: [PATCH] feat: support like operator - Support like operator with signature: expr1 LIKE expr2 [ESCAPE expr3] which returns TRUE only if expr1 matches the string specified by expr2 (pattern). '_' in pattern matches any single character, '%' matches any sequence of zero or more characters. All other characters match themselves, case-sensitively. - Optional escape clause specifies the character to use for escaping '_' and '%' --- doc/sql/query.ebnf | 2 + .../test_app/test/integration/api_test.lua | 29 ++++ sbroad-core/src/backend/sql/ir.rs | 3 + sbroad-core/src/backend/sql/tree.rs | 47 +++++- sbroad-core/src/executor/ir.rs | 14 +- sbroad-core/src/executor/tests.rs | 3 + sbroad-core/src/executor/tests/like.rs | 20 +++ sbroad-core/src/frontend/sql.rs | 72 ++++++++- sbroad-core/src/frontend/sql/ir.rs | 24 ++- sbroad-core/src/frontend/sql/ir/tests.rs | 2 + sbroad-core/src/frontend/sql/ir/tests/like.rs | 147 ++++++++++++++++++ sbroad-core/src/frontend/sql/query.pest | 4 +- sbroad-core/src/ir.rs | 47 ++++++ sbroad-core/src/ir/api/parameter.rs | 29 +++- sbroad-core/src/ir/explain.rs | 31 ++++ sbroad-core/src/ir/expression.rs | 30 ++++ sbroad-core/src/ir/expression/types.rs | 4 +- sbroad-core/src/ir/helpers.rs | 15 +- sbroad-core/src/ir/node.rs | 21 +++ sbroad-core/src/ir/node/expression.rs | 7 +- sbroad-core/src/ir/transformation.rs | 1 + .../transformation/redistribution/eq_cols.rs | 14 +- .../transformation/redistribution/groupby.rs | 23 ++- sbroad-core/src/ir/tree.rs | 25 ++- sbroad-core/src/ir/tree/expression.rs | 1 + sbroad-core/src/ir/tree/subtree.rs | 1 + 26 files changed, 598 insertions(+), 18 deletions(-) create mode 100644 sbroad-core/src/executor/tests/like.rs create mode 100644 sbroad-core/src/frontend/sql/ir/tests/like.rs diff --git a/doc/sql/query.ebnf b/doc/sql/query.ebnf index 5c884df3d8..3887f5d907 100644 --- 
a/doc/sql/query.ebnf +++ b/doc/sql/query.ebnf @@ -26,6 +26,7 @@ expression ::= ('NOT'* ( | cast | current_date | substr + | like | to_char | to_date | trim @@ -49,6 +50,7 @@ case ::= 'CASE' expression? ('WHEN' expression 'THEN' expression)+ ('ELSE' expression)? 'END' cast ::= 'CAST' '(' expression 'AS' type ')' | expression '::' type +like ::= (expression 'LIKE' expression ('ESCAPE' expression)?) to_char ::= 'TO_CHAR' '(' expression ',' format ')' to_date ::= 'TO_DATE' '(' expression ',' format ')' trim ::= 'TRIM' '(' diff --git a/sbroad-cartridge/test_app/test/integration/api_test.lua b/sbroad-cartridge/test_app/test/integration/api_test.lua index 51f5e2c63f..cf60d18a39 100644 --- a/sbroad-cartridge/test_app/test/integration/api_test.lua +++ b/sbroad-cartridge/test_app/test/integration/api_test.lua @@ -697,3 +697,32 @@ g.test_union_operator_works = function () rows = { {1}, {2} }, }) end + +g.test_like_works = function () + local api = cluster:server("api-1").net_box + + -- all conditions must evaluate to true + local r, err = api:call("sbroad.execute", { [[ + select id from testing_space + where name like '123' + and name like '1__' + and name like '%2_' + and '%_%' like '\%\_\%' escape '\' + and 'A' || 'a' like '_' || '%' + and 'A' || '_' like '_' || '\_' escape '' || '\' + and (values ('a')) like (values ('_')) + and (values ('_')) like (values ('\_')) escape (values ('\')) + and (select name from testing_space) like (select name from testing_space) + and '_' like '\_' + and '%' like '\%' + ]] }) + + t.assert_equals(err, nil) + t.assert_equals(r, { + metadata = { + {name = "id", type = "integer"}, + }, + rows = { {1} }, + }) +end + diff --git a/sbroad-core/src/backend/sql/ir.rs b/sbroad-core/src/backend/sql/ir.rs index f94de8205f..1270d17cd1 100644 --- a/sbroad-core/src/backend/sql/ir.rs +++ b/sbroad-core/src/backend/sql/ir.rs @@ -342,6 +342,8 @@ impl ExecutionPlan { SyntaxData::Concat => sql.push_str("||"), SyntaxData::Comma => sql.push(','), 
SyntaxData::Condition => sql.push_str("ON"), + SyntaxData::Escape => sql.push_str("ESCAPE"), + SyntaxData::Like => sql.push_str("LIKE"), SyntaxData::Distinct => sql.push_str("DISTINCT"), SyntaxData::OrderByPosition(index) => sql.push_str(format!("{index}").as_str()), SyntaxData::OrderByType(order_type) => match order_type { @@ -450,6 +452,7 @@ impl ExecutionPlan { | Expression::Cast { .. } | Expression::Case { .. } | Expression::Concat { .. } + | Expression::Like { .. } | Expression::Row { .. } | Expression::Trim { .. } | Expression::Unary { .. } => {} diff --git a/sbroad-core/src/backend/sql/tree.rs b/sbroad-core/src/backend/sql/tree.rs index 6977b07c36..f93570a63b 100644 --- a/sbroad-core/src/backend/sql/tree.rs +++ b/sbroad-core/src/backend/sql/tree.rs @@ -10,7 +10,7 @@ use crate::ir::node::expression::Expression; use crate::ir::node::relational::Relational; use crate::ir::node::{ Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, Except, ExprInParentheses, GroupBy, - Having, Intersect, Join, Limit, Motion, Node, NodeId, OrderBy, Projection, Reference, + Having, Intersect, Join, Like, Limit, Motion, Node, NodeId, OrderBy, Projection, Reference, ReferenceAsteriskSource, Row, ScanCte, ScanRelation, ScanSubQuery, Selection, StableFunction, Trim, UnaryExpr, Union, UnionAll, Values, ValuesRow, }; @@ -35,6 +35,10 @@ pub enum SyntaxData { Cast, // "case" Case, + // "escape" + Escape, + // "like" + Like, // "when" When, // "then" @@ -159,6 +163,22 @@ impl SyntaxNode { } } + fn new_escape() -> Self { + SyntaxNode { + data: SyntaxData::Escape, + left: None, + right: Vec::new(), + } + } + + fn new_like() -> Self { + SyntaxNode { + data: SyntaxData::Like, + left: None, + right: Vec::new(), + } + } + fn new_end() -> Self { SyntaxNode { data: SyntaxData::End, @@ -790,6 +810,7 @@ impl<'p> SyntaxPlan<'p> { let sn = SyntaxNode::new_parameter(id); self.nodes.push_sn_plan(sn); } + Expression::Like { .. } => self.add_like(id), Expression::Reference { .. 
} | Expression::CountAsterisk { .. } => { let sn = SyntaxNode::new_pointer(id, None, vec![]); self.nodes.push_sn_plan(sn); @@ -1355,6 +1376,30 @@ impl<'p> SyntaxPlan<'p> { self.nodes.push_sn_plan(sn); } + fn add_like(&mut self, id: NodeId) { + let (_, expr) = self.prologue_expr(id); + let Expression::Like(Like { + left, + right, + escape: escape_id, + }) = expr + else { + panic!("Expected LIKE node"); + }; + let (left, right) = (*left, *right); + let escape_sn_id = self.pop_from_stack(*escape_id, id); + let right_sn_id = self.pop_from_stack(right, id); + let left_sn_id = self.pop_from_stack(left, id); + let mut children = vec![ + self.nodes.push_sn_non_plan(SyntaxNode::new_like()), + right_sn_id, + ]; + children.push(self.nodes.push_sn_non_plan(SyntaxNode::new_escape())); + children.push(escape_sn_id); + let sn = SyntaxNode::new_pointer(id, Some(left_sn_id), children); + self.nodes.push_sn_plan(sn); + } + fn add_expr_in_parentheses(&mut self, id: NodeId) { let (_, expr) = self.prologue_expr(id); let Expression::ExprInParentheses(ExprInParentheses { child }) = expr else { diff --git a/sbroad-core/src/executor/ir.rs b/sbroad-core/src/executor/ir.rs index 36125f1ddc..f0a947aa7e 100644 --- a/sbroad-core/src/executor/ir.rs +++ b/sbroad-core/src/executor/ir.rs @@ -13,8 +13,9 @@ use crate::ir::node::expression::{Expression, MutExpression}; use crate::ir::node::relational::{MutRelational, RelOwned, Relational}; use crate::ir::node::{ Alias, ArenaType, ArithmeticExpr, BoolExpr, Case, Cast, Concat, Delete, ExprInParentheses, - GroupBy, Having, Insert, Join, Motion, Node, Node136, NodeId, NodeOwned, OrderBy, Reference, - Row, ScanCte, ScanRelation, Selection, StableFunction, Trim, UnaryExpr, Update, ValuesRow, + GroupBy, Having, Insert, Join, Like, Motion, Node, Node136, NodeId, NodeOwned, OrderBy, + Reference, Row, ScanCte, ScanRelation, Selection, StableFunction, Trim, UnaryExpr, Update, + ValuesRow, }; use crate::ir::operator::{OrderByElement, OrderByEntity}; use 
crate::ir::relation::SpaceEngine; @@ -778,6 +779,15 @@ impl ExecutionPlan { *left = subtree_map.get_id(*left); *right = subtree_map.get_id(*right); } + ExprOwned::Like(Like { + escape: ref mut escape_id, + ref mut right, + ref mut left, + }) => { + *left = subtree_map.get_id(*left); + *right = subtree_map.get_id(*right); + *escape_id = subtree_map.get_id(*escape_id); + } ExprOwned::Trim(Trim { ref mut pattern, ref mut target, diff --git a/sbroad-core/src/executor/tests.rs b/sbroad-core/src/executor/tests.rs index 4acebfdc52..bc83cfb5dd 100644 --- a/sbroad-core/src/executor/tests.rs +++ b/sbroad-core/src/executor/tests.rs @@ -909,6 +909,9 @@ mod cast; #[cfg(test)] mod concat; +#[cfg(test)] +mod like; + #[cfg(test)] mod empty_motion; diff --git a/sbroad-core/src/executor/tests/like.rs b/sbroad-core/src/executor/tests/like.rs new file mode 100644 index 0000000000..45ac334161 --- /dev/null +++ b/sbroad-core/src/executor/tests/like.rs @@ -0,0 +1,20 @@ +use super::*; +use crate::ir::value::Value; + +#[test] +fn like1_test() { + broadcast_check( + r#"SELECT a || 'a' like 'ab' FROM t1"#, + r#"SELECT (("t1"."a") || (?)) LIKE (?) ESCAPE (?) as "col_1" FROM "t1""#, + vec![Value::from("a"), Value::from("ab"), Value::from("\\")], + ); +} + +#[test] +fn like2_test() { + broadcast_check( + r#"SELECT a like a escape 'x' FROM t1"#, + r#"SELECT ("t1"."a") LIKE ("t1"."a") ESCAPE (?) as "col_1" FROM "t1""#, + vec![Value::from("x")], + ); +} diff --git a/sbroad-core/src/frontend/sql.rs b/sbroad-core/src/frontend/sql.rs index 8db4236a09..bf07e211e3 100644 --- a/sbroad-core/src/frontend/sql.rs +++ b/sbroad-core/src/frontend/sql.rs @@ -1457,13 +1457,16 @@ fn parse_param<M: Metadata>( lazy_static::lazy_static! 
{ static ref PRATT_PARSER: PrattParser<Rule> = { use pest::pratt_parser::{Assoc::{Left, Right}, Op}; - use Rule::{Add, And, Between, ConcatInfixOp, Divide, Eq, Gt, GtEq, In, IsNullPostfix, CastPostfix, Lt, LtEq, Multiply, NotEq, Or, Subtract, UnaryNot}; + use Rule::{Add, And, Between, ConcatInfixOp, Divide, Eq, Escape, Gt, GtEq, In, IsNullPostfix, CastPostfix, Like, Lt, LtEq, Multiply, NotEq, Or, Subtract, UnaryNot}; // Precedence is defined lowest to highest. PrattParser::new() .op(Op::infix(Or, Left)) .op(Op::infix(And, Left)) .op(Op::prefix(UnaryNot)) + // ESCAPE has lower precedence than LIKE, so `a LIKE b ESCAPE c` parses as `(a LIKE b) ESCAPE c` + .op(Op::infix(Escape, Left)) + .op(Op::infix(Like, Left)) .op(Op::infix(Between, Left)) .op( Op::infix(Eq, Right) | Op::infix(NotEq, Right) | Op::infix(NotEq, Right) @@ -1655,6 +1658,7 @@ enum ParseExpressionInfixOperator { InfixBool(Bool), InfixArithmetic(Arithmetic), Concat, + Escape, } #[derive(Clone, Debug)] @@ -1679,6 +1683,11 @@ enum ParseExpression { args: Vec<ParseExpression>, feature: Option<FunctionFeature>, }, + Like { + left: Box<ParseExpression>, + right: Box<ParseExpression>, + escape: Option<Box<ParseExpression>>, + }, Row { children: Vec<ParseExpression>, }, @@ -1911,6 +1920,29 @@ impl ParseExpression { }; plan.nodes.push(trim_expr.into()) } + ParseExpression::Like { + left, + right, + escape, + } => { + let plan_left_id = left.populate_plan(plan, worker)?; + let left_covered_with_row = plan.row(plan_left_id)?; + + let plan_right_id = right.populate_plan(plan, worker)?; + let right_covered_with_row = plan.row(plan_right_id)?; + + let escape_covered_with_row = if let Some(escape) = escape { + let plan_escape_id = escape.populate_plan(plan, worker)?; + Some(plan.row(plan_escape_id)?) + } else { + None + }; + plan.add_like( + left_covered_with_row, + right_covered_with_row, + escape_covered_with_row, + )? 
+ } ParseExpression::FinalBetween { is_not, left, @@ -2064,6 +2096,9 @@ impl ParseExpression { ParseExpressionInfixOperator::InfixBool(bool) => { plan.add_cond(left_row_id, bool.clone(), right_row_id)? } + ParseExpressionInfixOperator::Escape => { + unreachable!("escape op is not added to AST") + } }; if *is_not { plan.add_unary(Unary::Not, op_plan_id)? @@ -2190,6 +2225,7 @@ fn find_interim_between(mut expr: &mut ParseExpression) -> Option<(&mut ParseExp loop { match expr { ParseExpression::Infix { + // TODO: why we handle AND, but don't handle OR? op: ParseExpressionInfixOperator::InfixBool(Bool::And), right, .. @@ -2210,6 +2246,29 @@ fn find_interim_between(mut expr: &mut ParseExpression) -> Option<(&mut ParseExp } } +fn connect_escape_to_like_node( + mut lhs: ParseExpression, + rhs: ParseExpression, +) -> Result<ParseExpression, SbroadError> { + let ParseExpression::Like { escape, .. } = &mut lhs else { + return Err(SbroadError::Invalid( + Entity::Expression, + Some(format_smolstr!( + "ESCAPE can go only after LIKE expression, got: {:?}", + lhs + )), + )); + }; + if escape.is_some() { + return Err(SbroadError::Invalid( + Entity::Expression, + Some("escape specified twice: expr1 LIKE expr2 ESCAPE expr 3 ESCAPE expr4".into()), + )); + } + *escape = Some(Box::new(rhs)); + Ok(lhs) +} + fn cast_type_from_pair(type_pair: Pair<Rule>) -> Result<CastType, SbroadError> { if type_pair.as_rule() != Rule::ColumnDefType { // TypeAny. 
@@ -2597,6 +2656,13 @@ where let op = match op.as_rule() { Rule::And => ParseExpressionInfixOperator::InfixBool(Bool::And), Rule::Or => ParseExpressionInfixOperator::InfixBool(Bool::Or), + Rule::Like => { + return Ok(ParseExpression::Like { + left: Box::new(lhs), + right: Box::new(rhs), + escape: None + }) + }, Rule::Between => { let mut op_inner = op.into_inner(); is_not = op_inner.next().is_some(); @@ -2606,6 +2672,7 @@ where right: Box::new(rhs), }) }, + Rule::Escape => ParseExpressionInfixOperator::Escape, Rule::Eq => ParseExpressionInfixOperator::InfixBool(Bool::Eq), Rule::NotEq => ParseExpressionInfixOperator::InfixBool(Bool::NotEq), Rule::Lt => ParseExpressionInfixOperator::InfixBool(Bool::Lt), @@ -2646,6 +2713,9 @@ where return Ok(lhs); } } + if matches!(op, ParseExpressionInfixOperator::Escape) { + return connect_escape_to_like_node(lhs, rhs) + } Ok(ParseExpression::Infix { op, diff --git a/sbroad-core/src/frontend/sql/ir.rs b/sbroad-core/src/frontend/sql/ir.rs index 4e40897820..2b058db95a 100644 --- a/sbroad-core/src/frontend/sql/ir.rs +++ b/sbroad-core/src/frontend/sql/ir.rs @@ -11,9 +11,9 @@ use crate::ir::node::expression::{ExprOwned, Expression}; use crate::ir::node::relational::{MutRelational, RelOwned, Relational}; use crate::ir::node::{ Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, Constant, Delete, Except, - ExprInParentheses, GroupBy, Having, Insert, Intersect, Join, Limit, Motion, Node, NodeAligned, - NodeId, OrderBy, Projection, Reference, Row, ScanCte, ScanRelation, ScanSubQuery, Selection, - StableFunction, Trim, UnaryExpr, Union, UnionAll, Update, Values, ValuesRow, + ExprInParentheses, GroupBy, Having, Insert, Intersect, Join, Like, Limit, Motion, Node, + NodeAligned, NodeId, OrderBy, Projection, Reference, Row, ScanCte, ScanRelation, ScanSubQuery, + Selection, StableFunction, Trim, UnaryExpr, Union, UnionAll, Update, Values, ValuesRow, }; use crate::ir::operator::{OrderByElement, OrderByEntity}; use 
crate::ir::transformation::redistribution::MotionOpcode; @@ -174,6 +174,15 @@ impl Plan { map.replace(left); map.replace(right); } + ExprOwned::Like(Like { + ref mut left, + ref mut right, + ref mut escape, + }) => { + map.replace(left); + map.replace(right); + map.replace(escape); + } ExprOwned::Trim(Trim { ref mut pattern, ref mut target, @@ -318,6 +327,15 @@ impl SubtreeCloner { *left = self.get_new_id(*left)?; *right = self.get_new_id(*right)?; } + ExprOwned::Like(Like { + ref mut left, + ref mut right, + ref mut escape, + }) => { + *left = self.get_new_id(*left)?; + *right = self.get_new_id(*right)?; + *escape = self.get_new_id(*escape)?; + } ExprOwned::Trim(Trim { ref mut pattern, ref mut target, diff --git a/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad-core/src/frontend/sql/ir/tests.rs index cb2cf1e9ef..e74f6d26eb 100644 --- a/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad-core/src/frontend/sql/ir/tests.rs @@ -3927,6 +3927,8 @@ mod insert; #[cfg(test)] mod join; #[cfg(test)] +mod like; +#[cfg(test)] mod limit; #[cfg(test)] mod params; diff --git a/sbroad-core/src/frontend/sql/ir/tests/like.rs b/sbroad-core/src/frontend/sql/ir/tests/like.rs new file mode 100644 index 0000000000..b9e5c3860b --- /dev/null +++ b/sbroad-core/src/frontend/sql/ir/tests/like.rs @@ -0,0 +1,147 @@ +use crate::{ + executor::engine::mock::RouterConfigurationMock, frontend::sql::ast::AbstractSyntaxTree, + frontend::sql::Ast, ir::transformation::helpers::sql_to_optimized_ir, +}; +use pretty_assertions::assert_eq; + +#[test] +fn like_valid() { + let queries = vec![ + "SELECT a like a FROM t1", + "SELECT a like a escape 'abc' FROM t1", + "SELECT a || 'a' like a FROM t1", + "SELECT a || 'a' like a escape 'a' FROM t1", + "SELECT a || 'a' like a || 'a' FROM t1", + "SELECT a || 'a' like a || 'a' escape 'a' FROM t1", + "SELECT a || 'a' like a || 'a' escape 'a' || 'a' FROM t1", + "SELECT not a || 'a' like a || 'a' FROM t1", + "SELECT not a || 'a' like a || 'a' FROM t1", + "SELECT true 
or a || 'a' like a || 'a' FROM t1", + "SELECT true or a || 'a' like a || 'a' and false FROM t1", + "SELECT true or a || 'a' like a || 'a' and false FROM t1", + "SELECT true between false and 'a' like 'b' FROM t1", + ]; + for query in queries { + let _ = sql_to_optimized_ir(query, vec![]); + } +} + +#[test] +fn like_invalid1() { + let input = r#"select a like a escape 'a' escape 'a' from t1"#; + + let metadata = &RouterConfigurationMock::new(); + let err = AbstractSyntaxTree::transform_into_plan(input, metadata).unwrap_err(); + + assert_eq!( + "invalid expression: escape specified twice: expr1 LIKE expr2 ESCAPE expr 3 ESCAPE expr4", + err.to_string() + ); +} + +#[test] +fn like_invalid2() { + let input = r#"select a escape 'b' from t1"#; + + let metadata = &RouterConfigurationMock::new(); + let err = AbstractSyntaxTree::transform_into_plan(input, metadata).unwrap_err(); + + assert_eq!( + "invalid expression: ESCAPE can go only after LIKE expression, got: PlanId { plan_id: NodeId { offset: 3, arena_type: Arena96 } }", + err.to_string() + ); +} + +#[test] +fn like_explain1() { + let input = r#"select a like a from t1 where a || 'a' like 'a' || 'a'"#; + + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string) -> "col_1") + selection ROW(ROW("t1"."a"::string) || ROW('a'::string)) LIKE ROW(ROW('a'::string) || ROW('a'::string)) ESCAPE ROW('\'::string) + scan "t1" +execution options: +sql_vdbe_max_steps = 45000 +vtable_max_rows = 5000 +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + +#[test] +fn like_explain2() { + let input = r#"select a like a escape '\' from t1 where a || 'a' like 'a' || 'a' escape 'x'"#; + + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string) -> "col_1") + selection 
ROW(ROW("t1"."a"::string) || ROW('a'::string)) LIKE ROW(ROW('a'::string) || ROW('a'::string)) ESCAPE ROW('x'::string) + scan "t1" +execution options: +sql_vdbe_max_steps = 45000 +vtable_max_rows = 5000 +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + +#[test] +fn like_explain3() { + let input = r#"select a like a from t1 group by a like a"#; + + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("column_332"::boolean -> "col_1") + group by ("column_332"::boolean) output: ("column_332"::boolean -> "column_332") + motion [policy: segment([ref("column_332")])] + projection (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string) -> "column_332") + group by (ROW("t1"."a"::string) LIKE ROW("t1"."a"::string) ESCAPE ROW('\'::string)) output: ("t1"."a"::string -> "a", "t1"."bucket_id"::unsigned -> "bucket_id", "t1"."b"::integer -> "b") + scan "t1" +execution options: +sql_vdbe_max_steps = 45000 +vtable_max_rows = 5000 +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + +#[test] +fn like_explain4() { + let input = r#"select * from t1 where (select 'hi' from t1) like (select 'hi' from t1) escape (select '\' from t1)"#; + + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("t1"."a"::string -> "a", "t1"."b"::integer -> "b") + selection ROW($2) LIKE ROW($1) ESCAPE ROW($0) + scan "t1" +subquery $0: +motion [policy: full] + scan + projection ('\'::string -> "col_1") + scan "t1" +subquery $1: +motion [policy: full] + scan + projection ('hi'::string -> "col_1") + scan "t1" +subquery $2: +motion [policy: full] + scan + projection ('hi'::string -> "col_1") + scan "t1" +execution options: +sql_vdbe_max_steps = 45000 +vtable_max_rows = 5000 +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} diff --git a/sbroad-core/src/frontend/sql/query.pest b/sbroad-core/src/frontend/sql/query.pest 
index ab49f8d039..e85a4de839 100644 --- a/sbroad-core/src/frontend/sql/query.pest +++ b/sbroad-core/src/frontend/sql/query.pest @@ -284,7 +284,9 @@ Identifier = @{ DelimitedIdentifier | RegularIdentifier } EmptyQuery = { WHITESPACE* } Expr = { ExprAtomValue ~ (ExprInfixOp ~ ExprAtomValue)* } - ExprInfixOp = _{ Between | ArithInfixOp | CmpInfixOp | ConcatInfixOp | And | Or } + ExprInfixOp = _{ Like | Escape | Between | ArithInfixOp | CmpInfixOp | ConcatInfixOp | And | Or } + Like = { ^"like" } + Escape = { ^"escape" } Between = { NotFlag? ~ ^"between" } And = { ^"and" } Or = @{ ^"or" ~ SP } diff --git a/sbroad-core/src/ir.rs b/sbroad-core/src/ir.rs index 5aaf383d62..3916b337b4 100644 --- a/sbroad-core/src/ir.rs +++ b/sbroad-core/src/ir.rs @@ -46,6 +46,8 @@ use crate::ir::undo::TransformationLog; use crate::ir::value::Value; use crate::{collection, error, warn}; +use self::node::Like; + // TODO: remove when rust version in bumped in module #[allow(elided_lifetimes_in_associated_constant)] pub mod acl; @@ -90,6 +92,7 @@ impl Nodes { Node32::Concat(concat) => Node::Expression(Expression::Concat(concat)), Node32::Cast(cast) => Node::Expression(Expression::Cast(cast)), Node32::CountAsterisk(count) => Node::Expression(Expression::CountAsterisk(count)), + Node32::Like(like) => Node::Expression(Expression::Like(like)), Node32::Except(except) => Node::Relational(Relational::Except(except)), Node32::ExprInParentheses(expr) => { Node::Expression(Expression::ExprInParentheses(expr)) @@ -208,6 +211,7 @@ impl Nodes { Node32::CountAsterisk(count) => { MutNode::Expression(MutExpression::CountAsterisk(count)) } + Node32::Like(like) => MutNode::Expression(MutExpression::Like(like)), Node32::Except(except) => MutNode::Relational(MutRelational::Except(except)), Node32::ExprInParentheses(expr) => { MutNode::Expression(MutExpression::ExprInParentheses(expr)) @@ -1257,6 +1261,30 @@ impl Plan { self.nodes.add_bool(left, op, right) } + /// Add Like operator to the plan. 
+ /// + /// # Errors + /// Never fails in its current form: the escape expression is not validated here. + pub fn add_like( + &mut self, + left: NodeId, + right: NodeId, + escape_id: Option<NodeId>, + ) -> Result<NodeId, SbroadError> { + let escape_id = if let Some(id) = escape_id { + id + } else { + let s_id = self.add_const(Value::String('\\'.into())); + self.nodes.add_row(vec![s_id], None) + }; + let node = Like { + left, + right, + escape: escape_id, + }; + Ok(self.nodes.push(node.into())) + } + /// Add node covered with parentheses to the plan. /// /// # Errors @@ -1539,6 +1567,7 @@ impl Plan { /// # Note /// This function assumes that parent expression does NOT have two or more /// children with the same id. So, if this happens, only one child will be replaced. + #[allow(clippy::too_many_lines)] pub fn replace_expression( &mut self, parent_id: NodeId, @@ -1595,6 +1624,24 @@ impl Plan { return Ok(()); } } + MutExpression::Like(Like { + escape, + left, + right, + }) => { + if *left == old_id { + *left = new_id; + return Ok(()); + } + if *right == old_id { + *right = new_id; + return Ok(()); + } + if *escape == old_id { + *escape = new_id; + return Ok(()); + } + } MutExpression::Trim(Trim { pattern, target, .. 
}) => { diff --git a/sbroad-core/src/ir/api/parameter.rs b/sbroad-core/src/ir/api/parameter.rs index 91b9902fd6..7fe94423bb 100644 --- a/sbroad-core/src/ir/api/parameter.rs +++ b/sbroad-core/src/ir/api/parameter.rs @@ -3,8 +3,8 @@ use crate::ir::node::block::{Block, MutBlock}; use crate::ir::node::expression::{Expression, MutExpression}; use crate::ir::node::relational::{MutRelational, Relational}; use crate::ir::node::{ - Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, ExprInParentheses, Having, Join, MutNode, - Node64, NodeId, Parameter, Procedure, Row, Selection, StableFunction, Trim, UnaryExpr, + Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, ExprInParentheses, Having, Join, Like, + MutNode, Node64, NodeId, Parameter, Procedure, Row, Selection, StableFunction, Trim, UnaryExpr, ValuesRow, }; use crate::ir::tree::traversal::{LevelNode, PostOrder}; @@ -267,6 +267,21 @@ impl<'binder> ParamsBinder<'binder> { ); } } + Expression::Like(Like { + escape, + left, + right, + }) => { + for param_id in &[*left, *right] { + self.cover_param_with_row( + *param_id, + true, + &mut param_index, + &mut row_ids, + ); + } + self.cover_param_with_row(*escape, true, &mut param_index, &mut row_ids); + } Expression::Trim(Trim { ref pattern, ref target, @@ -481,6 +496,16 @@ impl<'binder> ParamsBinder<'binder> { bind_param(param_id, true, &mut param_index); } } + MutExpression::Like(Like { + ref mut escape, + ref mut left, + ref mut right, + }) => { + bind_param(escape, true, &mut param_index); + for param_id in [left, right] { + bind_param(param_id, true, &mut param_index); + } + } MutExpression::Trim(Trim { ref mut pattern, ref mut target, diff --git a/sbroad-core/src/ir/explain.rs b/sbroad-core/src/ir/explain.rs index 6a945fcb7f..bfaaf2e0b6 100644 --- a/sbroad-core/src/ir/explain.rs +++ b/sbroad-core/src/ir/explain.rs @@ -45,6 +45,7 @@ enum ColExpr { Option<Box<ColExpr>>, ), Concat(Box<ColExpr>, Box<ColExpr>), + Like(Box<ColExpr>, Box<ColExpr>, Option<Box<ColExpr>>), 
StableFunction(SmolStr, Vec<ColExpr>, Option<FunctionFeature>, Type, bool), Trim(Option<TrimKind>, Option<Box<ColExpr>>, Box<ColExpr>), Row(Row), @@ -107,6 +108,10 @@ impl Display for ColExpr { }, ColExpr::Row(row) => row.to_string(), ColExpr::None => String::new(), + ColExpr::Like(l, r, escape) => match escape { + Some(e) => format!("{l} LIKE {r} ESCAPE {e}"), + None => format!("{l} LIKE {r}"), + }, }; write!(f, "{s}") @@ -229,6 +234,32 @@ impl ColExpr { let concat_expr = ColExpr::Concat(Box::new(left), Box::new(right)); stack.push((concat_expr, id)); } + Expression::Like { .. } => { + let escape = Some( + stack + .pop() + .ok_or_else(|| { + SbroadError::UnexpectedNumberOfValues( + "stack is empty while processing ESCAPE expression" + .to_smolstr(), + ) + })? + .0, + ); + let (right, _) = stack.pop().ok_or_else(|| { + SbroadError::UnexpectedNumberOfValues( + "stack is empty while processing right LIKE expression".to_smolstr(), + ) + })?; + let (left, _) = stack.pop().ok_or_else(|| { + SbroadError::UnexpectedNumberOfValues( + "stack is empty while processing left LIKE expression".to_smolstr(), + ) + })?; + let concat_expr = + ColExpr::Like(Box::new(left), Box::new(right), escape.map(Box::new)); + stack.push((concat_expr, id)); + } Expression::Constant(Constant { value }) => { let expr = ColExpr::Column(value.to_string(), current_node.calculate_type(plan)?); diff --git a/sbroad-core/src/ir/expression.rs b/sbroad-core/src/ir/expression.rs index 2b18a20f8f..d4c94c1e50 100644 --- a/sbroad-core/src/ir/expression.rs +++ b/sbroad-core/src/ir/expression.rs @@ -14,6 +14,7 @@ use std::collections::{BTreeMap, HashSet}; use std::hash::{Hash, Hasher}; use std::ops::Bound::Included; +use super::node::Like; use super::{ distribution, operator, Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, Constant, ExprInParentheses, Expression, LevelNode, MutExpression, MutNode, Node, NodeId, Reference, @@ -256,6 +257,9 @@ impl<'plan> Comparator<'plan> { /// # Errors /// - invalid 
[`Expression::Reference`]s in either of subtrees /// - invalid children in some expression + /// + /// # Panics + /// - never #[allow(clippy::too_many_lines)] pub fn are_subtrees_equal(&self, lhs: NodeId, rhs: NodeId) -> Result<bool, SbroadError> { let l = self.plan.get_node(lhs)?; @@ -356,6 +360,22 @@ impl<'plan> Comparator<'plan> { && self.are_subtrees_equal(*child_left, *child_right)?); } } + Expression::Like(Like { + left: left_left, + right: right_left, + escape: escape_left, + }) => { + if let Expression::Like(Like { + left: left_right, + right: right_right, + escape: escape_right, + }) = right + { + return Ok(self.are_subtrees_equal(*escape_left, *escape_right)? + && self.are_subtrees_equal(*left_left, *left_right)? + && self.are_subtrees_equal(*right_left, *right_right)?); + } + } Expression::Concat(Concat { left: left_left, right: right_left, @@ -533,6 +553,15 @@ impl<'plan> Comparator<'plan> { self.hash_for_child_expr(*left, depth); self.hash_for_child_expr(*right, depth); } + Expression::Like(Like { + left, + right, + escape: escape_id, + }) => { + self.hash_for_child_expr(*left, depth); + self.hash_for_child_expr(*right, depth); + self.hash_for_child_expr(*escape_id, depth); + } Expression::Trim(Trim { kind, pattern, @@ -1451,6 +1480,7 @@ impl Plan { }; match expr { Expression::Bool(_) + | Expression::Like { .. } | Expression::Arithmetic(_) | Expression::Unary(_) | Expression::Constant(Constant { diff --git a/sbroad-core/src/ir/expression/types.rs b/sbroad-core/src/ir/expression/types.rs index 9642fa87da..12eb132e7f 100644 --- a/sbroad-core/src/ir/expression/types.rs +++ b/sbroad-core/src/ir/expression/types.rs @@ -101,7 +101,9 @@ impl Expression<'_> { | Expression::ExprInParentheses(ExprInParentheses { child }) => { plan.get_node_type(*child) } - Expression::Bool(_) | Expression::Unary(_) => Ok(Type::Boolean), + Expression::Bool(_) | Expression::Unary(_) | Expression::Like { .. 
} => { + Ok(Type::Boolean) + } Expression::Arithmetic(ArithmeticExpr { left, right, op, .. }) => { diff --git a/sbroad-core/src/ir/helpers.rs b/sbroad-core/src/ir/helpers.rs index 3750ddde17..5b4ef18e09 100644 --- a/sbroad-core/src/ir/helpers.rs +++ b/sbroad-core/src/ir/helpers.rs @@ -18,7 +18,7 @@ use std::hash::BuildHasher; use super::node::expression::Expression; use super::node::relational::Relational; -use super::node::Limit; +use super::node::{Like, Limit}; /// Helper macros to build a hash map or set /// from the list of arguments. @@ -202,6 +202,19 @@ impl Plan { Expression::Cast(_) => writeln!(buf, "Cast")?, Expression::Trim(_) => writeln!(buf, "Trim")?, Expression::Concat(_) => writeln!(buf, "Concat")?, + Expression::Like(Like { + left, + right, + escape, + }) => { + writeln!(buf, "Like")?; + writeln_with_tabulation(buf, tabulation_number + 1, "Left child")?; + self.formatted_arena_node(buf, tabulation_number + 1, *left)?; + writeln_with_tabulation(buf, tabulation_number + 1, "Right child")?; + self.formatted_arena_node(buf, tabulation_number + 1, *right)?; + writeln_with_tabulation(buf, tabulation_number + 1, "Escape")?; + self.formatted_arena_node(buf, tabulation_number + 1, *escape)?; + } Expression::StableFunction(_) => writeln!(buf, "StableFunction")?, Expression::Unary(UnaryExpr { op, child }) => { writeln!(buf, "Unary [op: {op}]")?; diff --git a/sbroad-core/src/ir/node.rs b/sbroad-core/src/ir/node.rs index e64b88dfe1..d6a9ab577e 100644 --- a/sbroad-core/src/ir/node.rs +++ b/sbroad-core/src/ir/node.rs @@ -217,6 +217,25 @@ impl ReferenceAsteriskSource { } } +/// Like expressions. +/// +/// Example: `a like b escape '\'`. 
+#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct Like { + /// Left child id + pub left: NodeId, + /// Right child id + pub right: NodeId, + /// Escape child id + pub escape: NodeId, +} + +impl From<Like> for NodeAligned { + fn from(value: Like) -> Self { + Self::Node32(Node32::Like(value)) + } +} + /// Reference to the position in the incoming tuple(s). /// Uses a relative pointer as a coordinate system: /// - relational node (containing this reference) @@ -999,6 +1018,7 @@ pub enum Node32 { ExprInParentheses(ExprInParentheses), Unary(UnaryExpr), Concat(Concat), + Like(Like), Bool(BoolExpr), Limit(Limit), Arithmetic(ArithmeticExpr), @@ -1022,6 +1042,7 @@ impl Node32 { Node32::Cast(cast) => NodeOwned::Expression(ExprOwned::Cast(cast)), Node32::Concat(concat) => NodeOwned::Expression(ExprOwned::Concat(concat)), Node32::CountAsterisk(count) => NodeOwned::Expression(ExprOwned::CountAsterisk(count)), + Node32::Like(like) => NodeOwned::Expression(ExprOwned::Like(like)), Node32::Except(except) => NodeOwned::Relational(RelOwned::Except(except)), Node32::ExprInParentheses(expr_in_par) => { NodeOwned::Expression(ExprOwned::ExprInParentheses(expr_in_par)) diff --git a/sbroad-core/src/ir/node/expression.rs b/sbroad-core/src/ir/node/expression.rs index e9d3738457..1c3d62a5c6 100644 --- a/sbroad-core/src/ir/node/expression.rs +++ b/sbroad-core/src/ir/node/expression.rs @@ -7,7 +7,7 @@ use crate::{ use super::{ Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, Constant, CountAsterisk, - ExprInParentheses, NodeAligned, NodeId, Reference, Row, StableFunction, Trim, UnaryExpr, + ExprInParentheses, Like, NodeAligned, NodeId, Reference, Row, StableFunction, Trim, UnaryExpr, }; #[allow(clippy::module_name_repetitions)] @@ -19,6 +19,7 @@ pub enum ExprOwned { Cast(Cast), Concat(Concat), Constant(Constant), + Like(Like), Reference(Reference), Row(Row), StableFunction(StableFunction), @@ -40,6 +41,7 @@ impl From<ExprOwned> for NodeAligned { 
ExprOwned::Concat(concat) => concat.into(), ExprOwned::Constant(constant) => constant.into(), ExprOwned::CountAsterisk(count) => count.into(), + ExprOwned::Like(like) => like.into(), ExprOwned::ExprInParentheses(expr) => expr.into(), ExprOwned::Reference(reference) => reference.into(), ExprOwned::Row(row) => row.into(), @@ -70,6 +72,7 @@ pub enum Expression<'a> { Cast(&'a Cast), Concat(&'a Concat), Constant(&'a Constant), + Like(&'a Like), Reference(&'a Reference), Row(&'a Row), StableFunction(&'a StableFunction), @@ -89,6 +92,7 @@ pub enum MutExpression<'a> { Cast(&'a mut Cast), Concat(&'a mut Concat), Constant(&'a mut Constant), + Like(&'a mut Like), Reference(&'a mut Reference), Row(&'a mut Row), StableFunction(&'a mut StableFunction), @@ -186,6 +190,7 @@ impl Expression<'_> { Expression::Cast(cast) => ExprOwned::Cast((*cast).clone()), Expression::Concat(con) => ExprOwned::Concat((*con).clone()), Expression::Constant(constant) => ExprOwned::Constant((*constant).clone()), + Expression::Like(like) => ExprOwned::Like((*like).clone()), Expression::CountAsterisk(count) => ExprOwned::CountAsterisk((*count).clone()), Expression::ExprInParentheses(expr_par) => { ExprOwned::ExprInParentheses((*expr_par).clone()) diff --git a/sbroad-core/src/ir/transformation.rs b/sbroad-core/src/ir/transformation.rs index cf7e9142b2..8f20605d4c 100644 --- a/sbroad-core/src/ir/transformation.rs +++ b/sbroad-core/src/ir/transformation.rs @@ -287,6 +287,7 @@ impl Plan { } MutExpression::Concat(_) | MutExpression::Constant(_) + | MutExpression::Like(_) | MutExpression::Reference(_) | MutExpression::CountAsterisk(_) => {} } diff --git a/sbroad-core/src/ir/transformation/redistribution/eq_cols.rs b/sbroad-core/src/ir/transformation/redistribution/eq_cols.rs index 640c5dd315..277adcb143 100644 --- a/sbroad-core/src/ir/transformation/redistribution/eq_cols.rs +++ b/sbroad-core/src/ir/transformation/redistribution/eq_cols.rs @@ -2,8 +2,8 @@ use crate::errors::SbroadError; use 
crate::ir::expression::ExpressionId; use crate::ir::node::expression::Expression; use crate::ir::node::{ - Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, ExprInParentheses, NodeId, Reference, Row, - StableFunction, Trim, UnaryExpr, + Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, ExprInParentheses, Like, NodeId, + Reference, Row, StableFunction, Trim, UnaryExpr, }; use crate::ir::operator::Bool; use crate::ir::transformation::redistribution::BoolOp; @@ -84,6 +84,14 @@ impl ReferredMap { | Expression::Concat(Concat { left, right, .. }) => referred .get_or_none(*left) .add(referred.get_or_none(*right)), + Expression::Like(Like { + escape, + left, + right, + }) => referred + .get_or_none(*left) + .add(referred.get_or_none(*right)) + .add(referred.get_or_none(*escape)), Expression::Case(Case { search_expr, when_blocks, @@ -485,7 +493,7 @@ impl EqualityCols { /// /// # Returns /// - `None` in case this join condition does - /// not allow Repartition join. + /// not allow Repartition join. 
/// - Otherwise, returns non-empty `EqualityCols` wrapped in `Option` pub fn from_join_condition( plan: &Plan, diff --git a/sbroad-core/src/ir/transformation/redistribution/groupby.rs b/sbroad-core/src/ir/transformation/redistribution/groupby.rs index c377275d64..18b6e3ba7a 100644 --- a/sbroad-core/src/ir/transformation/redistribution/groupby.rs +++ b/sbroad-core/src/ir/transformation/redistribution/groupby.rs @@ -11,7 +11,7 @@ use crate::ir::node::expression::Expression; use crate::ir::node::relational::{MutRelational, Relational}; use crate::ir::node::{ Alias, ArenaType, ArithmeticExpr, BoolExpr, Case, Cast, Concat, Constant, ExprInParentheses, - GroupBy, Having, NodeId, Projection, Reference, Row, StableFunction, Trim, UnaryExpr, + GroupBy, Having, Like, NodeId, Projection, Reference, Row, StableFunction, Trim, UnaryExpr, }; use crate::ir::relation::Type; use crate::ir::transformation::redistribution::{ @@ -387,6 +387,9 @@ impl Plan { /// /// # Errors /// - invalid [`Expression::Reference`]s in either of subtrees + /// + /// # Panics + /// - never pub fn are_aggregate_subtrees_equal( &self, lhs: NodeId, @@ -547,6 +550,24 @@ impl Plan { .are_aggregate_subtrees_equal(*right_left, *right_right)?); } } + Expression::Like(Like { + escape: escape_left, + left: left_left, + right: right_left, + }) => { + if let Expression::Like(Like { + left: left_right, + right: right_right, + escape: escape_right, + }) = right + { + return Ok(self + .are_aggregate_subtrees_equal(*escape_left, *escape_right)? + && self.are_aggregate_subtrees_equal(*left_left, *left_right)? 
+ && self + .are_aggregate_subtrees_equal(*right_left, *right_right)?); + } + } Expression::Constant(Constant { value: value_left }) => { if let Expression::Constant(Constant { value: value_right }) = right { return Ok(*value_left == *value_right); diff --git a/sbroad-core/src/ir/tree.rs b/sbroad-core/src/ir/tree.rs index 245de35217..9d8c0f35b0 100644 --- a/sbroad-core/src/ir/tree.rs +++ b/sbroad-core/src/ir/tree.rs @@ -1,6 +1,9 @@ //! IR tree traversal module. -use super::{node::expression::Expression, Nodes, Plan}; +use super::{ + node::{expression::Expression, Like}, + Nodes, Plan, +}; use crate::ir::node::{ Alias, ArithmeticExpr, BoolExpr, Case, Cast, Concat, ExprInParentheses, NodeId, Trim, UnaryExpr, }; @@ -35,6 +38,26 @@ trait TreeIterator<'nodes> { } } + fn handle_like(&mut self, expr: Expression) -> Option<NodeId> { + let Expression::Like(Like { + left, + right, + escape: escape_id, + }) = expr + else { + panic!("Like expected") + }; + let child_step = *self.get_child().borrow(); + let res = match child_step { + 0 => Some(*left), + 1 => Some(*right), + 2 => Some(*escape_id), + _ => None, + }; + *self.get_child().borrow_mut() += 1; + res + } + fn handle_left_right_children(&mut self, expr: Expression) -> Option<NodeId> { let (Expression::Bool(BoolExpr { left, right, .. }) | Expression::Arithmetic(ArithmeticExpr { left, right, .. }) diff --git a/sbroad-core/src/ir/tree/expression.rs b/sbroad-core/src/ir/tree/expression.rs index 467efe2fa5..6290ae7a85 100644 --- a/sbroad-core/src/ir/tree/expression.rs +++ b/sbroad-core/src/ir/tree/expression.rs @@ -161,6 +161,7 @@ fn expression_next<'nodes>(iter: &mut impl ExpressionTreeIterator<'nodes>) -> Op } } Expression::Trim { .. } => iter.handle_trim(expr), + Expression::Like { .. } => iter.handle_like(expr), Expression::Case { .. } => iter.handle_case_iter(expr), Expression::Constant { .. } | Expression::Reference { .. 
} diff --git a/sbroad-core/src/ir/tree/subtree.rs b/sbroad-core/src/ir/tree/subtree.rs index 4cc268f409..6ceeb98920 100644 --- a/sbroad-core/src/ir/tree/subtree.rs +++ b/sbroad-core/src/ir/tree/subtree.rs @@ -217,6 +217,7 @@ fn subtree_next<'plan>( | Expression::Arithmetic { .. } | Expression::Concat { .. } => iter.handle_left_right_children(expr), Expression::Trim { .. } => iter.handle_trim(expr), + Expression::Like { .. } => iter.handle_like(expr), Expression::Row(Row { list, .. }) | Expression::StableFunction(StableFunction { children: list, .. }) => { let child_step = *iter.get_child().borrow(); -- GitLab