diff --git a/doc/sql/query.ebnf b/doc/sql/query.ebnf index aa6f8cf8731a056ee51f6c60d741c4ffee1cb52a..b6fde666d7496b2c94a390309f5a4dc940637a98 100644 --- a/doc/sql/query.ebnf +++ b/doc/sql/query.ebnf @@ -32,6 +32,7 @@ expression ::= (table '.')? column | ('(' (expression(',' expression)*) ')') | 'NOT' expression | '(' expression ')' + | 'TRIM' '(' ((('LEADING' | 'TRAILING' | 'BOTH')? expression) | ('LEADING' | 'TRAILING' | 'BOTH')) 'FROM' expression ')' aggregate ::= ('AVG' | 'COUNT' | 'MAX' | 'MIN' | 'SUM' | 'TOTAL') '(' expression ')' | 'GROUP_CONCAT' '(' expression ',' "'" string "'" ')' cast ::= 'CAST' '(' expression 'AS' type ')' diff --git a/sbroad-core/src/backend/sql/ir.rs b/sbroad-core/src/backend/sql/ir.rs index 3493900fface1d26ec2e97e296c35478f792d63d..7854190b1b04c781f0f29fbbf3d182792197ee71 100644 --- a/sbroad-core/src/backend/sql/ir.rs +++ b/sbroad-core/src/backend/sql/ir.rs @@ -288,6 +288,9 @@ impl ExecutionPlan { SyntaxData::Distinct => sql.push_str("DISTINCT"), SyntaxData::Inline(content) => sql.push_str(content), SyntaxData::From => sql.push_str("FROM"), + SyntaxData::Leading => sql.push_str("LEADING"), + SyntaxData::Both => sql.push_str("BOTH"), + SyntaxData::Trailing => sql.push_str("TRAILING"), SyntaxData::Operator(s) => sql.push_str(s.as_str()), SyntaxData::OpenParenthesis => sql.push('('), SyntaxData::PlanId(id) => { diff --git a/sbroad-core/src/backend/sql/tree.rs b/sbroad-core/src/backend/sql/tree.rs index 7e385996470a237ddd1347fced295c52ef1f2453..69bea1777623f855dfa23a4779521255cc251d6d 100644 --- a/sbroad-core/src/backend/sql/tree.rs +++ b/sbroad-core/src/backend/sql/tree.rs @@ -6,7 +6,7 @@ use std::mem::take; use crate::errors::{Action, Entity, SbroadError}; use crate::executor::ir::ExecutionPlan; -use crate::ir::expression::Expression; +use crate::ir::expression::{Expression, TrimKind}; use crate::ir::operator::{Bool, Relational, Unary}; use crate::ir::transformation::redistribution::{MotionOpcode, MotionPolicy}; use 
crate::ir::tree::traversal::PostOrder; @@ -36,6 +36,12 @@ pub enum SyntaxData { Inline(String), /// "from" From, + /// "leading" + Leading, + /// "both" + Both, + /// "trailing" + Trailing, /// "(" OpenParenthesis, /// "=, >, <, and, or, ..." @@ -149,6 +155,30 @@ impl SyntaxNode { } } + fn new_leading() -> Self { + SyntaxNode { + data: SyntaxData::Leading, + left: None, + right: Vec::new(), + } + } + + fn new_both() -> Self { + SyntaxNode { + data: SyntaxData::Both, + left: None, + right: Vec::new(), + } + } + + fn new_trailing() -> Self { + SyntaxNode { + data: SyntaxData::Trailing, + left: None, + right: Vec::new(), + } + } + fn new_operator(value: &str) -> Self { SyntaxNode { data: SyntaxData::Operator(value.into()), @@ -961,20 +991,46 @@ impl<'p> SyntaxPlan<'p> { Expression::StableFunction { children, is_distinct, + trim_kind, .. } => { let mut nodes: Vec<usize> = vec![self.nodes.push_syntax_node(SyntaxNode::new_open())]; - if *is_distinct { - nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_distinct())); - } - if let Some((last, others)) = children.split_last() { - for child in others { - nodes.push(self.nodes.get_syntax_node_id(*child)?); - nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_comma())); + if let Some(kind) = trim_kind { + // `trim` function has a special format. For instance, here is how we can + // call it: trim(leading 'a' from 'ab'). 
+ match kind { + TrimKind::Leading => { + nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_leading())); + } + TrimKind::Trailing => { + nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_trailing())); + } + TrimKind::Both => { + nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_both())); + } + } + + if let Some((string, removal_chars)) = children.split_last() { + for child in removal_chars { + nodes.push(self.nodes.get_syntax_node_id(*child)?); + } + nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_from())); + nodes.push(self.nodes.get_syntax_node_id(*string)?); + } + } else { + if *is_distinct { + nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_distinct())); + } + if let Some((last, others)) = children.split_last() { + for child in others { + nodes.push(self.nodes.get_syntax_node_id(*child)?); + nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_comma())); + } + nodes.push(self.nodes.get_syntax_node_id(*last)?); } - nodes.push(self.nodes.get_syntax_node_id(*last)?); } + nodes.push(self.nodes.push_syntax_node(SyntaxNode::new_close())); let sn = SyntaxNode::new_pointer(id, None, nodes); Ok(self.nodes.push_syntax_node(sn)) diff --git a/sbroad-core/src/frontend/sql.rs b/sbroad-core/src/frontend/sql.rs index 00b02d50c7c0c3c35c92b6ecb6f6f8384d324e6c..d905bd9fd3d8e481b6bdbb8d8a2979d65b283348 100644 --- a/sbroad-core/src/frontend/sql.rs +++ b/sbroad-core/src/frontend/sql.rs @@ -25,6 +25,7 @@ use crate::ir::ddl::{Language, ParamDef}; use crate::ir::expression::cast::Type as CastType; use crate::ir::expression::{ ColumnPositionMap, ColumnWithScan, ColumnsRetrievalSpec, Expression, ExpressionId, Position, + TrimKind, }; use crate::ir::operator::{Arithmetic, Bool, ConflictStrategy, JoinKind, Relational, Unary}; use crate::ir::relation::{Column, ColumnRole, Type as RelationType}; @@ -729,6 +730,76 @@ fn parse_grant_revoke( Ok((grant_revoke_type, grantee_name, timeout)) } +fn parse_trim_function_args<M: Metadata>( + function_name: String, + 
function_args: Pair<'_, Rule>, + referred_relation_ids: &[usize], + worker: &mut ExpressionsWorker<M>, + plan: &mut Plan, +) -> Result<ParseExpression, SbroadError> { + let normalized_name = function_name.to_lowercase(); + if "trim" != normalized_name.as_str() { + return Err(SbroadError::Invalid( + Entity::Query, + Some(format!( + "Trim function arguments format is allowed only inside \"trim\" function. Got: {normalized_name}", + )) + )); + } + let mut parse_exprs_args = Vec::new(); + let mut kind = None; + let mut removal_chars = None; + let mut string = None; + + let args_inner = function_args.into_inner(); + for arg_pair in args_inner { + match arg_pair.as_rule() { + Rule::TrimKind => { + for kind_pair in arg_pair.into_inner() { + match kind_pair.as_rule() { + Rule::TrimKindBoth => kind = Some(TrimKind::Both), + Rule::TrimKindLeading => kind = Some(TrimKind::Leading), + Rule::TrimKindTrailing => kind = Some(TrimKind::Trailing), + rule => unreachable!("Expected TrimKind variant. Got: {rule:?}"), + } + } + } + Rule::TrimChars => { + removal_chars = Some(parse_expr_pratt( + arg_pair.into_inner(), + referred_relation_ids, + worker, + plan, + )?); + } + Rule::TrimString => { + string = Some(parse_expr_pratt( + arg_pair.into_inner(), + referred_relation_ids, + worker, + plan, + )?); + } + rule => unreachable!("Unexpected rule under TrimFunctionArgs: {rule:?}"), + } + } + let string = string.expect("string is required by grammar"); + + if let Some(removal_chars) = removal_chars { + parse_exprs_args.push(removal_chars); + } + parse_exprs_args.push(string); + // mark this function as `trim` function + let trim_kind = Some(kind.unwrap_or_default()); + + Ok(ParseExpression::Function { + name: function_name, + args: parse_exprs_args, + is_distinct: false, + trim_kind, + }) +} + /// Common logic for `SqlVdbeMaxSteps` and `VTableMaxRows` parsing. 
fn parse_option<M: Metadata>( ast: &AbstractSyntaxTree, @@ -1013,6 +1084,7 @@ enum ParseExpression { name: String, args: Vec<ParseExpression>, is_distinct: bool, + trim_kind: Option<TrimKind>, }, Row { children: Vec<ParseExpression>, @@ -1230,6 +1302,7 @@ impl ParseExpression { name, args, is_distinct, + trim_kind, } => { let mut plan_arg_ids = Vec::new(); for arg in args { @@ -1246,7 +1319,7 @@ impl ParseExpression { } else { let func = worker.metadata.function(name)?; if func.is_stable() { - plan.add_stable_function(func, plan_arg_ids)? + plan.add_stable_function(func, plan_arg_ids, trim_kind.clone())? } else { // At the moment we don't support any non-stable functions. // Later this code block should handle other function behaviors. @@ -1420,6 +1493,15 @@ where parse_exprs_args.push(arg_expr); } } + Rule::TrimFunctionArgs => { + return parse_trim_function_args( + function_name, + function_args, + referred_relation_ids, + worker, + plan + ); + } rule => unreachable!("{}", format!("Unexpected rule under FunctionInvocation: {rule:?}")) } } @@ -1427,6 +1509,7 @@ where name: function_name, args: parse_exprs_args, is_distinct, + trim_kind: None }) } rule => unreachable!("Expr::parse expected identifier continuation, found {:?}", rule) diff --git a/sbroad-core/src/frontend/sql/ir.rs b/sbroad-core/src/frontend/sql/ir.rs index 7261a060a53ee66efc88b6a2cae22e06a75b5a03..04bfa467d8d547b007cfaed522dde0f74694d93e 100644 --- a/sbroad-core/src/frontend/sql/ir.rs +++ b/sbroad-core/src/frontend/sql/ir.rs @@ -428,10 +428,7 @@ impl SubtreeCloner { distribution: _, } | Expression::StableFunction { - ref mut children, - name: _, - is_distinct: _, - func_type: _, + ref mut children, .. 
} => { *children = self.copy_list(&*children)?; } diff --git a/sbroad-core/src/frontend/sql/query.pest b/sbroad-core/src/frontend/sql/query.pest index 046082e3956e66c9a9baa9352aeb77f81a96cc77..376a89d355ab84e7d6cecf925c42046efaf227c7 100644 --- a/sbroad-core/src/frontend/sql/query.pest +++ b/sbroad-core/src/frontend/sql/query.pest @@ -215,9 +215,16 @@ Expr = { ExprAtomValue ~ (ExprInfixOp ~ ExprAtomValue)* } PgParameter = { "$" ~ Unsigned } IdentifierWithOptionalContinuation = { Identifier ~ (ReferenceContinuation | FunctionInvocationContinuation)? } ReferenceContinuation = { "." ~ Identifier } - FunctionInvocationContinuation = { "(" ~ (CountAsterisk | FunctionArgs)? ~ ")" } + FunctionInvocationContinuation = { "(" ~ (CountAsterisk | TrimFunctionArgs | FunctionArgs)? ~ ")" } FunctionArgs = { Distinct? ~ (Expr ~ ("," ~ Expr)*)? } CountAsterisk = { "*" } + TrimFunctionArgs = { ((TrimKind? ~ TrimChars) | TrimKind) ~ ^"from" ~ TrimString } + TrimKind = { (TrimKindLeading | TrimKindTrailing | TrimKindBoth) } + TrimKindLeading = { ^"leading" } + TrimKindTrailing = { ^"trailing" } + TrimKindBoth = { ^"both" } + TrimChars = { Expr } + TrimString = { Expr } ExpressionInParentheses = { "(" ~ Expr ~ ")" } Cast = { ^"cast" ~ "(" ~ Expr ~ ^"as" ~ TypeCast ~ ")" } TypeCast = _{ TypeAny | ColumnDefType } diff --git a/sbroad-core/src/ir/aggregates.rs b/sbroad-core/src/ir/aggregates.rs index 69cb1ed0619433b116074e55f26857f0245cad0d..5b06eaae397fbb4246247376cf64ad02c2d1d491 100644 --- a/sbroad-core/src/ir/aggregates.rs +++ b/sbroad-core/src/ir/aggregates.rs @@ -285,6 +285,7 @@ impl SimpleAggregate { children, is_distinct, func_type: RelType::from(final_func), + trim_kind: None, }; let aggr_id = plan.nodes.push(Node::Expression(final_aggr)); final_aggregates.insert(local_kind, aggr_id); diff --git a/sbroad-core/src/ir/explain.rs b/sbroad-core/src/ir/explain.rs index 5824998e329b7a34092aac90675e216a93f0f300..0a3b271f3cf980e71532cb1cc22f7bf66df80b35 100644 --- 
a/sbroad-core/src/ir/explain.rs +++ b/sbroad-core/src/ir/explain.rs @@ -15,6 +15,7 @@ use crate::ir::transformation::redistribution::{ }; use crate::ir::{OptionKind, Plan}; +use super::expression::TrimKind; use super::operator::{Arithmetic, Bool, Unary}; use super::tree::traversal::{PostOrder, EXPR_CAPACITY, REL_CAPACITY}; use super::value::Value; @@ -29,7 +30,7 @@ enum ColExpr { Column(String, Type), Cast(Box<ColExpr>, CastType), Concat(Box<ColExpr>, Box<ColExpr>), - StableFunction(String, Vec<ColExpr>, bool, Type), + StableFunction(String, Vec<ColExpr>, bool, Option<TrimKind>, Type), Row(Row), None, } @@ -55,8 +56,12 @@ impl Display for ColExpr { ColExpr::Column(c, col_type) => format!("{c}::{col_type}"), ColExpr::Cast(v, t) => format!("{v}::{t}"), ColExpr::Concat(l, r) => format!("{l} || {r}"), - ColExpr::StableFunction(name, args, is_distinct, func_type) => { - let formatted_args = format!("({})", args.iter().format(", ")); + ColExpr::StableFunction(name, args, is_distinct, trim_kind, func_type) => { + let formatted_args = if let Some(kind) = trim_kind { + format!("{} {}", kind.as_str(), args.iter().format(" ")) + } else { + format!("({})", args.iter().format(", ")) + }; format!( "{name}({}{formatted_args})::{func_type}", if *is_distinct { "distinct " } else { "" } @@ -152,6 +157,7 @@ impl ColExpr { children, is_distinct, func_type, + trim_kind, } => { let mut len = children.len(); let mut args: Vec<ColExpr> = Vec::with_capacity(len); @@ -169,6 +175,7 @@ impl ColExpr { name.clone(), args, *is_distinct, + trim_kind.clone(), func_type.clone(), ); stack.push((func_expr, id)); diff --git a/sbroad-core/src/ir/expression.rs b/sbroad-core/src/ir/expression.rs index a781b221e1eafb6afab41547d4084e679d7be06b..82dad44f50e186d00ff87013f44e5bcc1479c5b4 100644 --- a/sbroad-core/src/ir/expression.rs +++ b/sbroad-core/src/ir/expression.rs @@ -147,6 +147,9 @@ pub enum Expression { children: Vec<usize>, /// If this function is an aggregate function: whether it is marked DISTINCT 
or not is_distinct: bool, + /// Some if function is `trim`. This is the kind of `trim` function that can be set + /// by using keywords LEADING, TRAILING or BOTH. + trim_kind: Option<TrimKind>, /// Function return type. func_type: Type, }, @@ -164,6 +167,25 @@ pub enum Expression { }, } +#[derive(Default, Clone, Debug, Hash, Deserialize, PartialEq, Eq, Serialize)] +pub enum TrimKind { + #[default] + Both, + Leading, + Trailing, +} + +impl TrimKind { + #[must_use] + pub fn as_str(&self) -> &'static str { + match self { + TrimKind::Leading => "LEADING", + TrimKind::Trailing => "TRAILING", + TrimKind::Both => "BOTH", + } + } +} + #[allow(dead_code)] impl Expression { /// Gets current row distribution. @@ -613,17 +635,20 @@ impl<'plan> Comparator<'plan> { children: children_left, is_distinct: distinct_left, func_type: func_type_left, + trim_kind: trim_kind_left, } => { if let Expression::StableFunction { name: name_right, children: children_right, is_distinct: distinct_right, func_type: func_type_right, + trim_kind: trim_kind_right, } = right { return Ok(name_left == name_right && distinct_left == distinct_right && func_type_left == func_type_right + && trim_kind_left == trim_kind_right && children_left.iter().zip(children_right.iter()).all( |(l, r)| self.are_subtrees_equal(*l, *r).unwrap_or(false), )); @@ -713,10 +738,12 @@ impl<'plan> Comparator<'plan> { children, is_distinct, func_type, + trim_kind, } => { is_distinct.hash(state); func_type.hash(state); name.hash(state); + trim_kind.hash(state); for child in children { self.hash_for_expr(*child, state, depth - 1); } diff --git a/sbroad-core/src/ir/function.rs b/sbroad-core/src/ir/function.rs index 99ed3abc064347abbbf703f4916163ca869d7c55..a14e4221b840429a9920b3bca99372c019ea583a 100644 --- a/sbroad-core/src/ir/function.rs +++ b/sbroad-core/src/ir/function.rs @@ -5,6 +5,8 @@ use crate::ir::relation::Type; use crate::ir::{Node, Plan}; use serde::{Deserialize, Serialize}; +use super::expression::TrimKind; + 
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] pub enum Behavior { /// The function is a stable function, it does not have any side effects. @@ -53,6 +55,7 @@ impl Plan { &mut self, function: &Function, children: Vec<usize>, + trim_kind: Option<TrimKind>, ) -> Result<usize, SbroadError> { if !function.is_stable() { return Err(SbroadError::Invalid( @@ -65,6 +68,7 @@ impl Plan { children, is_distinct: false, func_type: function.func_type.clone(), + trim_kind, }; let func_id = self.nodes.push(Node::Expression(func_expr)); Ok(func_id) @@ -114,6 +118,7 @@ impl Plan { children, is_distinct, func_type: Type::from(kind), + trim_kind: None, }; let id = self.nodes.push(Node::Expression(func_expr)); Ok(id) diff --git a/sbroad-core/src/ir/transformation/redistribution/groupby.rs b/sbroad-core/src/ir/transformation/redistribution/groupby.rs index 4b2972f62042b60ba567df9db3aeb231f91333d5..ece80a9c6f7ac821f79d8e8b5700f747f63e70bc 100644 --- a/sbroad-core/src/ir/transformation/redistribution/groupby.rs +++ b/sbroad-core/src/ir/transformation/redistribution/groupby.rs @@ -473,17 +473,20 @@ impl Plan { children: children_left, is_distinct: distinct_left, func_type: func_type_left, + trim_kind: trim_kind_left, } => { if let Expression::StableFunction { name: name_right, children: children_right, is_distinct: distinct_right, func_type: func_type_right, + trim_kind: trim_kind_right, } = right { return Ok(name_left == name_right && distinct_left == distinct_right && func_type_left == func_type_right + && trim_kind_left == trim_kind_right && children_left.iter().zip(children_right.iter()).all( |(l, r)| self.are_subtrees_equal(*l, *r).unwrap_or(false), )); @@ -990,7 +993,7 @@ impl Plan { // We can reuse aggregate expression between local aggregates, because // all local aggregates are located inside the same motion subtree and we // assume that each local aggregate does not need to modify its expression - let local_fun_id = self.add_stable_function(&fun, 
arguments.to_vec())?; + let local_fun_id = self.add_stable_function(&fun, arguments.to_vec(), None)?; let alias_id = self.nodes.add_alias(local_alias, local_fun_id)?; Ok(alias_id) }