From 1435b222627cf655f79ebba4e10c0dce91638e9e Mon Sep 17 00:00:00 2001 From: Denis Smirnov <sd@picodata.io> Date: Tue, 19 Dec 2023 12:07:04 +0700 Subject: [PATCH] feat: allow duplicate column names in IR node output --- sbroad-core/src/backend/sql/tree/tests.rs | 24 +- sbroad-core/src/executor/tests/frontend.rs | 54 +++-- sbroad-core/src/frontend/sql.rs | 209 ++++++++--------- sbroad-core/src/frontend/sql/ir.rs | 47 +--- sbroad-core/src/frontend/sql/ir/tests.rs | 24 ++ sbroad-core/src/ir.rs | 27 ++- sbroad-core/src/ir/aggregates.rs | 78 ++++--- sbroad-core/src/ir/expression.rs | 221 ++++++++++++------ sbroad-core/src/ir/expression/tests.rs | 9 +- sbroad-core/src/ir/operator.rs | 71 +----- sbroad-core/src/ir/operator/tests.rs | 13 +- .../src/ir/transformation/redistribution.rs | 16 +- .../transformation/redistribution/groupby.rs | 74 +++--- .../ir/transformation/redistribution/tests.rs | 10 +- .../redistribution/tests/segment.rs | 2 +- .../sql/tree/arbitrary_projection_plan.yaml | 21 +- .../sql/tree/arithmetic_projection_plan.yaml | 25 +- 17 files changed, 494 insertions(+), 431 deletions(-) diff --git a/sbroad-core/src/backend/sql/tree/tests.rs b/sbroad-core/src/backend/sql/tree/tests.rs index 77e128e8a1..86e760e3ef 100644 --- a/sbroad-core/src/backend/sql/tree/tests.rs +++ b/sbroad-core/src/backend/sql/tree/tests.rs @@ -344,10 +344,9 @@ fn sql_arithmetic_projection_plan() { let arith_subract_id = plan .add_arithmetic_to_plan(arith_addition_id2, Arithmetic::Subtract, b_id, false) .unwrap(); + let alias_id = plan.nodes.add_alias("COL_1", arith_subract_id).unwrap(); - let proj_id = plan - .add_proj_internal(scan_id, &[arith_subract_id], false) - .unwrap(); + let proj_id = plan.add_proj_internal(scan_id, &[alias_id], false).unwrap(); plan.set_top(proj_id).unwrap(); // check the plan @@ -372,7 +371,7 @@ fn sql_arithmetic_projection_plan() { let mut nodes_iter = nodes.into_iter(); // projection - assert_eq!(Some(&SyntaxData::PlanId(35)), nodes_iter.next()); + assert_eq!(Some(&SyntaxData::PlanId(36)), nodes_iter.next()); // row assert_eq!(Some(&SyntaxData::PlanId(17)), nodes_iter.next()); // ( @@ -457,6 +456,12 @@ fn sql_arithmetic_projection_plan() { assert_eq!(Some(&SyntaxData::PlanId(18)), nodes_iter.next()); // ) assert_eq!(Some(&SyntaxData::CloseParenthesis), nodes_iter.next()); + // alias + assert_eq!(Some(&SyntaxData::PlanId(34)), nodes_iter.next()); + assert_eq!( + Some(&SyntaxData::Alias("COL_1".to_string())), + nodes_iter.next() + ); // from assert_eq!(Some(&SyntaxData::From), nodes_iter.next()); // scan @@ -507,8 +512,9 @@ fn sql_arbitrary_projection_plan() { // a + b > c and d is not null let and_id = plan.nodes.add_bool(gt_id, Bool::And, not_null_id).unwrap(); + let alias_id = plan.nodes.add_alias("COL_1", and_id).unwrap(); - let proj_id = plan.add_proj_internal(scan_id, &[and_id], false).unwrap(); + let proj_id = plan.add_proj_internal(scan_id, &[alias_id], false).unwrap(); plan.set_top(proj_id).unwrap(); // check the plan @@ -533,7 +539,7 @@ fn sql_arbitrary_projection_plan() { let mut nodes_iter = nodes.into_iter(); // projection - assert_eq!(Some(&SyntaxData::PlanId(26)), nodes_iter.next()); + assert_eq!(Some(&SyntaxData::PlanId(27)), nodes_iter.next()); // row 12 assert_eq!(Some(&SyntaxData::PlanId(13)), nodes_iter.next()); // ( @@ -589,6 +595,12 @@ fn sql_arbitrary_projection_plan() { Some(&SyntaxData::Operator("is null".into())), nodes_iter.next() ); + // alias + assert_eq!(Some(&SyntaxData::PlanId(25)), nodes_iter.next()); + assert_eq!( + Some(&SyntaxData::Alias("COL_1".to_string())), + nodes_iter.next() + ); // from assert_eq!(Some(&SyntaxData::From), nodes_iter.next()); // scan diff --git a/sbroad-core/src/executor/tests/frontend.rs b/sbroad-core/src/executor/tests/frontend.rs index 0d1de81035..ffca9d9beb 100644 --- a/sbroad-core/src/executor/tests/frontend.rs +++ b/sbroad-core/src/executor/tests/frontend.rs @@ -4,26 +4,17 @@ use crate::executor::engine::mock::RouterRuntimeMock; use pretty_assertions::assert_eq; #[test] -fn front_ivalid_sql1() { +fn front_valid_sql1() { // Tables "test_space" and "hash_testing" have the same columns "sys_op" and "bucket_id", - // that cause a "duplicate column" error in the output tuple of the INNER JOIN node. - // The error is handled by renaming the columns within a sub-query (inner_join test). - // - // TODO: improve reference resolution to avoid this error. + // that previously caused a "duplicate column" error in the output tuple of the + // INNER JOIN node. Now the error is fixed. let query = r#"SELECT "id", "product_units" FROM "hash_testing" INNER JOIN "test_space" as t ON "hash_testing"."identification_number" = t."id" WHERE "hash_testing"."identification_number" = 5 and "hash_testing"."product_code" = '123'"#; let metadata = &RouterRuntimeMock::new(); - let plan_err = Query::new(metadata, query, vec![]).unwrap_err(); - - assert_eq!( - SbroadError::DuplicatedValue( - "row can't be added because `\"sys_op\"` already has an alias".into() - ), - plan_err - ); + Query::new(metadata, query, vec![]).unwrap(); } #[test] @@ -124,8 +115,8 @@ fn front_explain_select_sql2() { #[test] fn front_explain_select_sql3() { - let sql = r#"EXPLAIN SELECT "a" FROM "t3" as "q1" - INNER JOIN (SELECT "t3"."a" as "a2", "t3"."b" as "b2" FROM "t3") as "q2" + let sql = r#"explain select "a" from "t3" as "q1" + inner join (select "t3"."a" as "a2", "t3"."b" as "b2" from "t3") as "q2" on "q1"."a" = "q2"."a2""#; let metadata = &RouterRuntimeMock::new(); @@ -149,6 +140,37 @@ fn front_explain_select_sql3() { if let Ok(actual_explain) = query.dispatch().unwrap().downcast::<String>() { assert_eq!(expected_explain, *actual_explain); } else { - panic!("Explain must be string") + panic!("explain must be string") + } +} + +#[test] +fn front_explain_select_sql4() { + let sql = r#"explain select "q2"."a" from "t3" as "q1" + inner join "t3" as "q2" + on "q1"."a" = "q2"."a""#; + + let metadata = &RouterRuntimeMock::new(); + let mut query = Query::new(metadata, sql, vec![]).unwrap(); + + let expected_explain = format!( + "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n", + r#"projection ("q2"."a"::string -> "a")"#, + r#" join on ROW("q1"."a"::string) = ROW("q2"."a"::string)"#, + r#" scan "q1""#, + r#" projection ("q1"."a"::string -> "a", "q1"."b"::integer -> "b")"#, + r#" scan "t3" -> "q1""#, + r#" scan "q2""#, + r#" projection ("q2"."a"::string -> "a", "q2"."b"::integer -> "b")"#, + r#" scan "t3" -> "q2""#, + r#"execution options:"#, + r#"sql_vdbe_max_steps = 45000"#, + r#"vtable_max_rows = 5000"#, + ); + + if let Ok(actual_explain) = query.dispatch().unwrap().downcast::<String>() { + assert_eq!(expected_explain, *actual_explain); + } else { + panic!("explain must be string") } } diff --git a/sbroad-core/src/frontend/sql.rs b/sbroad-core/src/frontend/sql.rs index 1b2a06c8a6..7348d24c37 100644 --- a/sbroad-core/src/frontend/sql.rs +++ b/sbroad-core/src/frontend/sql.rs @@ -21,7 +21,7 @@ use crate::frontend::sql::ir::Translation; use crate::frontend::Ast; use crate::ir::ddl::{ColumnDef, Ddl}; use crate::ir::expression::cast::Type as CastType; -use crate::ir::expression::{Expression, ExpressionId}; +use crate::ir::expression::{ColumnPositionMap, Expression, ExpressionId}; use crate::ir::operator::{Arithmetic, Bool, ConflictStrategy, JoinKind, Relational, Unary}; use crate::ir::relation::{Column, ColumnRole, Type as RelationType}; use crate::ir::tree::traversal::PostOrder; @@ -766,7 +766,7 @@ impl Ast for AbstractSyntaxTree { } Type::Reference => { let ast_rel_list = self.get_referred_relational_nodes(id)?; - let mut plan_rel_list = Vec::new(); + let mut plan_rel_list = Vec::with_capacity(ast_rel_list.len()); for ast_id in ast_rel_list { let plan_id = map.get(ast_id)?; plan_rel_list.push(plan_id); @@ -787,109 +787,82 @@ impl Ast for AbstractSyntaxTree { } }; - // Closure to get the nearest name of relation the output column came from - // E.g. for `Scan` it would be it's `realation`. - let get_scan_name = - |col_name: &str, plan_id: usize| -> Result<Option<String>, SbroadError> { - let child = plan.get_relation_node(plan_id)?; - let col_position = child - .output_alias_position_map(&plan.nodes)? - .get(col_name) - .copied(); - match col_position { - Some(pos) => Ok(plan - .get_relation_node(plan_id)? - .scan_name(&plan, pos)? - .map(String::from)), - None => Ok(None), - } - }; - let plan_left_id = plan_rel_list.first(); let plan_right_id = plan_rel_list.get(1); if let (Some(plan_left_id), Some(plan_right_id)) = (plan_left_id, plan_right_id) { // Handling case of referencing join node. + // Get column position maps for the left and right children + // of the join node. + let left_col_map = ColumnPositionMap::new(&plan, *plan_left_id)?; + let right_col_map = ColumnPositionMap::new(&plan, *plan_right_id)?; + + // We get both column and scan names from the AST. if let (Some(ast_scan_id), Some(ast_col_name_id)) = (node.children.first(), node.children.get(1)) { let ast_scan = self.nodes.get_node(*ast_scan_id)?; + let ast_scan_name: Option<String> = + ast_scan.value.as_deref().map(normalize_name_from_sql); + let scan_name = ast_scan_name.as_deref(); if let Type::ScanName = ast_scan.rule { // Get the column name and its positions in the output tuples. let col_name = get_column_name(*ast_col_name_id)?; - let left_name = get_scan_name(&col_name, *plan_left_id)?; - let right_name = get_scan_name(&col_name, *plan_right_id)?; - // Check that the AST scan name matches to the children scan names in the plan join node. - let ast_scan_name: Option<String> = - ast_scan.value.as_deref().map(normalize_name_from_sql); - // Determine the referred side of the join (left or right). - if left_name == ast_scan_name { - let left_col_map = plan - .get_relation_node(*plan_left_id)? - .output_alias_position_map(&plan.nodes)?; - if left_col_map.get(&col_name.as_str()).is_some() { - let ref_id = plan.add_row_from_left_branch( - *plan_left_id, - *plan_right_id, - &[&col_name], - )?; - rows.insert(ref_id); - map.add(id, ref_id); - } else { - return Err(SbroadError::NotFound( - Entity::Column, - format!( - "'{col_name}' in the join left child '{left_name:?}'" - ), - )); - } - } else if right_name == ast_scan_name { - let right_col_map = plan - .get_relation_node(*plan_right_id)? - .output_alias_position_map(&plan.nodes)?; - if right_col_map.get(&col_name.as_str()).is_some() { - let ref_id = plan.add_row_from_right_branch( + // First, try to find the column name in the left child. + let present_in_left = + left_col_map.get_with_scan(&col_name, scan_name).is_ok(); + let ref_id = if present_in_left { + plan.add_row_from_left_branch( + *plan_left_id, + *plan_right_id, + &[&col_name], + )? + } else { + // If it is not found, try to find it in the right child. + let present_in_right = + right_col_map.get_with_scan(&col_name, scan_name).is_ok(); + if present_in_right { + plan.add_row_from_right_branch( *plan_left_id, *plan_right_id, &[&col_name], - )?; - rows.insert(ref_id); - map.add(id, ref_id); + )? } else { return Err(SbroadError::NotFound( Entity::Column, - format!( - "'{col_name}' in the join right child '{right_name:?}'" - ), + format!("'{col_name}' in the join children",), )); } - } else { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format!( - "left {:?} and right {:?} plan nodes do not match the AST scan name: {:?}", - left_name, - right_name, - ast_scan_name, - )), - )); - } + }; + rows.insert(ref_id); + map.add(id, ref_id); } else { return Err(SbroadError::Invalid( Entity::Node, Some("expected AST node to be a scan name.".into()), )); } + // We get only column name from the AST. } else if let (Some(ast_col_name_id), None) = (node.children.first(), node.children.get(1)) { // Determine the referred side of the join (left or right). let col_name = get_column_name(*ast_col_name_id)?; - let left_col_map = plan - .get_relation_node(*plan_left_id)? - .output_alias_position_map(&plan.nodes)?; - if left_col_map.get(&col_name.as_str()).is_some() { + + // We need to check that the column name is unique in the join children. + // Otherwise, we cannot determine the referred side of the join + // and the user should specify the scan name in the SQL. + let present_in_left = left_col_map.get(&col_name).is_ok(); + let present_in_right = right_col_map.get(&col_name).is_ok(); + if present_in_left && present_in_right { + return Err(SbroadError::Invalid( + Entity::Column, + Some(format!( + "column name '{col_name}' is present in both join children", + )), + )); + } else if present_in_left { let ref_id = plan.add_row_from_left_branch( *plan_left_id, *plan_right_id, @@ -897,24 +870,19 @@ impl Ast for AbstractSyntaxTree { )?; rows.insert(ref_id); map.add(id, ref_id); + } else if present_in_right { + let ref_id = plan.add_row_from_right_branch( + *plan_left_id, + *plan_right_id, + &[&col_name], + )?; + rows.insert(ref_id); + map.add(id, ref_id); } else { - let right_col_map = plan - .get_relation_node(*plan_right_id)? - .output_alias_position_map(&plan.nodes)?; - if right_col_map.get(&col_name.as_str()).is_some() { - let ref_id = plan.add_row_from_right_branch( - *plan_left_id, - *plan_right_id, - &[&col_name], - )?; - rows.insert(ref_id); - map.add(id, ref_id); - } else { - return Err(SbroadError::NotFound( - Entity::Column, - format!("'{col_name}' for the join left or right children"), - )); - } + return Err(SbroadError::NotFound( + Entity::Column, + format!("'{col_name}' in the join left or right children"), + )); } } else { return Err(SbroadError::UnexpectedNumberOfValues( @@ -926,7 +894,7 @@ impl Ast for AbstractSyntaxTree { let first_child_id = node.children.first(); let second_child_id = node.children.get(1); - let col_name: String = if let (Some(ast_scan_id), Some(ast_col_id)) = + if let (Some(ast_scan_id), Some(ast_col_id)) = (first_child_id, second_child_id) { // Get column name. @@ -937,46 +905,51 @@ impl Ast for AbstractSyntaxTree { let ast_scan_name = Some(normalize_name_from_sql( parse_string_value_node(self, *ast_scan_id)?, )); - let plan_scan_name = get_scan_name(&col_name, *plan_rel_id)?; - if plan_scan_name != ast_scan_name { - return Err(SbroadError::UnexpectedNumberOfValues( - format!( - "Scan name for the column {:?} doesn't match: expected {:?}, found {:?}", - get_column_name(*ast_col_id), - plan_scan_name, - ast_scan_name, - ))); - } + + let col_position = ColumnPositionMap::new(&plan, *plan_rel_id)? + .get_with_scan(&col_name, ast_scan_name.as_deref())?; + // Manually build the reference node. + let child = plan.get_relation_node(*plan_rel_id)?; + let child_alias_ids = + plan.get_expression_node(child.output())?.get_row_list()?; + let child_alias_id = child_alias_ids + .get(col_position) + .expect("column position is invalid"); + let col_type = plan + .get_expression_node(*child_alias_id)? + .calculate_type(&plan)?; + let ref_id = + plan.nodes + .add_ref(None, Some(vec![0]), col_position, col_type); + map.add(id, ref_id); } else { return Err(SbroadError::Invalid( Entity::Node, Some("expected AST node to be a scan name.".into()), )); }; - col_name } else if let (Some(ast_col_id), None) = (first_child_id, second_child_id) { // Get the column name. - get_column_name(*ast_col_id)? + let col_name = get_column_name(*ast_col_id)?; + let ref_list = plan.new_columns( + &[*plan_rel_id], + false, + &[0], + &[&col_name], + false, + true, + )?; + let ref_id = *ref_list.first().ok_or_else(|| { + SbroadError::UnexpectedNumberOfValues( + "Referred column is not found.".into(), + ) + })?; + map.add(id, ref_id); } else { return Err(SbroadError::UnexpectedNumberOfValues( "no child node found in the AST reference.".into(), )); }; - - let ref_list = plan.new_columns( - &[*plan_rel_id], - false, - &[0], - &[&col_name], - false, - true, - )?; - let ref_id = *ref_list.first().ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues( - "Referred column is not found.".into(), - ) - })?; - map.add(id, ref_id); } else { return Err(SbroadError::UnexpectedNumberOfValues( "expected one or two referred relational nodes, got less or more." diff --git a/sbroad-core/src/frontend/sql/ir.rs b/sbroad-core/src/frontend/sql/ir.rs index f7e29aef10..fb40e77d18 100644 --- a/sbroad-core/src/frontend/sql/ir.rs +++ b/sbroad-core/src/frontend/sql/ir.rs @@ -251,47 +251,22 @@ impl Plan { } // Generate a reference to the sub-query. - let row_id: usize = match self.get_node(sq.relational)? { - Node::Relational( - Relational::Selection { children, .. } - | Relational::Join { children, .. } - | Relational::Having { children, .. }, - ) => { - let nodes = children.clone(); - let sq_output = self.get_relation_node(sq.sq)?.output(); - let row = self.get_expression_node(sq_output)?; - if let Expression::Row { list, .. } = row { - let mut names: Vec<String> = Vec::new(); - for col_id in list { - names.push(self.get_expression_node(*col_id)?.get_alias_name()?.into()); - } - let names_str: Vec<_> = names.iter().map(String::as_str).collect(); - // TODO: should we add current row_id to the set of the generated rows? - let position: usize = - children.iter().position(|&x| x == sq.sq).ok_or_else(|| { - SbroadError::FailedTo( - Action::Build, - None, - "a reference to the sub-query".into(), - ) - })?; - let row_id = self.add_row_from_sub_query(&nodes, position, &names_str)?; - self.replace_parent_in_subtree(row_id, None, Some(sq.relational))?; - row_id - } else { - return Err(SbroadError::Invalid( - Entity::Expression, - Some("Sub-query output is not a row".into()), - )); - } - } + let rel = self.get_relation_node(sq.relational)?; + let children = match rel { + Relational::Join { children, .. } + | Relational::Having { children, .. } + | Relational::Selection { children, .. } => children.clone(), _ => { return Err(SbroadError::Invalid( - Entity::Node, - Some("Sub-query is not in selection or join node".into()), + Entity::Relational, + Some("Sub-query is not in selection, group by or join node".into()), )) } }; + let position: usize = children.iter().position(|&x| x == sq.sq).ok_or_else(|| { + SbroadError::FailedTo(Action::Build, None, "a reference to the sub-query".into()) + })?; + let row_id = self.add_row_from_subquery(&children, position, Some(sq.relational))?; // Replace sub-query with reference. let op = self.get_mut_expression_node(sq.operator)?; diff --git a/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad-core/src/frontend/sql/ir/tests.rs index 07001f4582..b2dd66bbcc 100644 --- a/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad-core/src/frontend/sql/ir/tests.rs @@ -425,6 +425,30 @@ vtable_max_rows = 5000 assert_eq!(expected_explain, plan.as_explain().unwrap()); } +#[test] +fn front_sql_subquery_column_duplicates() { + let input = r#"SELECT "id" FROM "test_space" WHERE ("id", "id") + IN (SELECT "id", "id" from "test_space")"#; + + let plan = sql_to_optimized_ir(input, vec![]); + + let expected_explain = String::from( + r#"projection ("test_space"."id"::unsigned -> "id") + selection ROW("test_space"."id"::unsigned, "test_space"."id"::unsigned) in ROW($0, $0) + scan "test_space" +subquery $0: +scan + projection ("test_space"."id"::unsigned -> "id", "test_space"."id"::unsigned -> "id") + scan "test_space" +execution options: +sql_vdbe_max_steps = 45000 +vtable_max_rows = 5000 +"#, + ); + + assert_eq!(expected_explain, plan.as_explain().unwrap()); +} + #[test] fn front_sql_exists_subquery_select_from_table() { let input = r#"SELECT "id" FROM "test_space" WHERE EXISTS (SELECT 0 FROM "hash_testing")"#; diff --git a/sbroad-core/src/ir.rs b/sbroad-core/src/ir.rs index f3102e681d..89b3dd812a 100644 --- a/sbroad-core/src/ir.rs +++ b/sbroad-core/src/ir.rs @@ -84,6 +84,26 @@ pub struct Nodes { } impl Nodes { + pub(crate) fn get(&self, id: usize) -> Result<&Node, SbroadError> { + match self.arena.get(id) { + None => Err(SbroadError::NotFound( + Entity::Node, + format!("from arena with index {id}"), + )), + Some(node) => Ok(node), + } + } + + pub(crate) fn get_mut(&mut self, id: usize) -> Result<&mut Node, SbroadError> { + match self.arena.get_mut(id) { + None => Err(SbroadError::NotFound( + Entity::Node, + format!("from arena with index {id}"), + )), + Some(node) => Ok(node), + } + } + /// Get the amount of relational nodes in the plan. #[must_use] pub fn relation_node_amount(&self) -> usize { @@ -402,12 +422,7 @@ impl Plan { )) } Some(top) => { - if self.nodes.arena.get(top).is_none() { - return Err(SbroadError::NotFound( - Entity::Node, - format!("from arena with index {top}"), - )); - } + let _ = self.nodes.get(top)?; } } diff --git a/sbroad-core/src/ir/aggregates.rs b/sbroad-core/src/ir/aggregates.rs index f3b07119bf..ba0fb00f34 100644 --- a/sbroad-core/src/ir/aggregates.rs +++ b/sbroad-core/src/ir/aggregates.rs @@ -8,6 +8,8 @@ use std::collections::HashMap; use std::fmt::{Display, Formatter}; use std::rc::Rc; +use super::expression::{ColumnPositionMap, Position}; + /// The kind of aggregate function /// /// Examples: avg, sum, count, .. @@ -167,7 +169,44 @@ impl SimpleAggregate { } } +pub(crate) type PositionKind = (Position, AggregateKind); + impl SimpleAggregate { + pub(crate) fn get_position_kinds( + &self, + alias_to_pos: &ColumnPositionMap, + is_distinct: bool, + ) -> Result<Vec<PositionKind>, SbroadError> { + if is_distinct { + let local_alias = self.lagg_alias.get(&self.kind).ok_or_else(|| { + SbroadError::Invalid( + Entity::Aggregate, + Some(format!( + "missing local alias for distinct aggregate: {self:?}" + )), + ) + })?; + let position = alias_to_pos.get(local_alias)?; + Ok(vec![(position, self.kind)]) + } else { + let aggr_kinds = self.kind.get_local_aggregates_kinds(); + let mut res = Vec::with_capacity(aggr_kinds.len()); + for aggr_kind in aggr_kinds { + let local_alias = self.lagg_alias.get(&aggr_kind).ok_or_else(|| { + SbroadError::Invalid( + Entity::Aggregate, + Some(format!( + "missing local alias for local aggregate ({aggr_kind}): {self:?}" + )), + ) + })?; + let position = alias_to_pos.get(local_alias)?; + res.push((position, aggr_kind)); + } + Ok(res) + } + } + /// Create final aggregate expression and return its id /// /// # Examples @@ -193,30 +232,24 @@ impl SimpleAggregate { /// - Invalid aggregate /// - Could not find local alias position in child output #[allow(clippy::too_many_lines)] - pub fn create_final_aggregate_expr( + pub(crate) fn create_final_aggregate_expr( &self, parent: usize, plan: &mut Plan, - alias_to_pos: &HashMap<String, usize>, + mut position_kinds: Vec<PositionKind>, is_distinct: bool, ) -> Result<usize, SbroadError> { // map local AggregateKind to finalised expression of that aggregate let mut final_aggregates: HashMap<AggregateKind, usize> = HashMap::new(); - let mut create_final_aggr = |local_alias: &str, + let mut create_final_aggr = |position: Position, local_kind: AggregateKind, final_func: AggregateKind| -> Result<(), SbroadError> { - let Some(position) = alias_to_pos.get(local_alias) else { - let parent_node = plan.get_relation_node(parent)?; - return Err(SbroadError::Invalid( - Entity::Node, - Some(format!("could not find aggregate column in final {parent_node:?} child by local alias: {local_alias}. Aliases: {alias_to_pos:?}")))) - }; let ref_node = Expression::Reference { parent: Some(parent), // projection has only one child targets: Some(vec![0]), - position: *position, + position, col_type: RelType::from(local_kind), }; let ref_id = plan.nodes.push(Node::Expression(ref_node)); @@ -260,27 +293,14 @@ impl SimpleAggregate { Ok(()) }; if is_distinct { - let local_alias = self.lagg_alias.get(&self.kind).ok_or_else(|| { - SbroadError::Invalid( - Entity::Aggregate, - Some(format!( - "missing local alias for distinct aggregate: {self:?}" - )), - ) + let (position, kind) = position_kinds.drain(..).next().ok_or_else(|| { + SbroadError::UnexpectedNumberOfValues("position kinds are empty".to_string()) })?; - create_final_aggr(local_alias, self.kind, self.kind)?; + create_final_aggr(position, kind, self.kind)?; } else { - for aggr_kind in self.kind.get_local_aggregates_kinds() { - let local_alias = self.lagg_alias.get(&aggr_kind).ok_or_else(|| { - SbroadError::Invalid( - Entity::Aggregate, - Some(format!( - "missing local alias for local aggregate ({aggr_kind}): {self:?}" - )), - ) - })?; - let final_aggregate_kind = self.kind.get_final_aggregate_kind(&aggr_kind)?; - create_final_aggr(local_alias, aggr_kind, final_aggregate_kind)?; + for (position, kind) in position_kinds { + let final_aggregate_kind = self.kind.get_final_aggregate_kind(&kind)?; + create_final_aggr(position, kind, final_aggregate_kind)?; } } let final_expr_id = if final_aggregates.len() == 1 { diff --git a/sbroad-core/src/ir/expression.rs b/sbroad-core/src/ir/expression.rs index 75ca4711e2..955fb563a9 100644 --- a/sbroad-core/src/ir/expression.rs +++ b/sbroad-core/src/ir/expression.rs @@ -8,8 +8,9 @@ use ahash::RandomState; use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashSet}; use std::hash::{Hash, Hasher}; +use std::ops::Bound::Included; use crate::errors::{Entity, SbroadError}; use crate::executor::engine::helpers::is_sharding_column_name; @@ -456,31 +457,24 @@ impl Nodes { /// Mostly used for relational node output. /// /// # Errors - /// - nodes in a list are invalid - /// - nodes in a list are not aliases - /// - aliases in a list have duplicate names + /// - nodes in the list do not exist in the arena + /// - some nodes in the list are not aliases pub fn add_row_of_aliases( &mut self, list: Vec<usize>, distribution: Option<Distribution>, ) -> Result<usize, SbroadError> { - let mut names: HashSet<String> = HashSet::with_capacity(list.len()); - - for node_id in &list { - if let Some(Node::Expression(expr)) = self.arena.get(*node_id) { - if let Expression::Alias { name, .. } = expr { - if !names.insert(String::from(name)) { - return Err(SbroadError::DuplicatedValue(format!( - "row can't be added because `{name}` already has an alias", - ))); - } - } - } else { - return Err(SbroadError::NotFound( - Entity::Node, - format!("with index {node_id}"), - )); + for alias_id in &list { + let node = self.arena.get(*alias_id).ok_or_else(|| { + SbroadError::NotFound(Entity::Node, format!("from arena with index {alias_id}")) + })?; + if let Node::Expression(Expression::Alias { .. }) = node { + continue; } + return Err(SbroadError::Invalid( + Entity::Expression, + Some(format!("expected {alias_id} to be alias, got {node:?}")), + )); } Ok(self.add_row(list, distribution)) } @@ -802,6 +796,109 @@ impl<'plan> Comparator<'plan> { } } +pub(crate) type Position = usize; + +#[derive(Debug, PartialEq)] +pub(crate) enum Positions { + Empty, + Single(Position), + Multiple, +} + +impl Positions { + pub(crate) fn new() -> Self { + Positions::Empty + } + + pub(crate) fn push(&mut self, pos: Position) { + if Positions::Empty == *self { + *self = Positions::Single(pos); + } else { + *self = Positions::Multiple; + } + } +} + +/// (column name, scan name) +pub(crate) type ColumnName<'column> = (&'column str, Option<&'column str>); + +#[derive(Debug)] +pub(crate) struct ColumnPositionMap<'column> { + map: BTreeMap<ColumnName<'column>, Positions>, + max_name: Option<&'column str>, +} + +impl<'plan> ColumnPositionMap<'plan> { + pub(crate) fn new(plan: &'plan Plan, rel_id: usize) -> Result<Self, SbroadError> { + let rel_node = plan.get_relation_node(rel_id)?; + let output = plan.get_expression_node(rel_node.output())?; + let alias_ids = output.get_row_list()?; + + let mut map = BTreeMap::new(); + let mut max_name = None; + for (pos, alias_id) in alias_ids.iter().enumerate() { + let alias = plan.get_expression_node(*alias_id)?; + let Expression::Alias { name, .. } = alias else { + return Err(SbroadError::Invalid( + Entity::Expression, + Some(format!("alias {alias_id} is not an Alias")), + )); + }; + let scan_name = rel_node.scan_name(plan, pos)?; + map.entry((name.as_str(), scan_name)) + .or_insert_with(Positions::new) + .push(pos); + if max_name < scan_name { + max_name = scan_name; + } + } + Ok(Self { map, max_name }) + } + + pub(crate) fn get(&self, column: &str) -> Result<Position, SbroadError> { + let from_key = &(column, None); + let to_key = &(column, self.max_name); + let mut iter = self.map.range((Included(from_key), Included(to_key))); + match (iter.next(), iter.next()) { + (Some(..), Some(..)) => Err(SbroadError::DuplicatedValue(format!( + "column name {column} is ambiguous" + ))), + (Some((_, position)), None) => { + if let Positions::Single(pos) = position { + return Ok(*pos); + } + Err(SbroadError::DuplicatedValue(format!( + "column name {column} is ambiguous" + ))) + } + _ => Err(SbroadError::NotFound( + Entity::Column, + format!("with name {column}"), + )), + } + } + + pub(crate) fn get_with_scan( + &self, + column: &str, + scan: Option<&str>, + ) -> Result<Position, SbroadError> { + let key = &(column, scan); + if let Some(position) = self.map.get(key) { + if let Positions::Single(pos) = position { + return Ok(*pos); + } + return Err(SbroadError::DuplicatedValue(format!( + "column name {column} is ambiguous" + ))); + } + Err(SbroadError::NotFound( + Entity::Column, + format!("with name {column} and scan {scan:?}"), + )) + } +} + impl Plan { /// Add `Row` to plan. pub fn add_row(&mut self, list: Vec<usize>, distribution: Option<Distribution>) -> usize { @@ -959,49 +1056,19 @@ impl Plan { col_names_set.insert(col_name); } - let relational_op = self.get_relation_node(child_node)?; - let output_id = relational_op.output(); - let output = self.get_expression_node(output_id)?; // Map of { column name (aliased) from child output -> its index in output } - let map: HashMap<&str, usize, RandomState> = if let Expression::Row { list, .. } = output { - let state = RandomState::new(); - let mut map: HashMap<&str, usize, RandomState> = - HashMap::with_capacity_and_hasher(col_names.len(), state); - for (pos, col_id) in list.iter().enumerate() { - let alias = self.get_expression_node(*col_id)?; - let name = alias.get_alias_name()?; - if !col_names_set.contains(name) { - continue; - } - if map.insert(name, pos).is_some() { - return Err(SbroadError::DuplicatedValue(format!( - "Duplicate column name {name} at position {pos}" - ))); - } - } - map - } else { - return Err(SbroadError::Invalid( - Entity::Node, - Some("Relational output tuple is not a row".into()), - )); - }; + let map = ColumnPositionMap::new(self, child_node)?; // Vec of { `map` key, targets, `map` value } let mut refs: Vec<(&str, Vec<usize>, usize)> = Vec::with_capacity(col_names.len()); - let all_found = col_names.iter().all(|col| { - map.get(col).map_or(false, |pos| { - refs.push((col, targets.to_vec(), *pos)); - true - }) - }); - if !all_found { - return Err(SbroadError::NotFound( - Entity::Column, - format!("with name {}", col_names.join(", ")), - )); + for col in col_names { + let pos = map.get(col)?; + refs.push((col, targets.to_vec(), pos)); } + let relational_op = self.get_relation_node(child_node)?; + let output_id = relational_op.output(); + let output = self.get_expression_node(output_id)?; let columns = output.clone_row_list()?; for (col, new_targets, pos) in refs { let col_id = *columns.get(pos).ok_or_else(|| { @@ -1085,22 +1152,40 @@ impl Plan { Ok(self.nodes.add_row(list, None)) } - /// Project columns from the child subquery node. + /// Project all the columns from the child's subquery node. + /// + /// New columns don't have aliases. /// - /// New columns don't have aliases. If column names are empty, - /// copy all the columns from the child. /// # Errors - /// Returns `SbroadError`: - /// - children nodes are not a relational - /// - column names don't exist - pub fn add_row_from_sub_query( + /// - children nodes are inconsistent with the target position; + pub fn add_row_from_subquery( &mut self, children: &[usize], - children_pos: usize, - col_names: &[&str], + target: usize, + parent: Option<usize>, ) -> Result<usize, SbroadError> { - let list = self.new_columns(children, false, &[children_pos], col_names, false, true)?; - Ok(self.nodes.add_row(list, None)) + let sq_id = *children.get(target).ok_or_else(|| { + SbroadError::UnexpectedNumberOfValues(format!( + "invalid target index: {target} (children: {children:?})", + )) + })?; + let sq_rel = self.get_relation_node(sq_id)?; + let sq_output_id = sq_rel.output(); + let sq_alias_ids_len = self.get_row_list(sq_output_id)?.len(); + let mut new_refs = Vec::with_capacity(sq_alias_ids_len); + for pos in 0..sq_alias_ids_len { + let alias_id = *self + .get_row_list(sq_output_id)? + .get(pos) + .expect("subquery output row already checked"); + let alias_type = self.get_expression_node(alias_id)?.calculate_type(self)?; + let ref_id = self + .nodes + .add_ref(parent, Some(vec![target]), pos, alias_type); + new_refs.push(ref_id); + } + let row_id = self.nodes.add_row(new_refs, None); + Ok(row_id) } /// Project columns from the join's left branch. diff --git a/sbroad-core/src/ir/expression/tests.rs b/sbroad-core/src/ir/expression/tests.rs index 795c7a22b8..7af24c668c 100644 --- a/sbroad-core/src/ir/expression/tests.rs +++ b/sbroad-core/src/ir/expression/tests.rs @@ -14,12 +14,9 @@ fn row_duplicate_column_names() { let c1_alias_a = plan.nodes.add_alias("a", c1).unwrap(); let c2 = plan.nodes.add_const(Value::from(2_u64)); let c2_alias_a = plan.nodes.add_alias("a", c2).unwrap(); - assert_eq!( - SbroadError::DuplicatedValue("row can't be added because `a` already has an alias".into()), - plan.nodes - .add_row_of_aliases(vec![c1_alias_a, c2_alias_a], None) - .unwrap_err() - ); + plan.nodes + .add_row_of_aliases(vec![c1_alias_a, c2_alias_a], None) + .unwrap(); } #[test] diff --git a/sbroad-core/src/ir/operator.rs b/sbroad-core/src/ir/operator.rs index fb35e2740b..54d0759fc7 100644 --- a/sbroad-core/src/ir/operator.rs +++ b/sbroad-core/src/ir/operator.rs @@ -12,10 +12,10 @@ use std::fmt::{Display, Formatter}; use crate::errors::{Action, Entity, SbroadError}; -use super::expression::Expression; +use super::expression::{ColumnPositionMap, Expression}; use super::transformation::redistribution::{MotionPolicy, Program}; use super::tree::traversal::{BreadthFirst, EXPR_CAPACITY, REL_CAPACITY}; -use super::{Node, Nodes, Plan}; +use super::{Node, Plan}; use crate::ir::distribution::{Distribution, Key, KeySet}; use crate::ir::expression::{ExpressionId, PlanExpr}; use crate::ir::helpers::RepeatableState; @@ -448,48 +448,6 @@ pub enum Relational { #[allow(dead_code)] impl Relational { - /// Gets a <column name - position> map from the output tuple. - /// - /// We expect that the top level of the node's expression tree - /// is a row of aliases with unique names. - /// - /// # Errors - /// Returns `SbroadError` when the output tuple is invalid. - pub fn output_alias_position_map<'nodes>( - &self, - nodes: &'nodes Nodes, - ) -> Result<HashMap<&'nodes str, usize, RandomState>, SbroadError> { - if let Some(Node::Expression(Expression::Row { list, .. })) = nodes.arena.get(self.output()) - { - let state = RandomState::new(); - let mut map: HashMap<&str, usize, RandomState> = - HashMap::with_capacity_and_hasher(list.len(), state); - let valid = list.iter().enumerate().all(|(pos, item)| { - // Checks that expressions in the row list are all aliases - if let Some(Node::Expression(Expression::Alias { ref name, .. })) = - nodes.arena.get(*item) - { - // Populates the map and checks for duplicates - if map.insert(name, pos).is_none() { - return true; - } - } - false - }); - if valid { - return Ok(map); - } - return Err(SbroadError::Invalid( - Entity::Expression, - Some("invalid output tuple".to_string()), - )); - } - Err(SbroadError::NotFound( - Entity::Node, - "(an output tuple) from the arena".to_string(), - )) - } - /// Gets an immutable id of the output tuple node of the plan's arena. #[must_use] pub fn output(&self) -> usize { @@ -1903,21 +1861,14 @@ impl Plan { rel_id: usize, ) -> Result<HashMap<ColumnPosition, ColumnPosition>, SbroadError> { let table = self.get_relation_or_error(table_name)?; - let alias_to_pos = self - .get_relation_node(rel_id)? - .output_alias_position_map(&self.nodes)?; + let alias_to_pos = ColumnPositionMap::new(self, rel_id)?; let mut map: HashMap<ColumnPosition, ColumnPosition> = HashMap::with_capacity(table.columns.len()); for (table_pos, col) in table.columns.iter().enumerate() { if let ColumnRole::Sharding = col.role { continue; } - let output_pos = *alias_to_pos.get(col.name.as_str()).ok_or_else(|| { - SbroadError::UnexpectedNumberOfValues(format!( - "no column with name {} in relational's ({}) output", - &col.name, rel_id - )) - })?; + let output_pos = alias_to_pos.get(col.name.as_str())?; map.insert(table_pos, output_pos); } Ok(map) @@ -1971,19 +1922,11 @@ impl Plan { rel_id: usize, children: Vec<usize>, ) -> Result<(), SbroadError> { - if let Node::Relational(ref mut rel) = - self.nodes.arena.get_mut(rel_id).ok_or_else(|| { - SbroadError::NotFound( - Entity::Node, - format!("(mutable) from arena with index {rel_id}"), - ) - })? - { + if let Node::Relational(ref mut rel) = self.nodes.get_mut(rel_id)? { rel.set_children(children)?; - Ok(()) - } else { - Err(SbroadError::Invalid(Entity::Relational, None)) + return Ok(()); } + Err(SbroadError::Invalid(Entity::Relational, None)) } /// Checks if the node is an additional child of some relational node. diff --git a/sbroad-core/src/ir/operator/tests.rs b/sbroad-core/src/ir/operator/tests.rs index 460ecf826f..72026e8d5b 100644 --- a/sbroad-core/src/ir/operator/tests.rs +++ b/sbroad-core/src/ir/operator/tests.rs @@ -111,7 +111,7 @@ fn projection() { // Invalid alias names in the output assert_eq!( - SbroadError::NotFound(Entity::Column, r#"with name a, e"#.into()), + SbroadError::NotFound(Entity::Column, r#"with name e"#.into()), plan.add_proj(scan_id, &["a", "e"], false).unwrap_err() ); @@ -479,7 +479,7 @@ fn selection_with_sub_query() { children.push(sq_id); let b_id = plan - .add_row_from_sub_query(&children[..], children.len() - 1, &["b"]) + .add_row_from_subquery(&children[..], children.len() - 1, None) .unwrap(); let a_id = plan.add_row_from_child(scan_t1_id, &["a"]).unwrap(); let eq_id = plan.add_cond(a_id, Bool::Eq, b_id).unwrap(); @@ -606,9 +606,8 @@ fn join_duplicate_columns() { .add_row_from_right_branch(scan_t1, scan_t2, &["d"]) .unwrap(); let condition = plan.nodes.add_bool(a_row, Bool::Eq, d_row).unwrap(); - assert_eq!( - SbroadError::DuplicatedValue("row can't be added because `a` already has an alias".into()), - plan.add_join(scan_t1, scan_t2, condition, JoinKind::Inner) - .unwrap_err() - ); + let join = plan + .add_join(scan_t1, scan_t2, condition, JoinKind::Inner) + .unwrap(); + plan.top = Some(join); } diff --git a/sbroad-core/src/ir/transformation/redistribution.rs b/sbroad-core/src/ir/transformation/redistribution.rs index 0b2fa50b69..79baa78f66 100644 --- a/sbroad-core/src/ir/transformation/redistribution.rs +++ b/sbroad-core/src/ir/transformation/redistribution.rs @@ -8,6 +8,7 @@ use std::collections::{hash_map::Entry, HashMap, HashSet}; use crate::errors::{Action, Entity, SbroadError}; use crate::frontend::sql::ir::SubtreeCloner; use crate::ir::distribution::{Distribution, Key, KeySet}; +use crate::ir::expression::ColumnPositionMap; use crate::ir::expression::Expression; use crate::ir::operator::{Bool, JoinKind, Relational, Unary, UpdateStrategy}; @@ -1621,9 +1622,7 @@ impl Plan { // remove bucket_id position, and shard_key.positions // can't be used directly. let expected_positions = { - let child_alias_map = self - .get_relation_node(pr_child)? - .output_alias_position_map(&self.nodes)?; + let child_alias_map = ColumnPositionMap::new(self, pr_child)?; let mut expected_positions = Vec::with_capacity(table.get_sk()?.len()); for pos in table.get_sk()? { let col_name = &table @@ -1636,16 +1635,7 @@ impl Plan { ) })? .name; - let col_pos = - *child_alias_map.get(col_name.as_str()).ok_or_else(|| { - SbroadError::Invalid( - Entity::Update, - Some(format!( - "no shard column {} in projection child's output", - &col_name - )), - ) - })?; + let col_pos = child_alias_map.get(col_name.as_str())?; expected_positions.push(col_pos); } expected_positions diff --git a/sbroad-core/src/ir/transformation/redistribution/groupby.rs b/sbroad-core/src/ir/transformation/redistribution/groupby.rs index db2a762ac1..b0c31b4971 100644 --- a/sbroad-core/src/ir/transformation/redistribution/groupby.rs +++ b/sbroad-core/src/ir/transformation/redistribution/groupby.rs @@ -2,7 +2,9 @@ use crate::errors::{Entity, SbroadError}; use crate::ir::aggregates::{generate_local_alias_for_aggr, AggregateKind, SimpleAggregate}; use crate::ir::distribution::Distribution; use crate::ir::expression::Expression::StableFunction; -use crate::ir::expression::{Comparator, Expression, ReferencePolicy, EXPR_HASH_DEPTH}; +use crate::ir::expression::{ + ColumnPositionMap, Comparator, Expression, ReferencePolicy, EXPR_HASH_DEPTH, +}; use crate::ir::operator::Relational; use crate::ir::relation::Type; use crate::ir::transformation::redistribution::{ @@ -14,7 +16,6 @@ use std::collections::{HashMap, HashSet}; use crate::ir::function::{Behavior, Function}; use crate::ir::helpers::RepeatableState; -use itertools::Itertools; use std::hash::{Hash, Hasher}; use std::rc::Rc; @@ -1263,23 +1264,14 @@ impl Plan { return Ok(child_id); } let mut gr_cols: Vec<usize> = Vec::with_capacity(grouping_exprs.len()); - let child_map = self - .get_relation_node(child_id)? - .output_alias_position_map(&self.nodes)? - .into_iter() - .map(|(k, v)| (k.to_string(), v)) - .collect::<HashMap<String, usize>>(); + let child_map = ColumnPositionMap::new(self, child_id)?; + let mut nodes = Vec::with_capacity(grouping_exprs.len()); for expr_id in grouping_exprs { let Some(local_alias) = local_aliases_map.get(expr_id) else { return Err(SbroadError::Invalid(Entity::Plan, Some(format!("could not find local alias for GroupBy expr ({expr_id})")))) }; - let Some(position) = child_map.get(&*(*local_alias)) else { - return Err(SbroadError::Invalid( - Entity::Node, - Some(format!("did not find alias: {local_alias} in child ({child_id}) output!"))) - ) - }; + let position = child_map.get(local_alias)?; let col_type = self.get_expression_node(*expr_id)?.calculate_type(self)?; if !col_type.is_scalar() { return Err(SbroadError::Invalid( @@ -1290,12 +1282,15 @@ impl Plan { )); } let new_col = Expression::Reference { - position: *position, + position, parent: None, targets: Some(vec![0]), col_type, }; - let new_col_id = self.nodes.push(Node::Expression(new_col)); + nodes.push(Node::Expression(new_col)); + } + for node in nodes { + let new_col_id = self.nodes.push(node); gr_cols.push(new_col_id); } let output = self.add_row_for_output(child_id, &[], true)?; @@ -1363,22 +1358,15 @@ impl Plan { "expected relation node ({rel_id}) to have children!" )) })?; - let alias_to_pos_map = self - .get_relation_node(child_id)? - .output_alias_position_map(&self.nodes)? - .into_iter() - .map(|(k, v)| (k.to_string(), v)) - .collect::<HashMap<String, usize>>(); + let alias_to_pos_map = ColumnPositionMap::new(self, child_id)?; + let mut nodes = Vec::with_capacity(group.len()); for (gr_expr_id, expr_id, parent) in group { let Some(local_alias) = local_aliases_map.get(&gr_expr_id) else { return Err(SbroadError::Invalid( Entity::Plan, Some(format!("failed to find local alias for groupby expression {gr_expr_id}")))) }; - let Some(pos) = alias_to_pos_map.get(&*(*local_alias)).copied() else { - return Err(SbroadError::Invalid(Entity::Plan, - Some(format!("failed to find alias '{local_alias}' in ({child_id}). Aliases: {}", alias_to_pos_map.keys().join(" "))))) - }; + let position = alias_to_pos_map.get(local_alias)?; let col_type = self.get_expression_node(expr_id)?.calculate_type(self)?; if !col_type.is_scalar() { return Err(SbroadError::Invalid( @@ -1391,10 +1379,13 @@ impl Plan { let new_ref = Expression::Reference { parent: Some(rel_id), targets: Some(vec![0]), - position: pos, + position, col_type, }; - let ref_id = self.nodes.push(Node::Expression(new_ref)); + nodes.push((parent, expr_id, gr_expr_id, Node::Expression(new_ref))); + } + for (parent, expr_id, gr_expr_id, node) in nodes { + let ref_id = self.nodes.push(node); if let Some(parent_expr_id) = parent { self.replace_expression(parent_expr_id, expr_id, ref_id)?; } else { @@ -1402,7 +1393,12 @@ impl Plan { Relational::Projection { .. } => { return Err(SbroadError::Invalid( Entity::Plan, - Some(format!("invalid mapping between groupby expression {gr_expr_id} and projection one: expression {expr_id} has no parent")) + Some(format!( + "{} {gr_expr_id} {} {expr_id} {}", + "invalid mapping between group by expression", + "and projection one: expression", + "has no parent", + )), )) } Relational::Having { filter, .. } => { @@ -1411,7 +1407,7 @@ impl Plan { _ => { return Err(SbroadError::Invalid( Entity::Plan, - Some(format!("unexpected node in Reduce stage: {rel_id}")) + Some(format!("unexpected node in Reduce stage: {rel_id}")), )) } } @@ -1516,17 +1512,19 @@ impl Plan { )), )); }; - let alias_to_pos_map = self - .get_relation_node(child_id)? - .output_alias_position_map(&self.nodes)? - .into_iter() - .map(|(k, v)| (k.to_string(), v)) - .collect::<HashMap<String, usize>>(); - for info in infos { + let alias_to_pos_map = ColumnPositionMap::new(self, child_id)?; + let mut position_kinds = Vec::with_capacity(infos.len()); + for info in &infos { + position_kinds.push( + info.aggr + .get_position_kinds(&alias_to_pos_map, info.is_distinct)?, + ); + } + for (info, pos_kinds) in infos.into_iter().zip(position_kinds) { let final_expr = info.aggr.create_final_aggregate_expr( parent, self, - &alias_to_pos_map, + pos_kinds, info.is_distinct, )?; if let Some(parent_expr) = info.parent_expr { diff --git a/sbroad-core/src/ir/transformation/redistribution/tests.rs b/sbroad-core/src/ir/transformation/redistribution/tests.rs index 75902704c3..8bab974812 100644 --- a/sbroad-core/src/ir/transformation/redistribution/tests.rs +++ b/sbroad-core/src/ir/transformation/redistribution/tests.rs @@ -47,7 +47,7 @@ fn full_motion_less_for_sub_query() { children.push(sq_id); let b_id = plan - .add_row_from_sub_query(&children[..], children.len() - 1, &["b"]) + .add_row_from_subquery(&children[..], children.len() - 1, None) .unwrap(); let a_id = plan.add_row_from_child(scan_t1_id, &["a"]).unwrap(); let less_id = plan.add_cond(a_id, Bool::Lt, b_id).unwrap(); @@ -110,7 +110,7 @@ fn full_motion_non_segment_outer_for_sub_query() { children.push(sq_id); let a_id = plan - .add_row_from_sub_query(&children[..], children.len() - 1, &["a"]) + .add_row_from_subquery(&children[..], children.len() - 1, None) .unwrap(); let b_id = plan.add_row_from_child(scan_t1_id, &["b"]).unwrap(); let eq_id = plan.add_cond(b_id, Bool::Eq, a_id).unwrap(); @@ -170,7 +170,7 @@ fn local_sub_query() { children.push(sq_id); let inner_a_id = plan - .add_row_from_sub_query(&children[..], children.len() - 1, &["a"]) + .add_row_from_subquery(&children[..], children.len() - 1, None) .unwrap(); let outer_a_id = plan.add_row_from_child(scan_t1_id, &["a"]).unwrap(); let eq_id = plan.add_cond(outer_a_id, Bool::Eq, inner_a_id).unwrap(); @@ -239,7 +239,7 @@ fn multiple_sub_queries() { let sq2_pos = children.len() - 1; let sq1_inner_a_id = plan - .add_row_from_sub_query(&children[..], sq1_pos, &["a"]) + .add_row_from_subquery(&children[..], sq1_pos, None) .unwrap(); let sq1_outer_a_id = plan.add_row_from_child(scan_t1_id, &["a"]).unwrap(); let less_id = plan @@ -247,7 +247,7 @@ fn multiple_sub_queries() { .unwrap(); let sq2_inner_a_id = plan - .add_row_from_sub_query(&children[..], sq2_pos, &["b"]) + .add_row_from_subquery(&children[..], sq2_pos, None) .unwrap(); let sq2_outer_a_id = plan.add_row_from_child(scan_t1_id, &["a"]).unwrap(); let eq_id = plan diff --git a/sbroad-core/src/ir/transformation/redistribution/tests/segment.rs b/sbroad-core/src/ir/transformation/redistribution/tests/segment.rs index 894265a7ff..70deabb0f0 100644 --- a/sbroad-core/src/ir/transformation/redistribution/tests/segment.rs +++ b/sbroad-core/src/ir/transformation/redistribution/tests/segment.rs @@ -53,7 +53,7 @@ fn sub_query1() { children.push(sq_id); let b_id = plan - .add_row_from_sub_query(&children[..], children.len() - 1, &["b"]) + .add_row_from_subquery(&children[..], children.len() - 1, None) .unwrap(); let a_id = plan.add_row_from_child(scan_t1_id, &["a"]).unwrap(); let eq_id = plan.add_cond(a_id, Bool::Eq, b_id).unwrap(); diff --git a/sbroad-core/tests/artifactory/backend/sql/tree/arbitrary_projection_plan.yaml b/sbroad-core/tests/artifactory/backend/sql/tree/arbitrary_projection_plan.yaml index c3fd63a7d7..2cd3f545e1 100644 --- a/sbroad-core/tests/artifactory/backend/sql/tree/arbitrary_projection_plan.yaml +++ b/sbroad-core/tests/artifactory/backend/sql/tree/arbitrary_projection_plan.yaml @@ -78,7 +78,7 @@ nodes: # 12 - Expression: Reference: - parent: 26 + parent: 27 targets: [0] position: 0 col_type: Integer @@ -91,7 +91,7 @@ nodes: # 14 - Expression: Reference: - parent: 26 + parent: 27 targets: [0] position: 1 col_type: Integer @@ -104,7 +104,7 @@ nodes: # 16 - Expression: Reference: - parent: 26 + parent: 27 targets: [0] position: 2 col_type: Integer @@ -117,7 +117,7 @@ nodes: # 18 - Expression: Reference: - parent: 26 + parent: 27 targets: [0] position: 3 col_type: Integer @@ -157,17 +157,22 @@ nodes: op: and right: 23 # 25 + - Expression: + Alias: + name: COL_1 + child: 24 + # 26 - Expression: Row: list: - - 24 + - 25 distribution: ~ - # 26 + # 27 - Relational: Projection: children: - 11 - output: 25 + output: 26 is_distinct: false relations: tables: @@ -205,7 +210,7 @@ relations: engine: Memtx slices: slices: [] -top: 26 +top: 27 is_explain: false undo: log: {} diff --git a/sbroad-core/tests/artifactory/backend/sql/tree/arithmetic_projection_plan.yaml b/sbroad-core/tests/artifactory/backend/sql/tree/arithmetic_projection_plan.yaml index 8c92fd55c8..83eb9eee43 100644 --- a/sbroad-core/tests/artifactory/backend/sql/tree/arithmetic_projection_plan.yaml +++ b/sbroad-core/tests/artifactory/backend/sql/tree/arithmetic_projection_plan.yaml @@ -104,7 +104,7 @@ nodes: # 16 - Expression: Reference: - parent: 35 + parent: 36 targets: [0] position: 0 col_type: Integer @@ -117,7 +117,7 @@ nodes: # 18 - Expression: Reference: - parent: 35 + parent: 36 targets: [0] position: 1 col_type: Integer @@ -130,7 +130,7 @@ nodes: # 20 - Expression: Reference: - parent: 35 + parent: 36 targets: [0] position: 2 col_type: Integer @@ -143,7 +143,7 @@ nodes: # 22 - Expression: Reference: - parent: 35 + parent: 36 targets: [0] position: 3 col_type: Integer @@ -156,7 +156,7 @@ nodes: # 24 - Expression: Reference: - parent: 35 + parent: 36 targets: [0] position: 4 col_type: Integer @@ -169,7 +169,7 @@ nodes: # 26 - Expression: Reference: - parent: 35 + parent: 36 targets: [0] position: 5 col_type: Integer @@ -222,17 +222,22 @@ nodes: right: 19 with_parentheses: false # 34 + - Expression: + Alias: + name: COL_1 + child: 33 + # 35 - Expression: Row: list: - - 33 + - 34 distribution: ~ - # 35 + # 36 - Relational: Projection: children: - 15 - output: 34 + output: 35 is_distinct: false relations: tables: @@ -278,7 +283,7 @@ relations: engine: Memtx slices: slices: [] -top: 35 +top: 36 is_explain: false undo: log: {} -- GitLab