From 43f8a3d186399d54d61321ec0db41c0e42dcfdd3 Mon Sep 17 00:00:00 2001 From: EmirVildanov <reddog201030@gmail.com> Date: Wed, 21 Aug 2024 13:27:06 +0300 Subject: [PATCH] feat: fix documentation, delete unused expression functions, replace errors on panics --- sbroad-core/src/executor/ir.rs | 14 +- sbroad-core/src/frontend/sql.rs | 23 ++- sbroad-core/src/frontend/sql/ir/tests.rs | 4 +- sbroad-core/src/ir.rs | 13 +- sbroad-core/src/ir/api/parameter.rs | 4 +- sbroad-core/src/ir/distribution.rs | 168 +++++++----------- sbroad-core/src/ir/explain.rs | 27 ++- sbroad-core/src/ir/expression.rs | 31 +--- sbroad-core/src/ir/operator.rs | 118 +++++------- sbroad-core/src/ir/operator/tests.rs | 2 +- sbroad-core/src/ir/transformation/bool_in.rs | 2 +- .../src/ir/transformation/redistribution.rs | 64 +++---- .../ir/transformation/redistribution/dml.rs | 30 +--- .../ir/transformation/redistribution/tests.rs | 2 + sbroad-core/src/ir/tree/tests.rs | 3 +- 15 files changed, 192 insertions(+), 313 deletions(-) diff --git a/sbroad-core/src/executor/ir.rs b/sbroad-core/src/executor/ir.rs index c5ca37882..79280ba12 100644 --- a/sbroad-core/src/executor/ir.rs +++ b/sbroad-core/src/executor/ir.rs @@ -131,9 +131,6 @@ impl ExecutionPlan { } /// Get motion virtual table - /// - /// # Errors - /// - Failed to find a virtual table for the motion node. pub fn get_motion_vtable(&self, motion_id: NodeId) -> Result<Rc<VirtualTable>, SbroadError> { if let Some(vtable) = self.get_vtables() { if let Some(result) = vtable.get(&motion_id) { @@ -141,14 +138,7 @@ impl ExecutionPlan { } } let motion_node = self.get_ir_plan().get_relation_node(motion_id)?; - - Err(SbroadError::NotFound( - Entity::VirtualTable, - format_smolstr!( - "for Motion node ({motion_id:?}): {motion_node:?}. Plan: {:?}", - self - ), - )) + panic!("Virtual table for motion {motion_node:?} with id {motion_id} not found.") } /// Add materialize motion result to map of virtual tables. @@ -561,7 +551,7 @@ impl ExecutionPlan { } } // We should not remove the child of a local motion node. - // The subtree is needed to complie the SQL on the storage. + // The subtree is needed to compile the SQL on the storage. if !policy.is_local() { *children = Vec::new(); } diff --git a/sbroad-core/src/frontend/sql.rs b/sbroad-core/src/frontend/sql.rs index 2831d62b7..93fee709f 100644 --- a/sbroad-core/src/frontend/sql.rs +++ b/sbroad-core/src/frontend/sql.rs @@ -1527,6 +1527,9 @@ where M: Metadata, { subquery_ids_queue: VecDeque<NodeId>, + /// Vec of BETWEEN expressions met during parsing. + /// Used later to fix them as soon as we need to resolve double-linking problem + /// of left expression. betweens: Vec<Between>, metadata: &'worker M, /// Map of { reference plan_id -> (it's column name, whether it's covered with row)} @@ -1682,6 +1685,10 @@ pub enum SelectOp { Except, } +/// Helper struct denoting any combination of +/// * SELECT +/// * UNION (ALL) +/// * EXCEPT #[derive(Clone)] pub enum SelectExpr { PlanId { @@ -1976,7 +1983,7 @@ impl ParseExpression { let uncovered_plan_child_id = if let Some((_, is_row)) = reference { if *is_row { let plan_inner_expr = plan.get_expression_node(plan_child_id)?; - *plan_inner_expr.get_row_list()?.first().ok_or_else(|| { + *plan_inner_expr.get_row_list().first().ok_or_else(|| { SbroadError::UnexpectedNumberOfValues( "There must be a Reference under Row.".into(), ) @@ -2213,10 +2220,9 @@ where let plan_left_id = referred_relation_ids .first() - .ok_or(SbroadError::Invalid( - Entity::Query, - Some("Reference must point to some relational node".into()) - ))?; + .unwrap_or_else(|| + panic!("Reference must point to some relational node") + ); worker.build_columns_map(plan, *plan_left_id)?; @@ -2266,7 +2272,9 @@ where Err(e) => return Err(e) }; let child = plan.get_relation_node(*plan_left_id)?; - let child_alias_ids = plan.get_row_list(child.output())?; + let child_alias_ids = plan.get_row_list( + child.output() + )?; let child_alias_id = child_alias_ids .get(col_position) .expect("column position is invalid"); @@ -2972,6 +2980,7 @@ impl AbstractSyntaxTree { // * `Row`s are added to support parsing Row expressions under `Values` nodes. // * `Literal`s are added to support procedure calls and // ALTER SYSTEM which should not contain all possible `Expr`s. + // * `SelectWithOptionalContinuation` is also parsed using Pratt parser pairs_map.insert(arena_node_id, stack_node.pair.clone()); } Rule::Projection | Rule::OrderBy => { @@ -3318,7 +3327,7 @@ impl AbstractSyntaxTree { plan.add_select(&[plan_rel_child_id], expr_plan_node_id)? } Rule::Having => plan.add_having(&[plan_rel_child_id], expr_plan_node_id)?, - _ => return Err(SbroadError::Invalid(Entity::AST, None)), // never happens + _ => panic!("Expected to see Selection or Having."), }; map.add(id, plan_node_id); } diff --git a/sbroad-core/src/frontend/sql/ir/tests.rs b/sbroad-core/src/frontend/sql/ir/tests.rs index 412118e26..58bd07d3a 100644 --- a/sbroad-core/src/frontend/sql/ir/tests.rs +++ b/sbroad-core/src/frontend/sql/ir/tests.rs @@ -2837,8 +2837,8 @@ vtable_max_rows = 5000 #[test] fn front_sql_having_with_sq_segment_local_motion() { - // check subquery has no Motion, as it has the same distribution - // as columns used in GroupBy + // Check subquery has no Motion, as it has the same distribution + // as columns used in GroupBy. let input = r#" SELECT "sysFrom", "sys_op", sum(distinct "id") as "sum", count(distinct "id") as "count" from "test_space" group by "sysFrom", "sys_op" diff --git a/sbroad-core/src/ir.rs b/sbroad-core/src/ir.rs index 52473088f..c317c6885 100644 --- a/sbroad-core/src/ir.rs +++ b/sbroad-core/src/ir.rs @@ -1484,15 +1484,8 @@ impl Plan { /// /// # Errors /// - supplied id does not correspond to `Row` node - pub fn get_row_list(&self, row_id: NodeId) -> Result<&Vec<NodeId>, SbroadError> { - if let Expression::Row(Row { list, .. }) = self.get_expression_node(row_id)? { - return Ok(list); - } - - Err(SbroadError::Invalid( - Entity::Expression, - Some("node is not Row".into()), - )) + pub fn get_row_list(&self, row_id: usize) -> Result<&[usize], SbroadError> { + Ok(self.get_expression_node(row_id)?.get_row_list()) } /// Helper function to get id of node under alias node, @@ -1767,7 +1760,7 @@ impl Plan { let column_expr_node = self.get_expression_node(column_rel_node.output())?; let col_alias_id = column_expr_node - .get_row_list()? + .get_row_list() .get(*position) .unwrap_or_else(|| panic!("Column not found at position {position} in row list")); diff --git a/sbroad-core/src/ir/api/parameter.rs b/sbroad-core/src/ir/api/parameter.rs index 91b9902fd..9825c831b 100644 --- a/sbroad-core/src/ir/api/parameter.rs +++ b/sbroad-core/src/ir/api/parameter.rs @@ -635,9 +635,9 @@ impl Plan { panic!("Expected a values row: {values_row:?}") }; let data = self.get_expression_node(data_id)?; - let data_list = data.clone_row_list()?; + let data_list = data.clone_row_list(); let output = self.get_expression_node(output_id)?; - let output_list = output.clone_row_list()?; + let output_list = output.clone_row_list(); for (pos, alias_id) in output_list.iter().enumerate() { let new_child_id = *data_list .get(pos) diff --git a/sbroad-core/src/ir/distribution.rs b/sbroad-core/src/ir/distribution.rs index 1e8451f59..8ea9503a1 100644 --- a/sbroad-core/src/ir/distribution.rs +++ b/sbroad-core/src/ir/distribution.rs @@ -20,9 +20,14 @@ use super::{Node, Plan}; /// Tuple columns that determinate its segment distribution. /// -/// If table T1 is segmented by columns (a, b) and produces -/// tuples with columns (a, b, c), it means that for any T1 tuple -/// on a segment S1: f(a, b) = S1 and (a, b) is a segmentation key. +/// Given: +/// * f -- distribution function. +/// * Table T1 contains columns (a, b, c) and distributed by columns (a, b). +/// +/// Let's look at tuple (column row) with index i: (`a_i`, `b_i`, `c_i`). +/// Calling function f on (`a_i`, `b_i`) gives us segment `S_i`. Its a segment on which +/// this tuple will be located. +/// (a, b) is called a "segmentation key". #[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Debug, Clone)] pub struct Key { /// A list of column positions in the tuple that form a @@ -76,6 +81,14 @@ impl Key { } } +/// Set of `Key`s each of which represents the same segmentation. +/// After a join of several tables on the given key we may get several columns' sets that represent +/// the same distribution. +/// E.g. given 2 tables: +/// * t(a, b) distributed by a +/// * q(p, r) distributed by p +/// After their join (`t join q on a = p`) we'll get table tq(a, b, p, r) where +/// both Key((a)) and Key((p)) will represent the same segmentation. #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] pub struct KeySet(HashSet<Key, RepeatableState>); @@ -137,7 +150,7 @@ pub enum Distribution { /// /// Example: tuples from the segmented table. Segment { - /// A set of distribution keys (we can have multiple keys after join) + /// A set of distribution keys (we can have multiple keys after join). keys: KeySet, }, /// A subtree with relational operator that has this distribution is guaranteed @@ -151,8 +164,9 @@ pub enum Distribution { } impl Distribution { - fn union(left: &Distribution, right: &Distribution) -> Result<Distribution, SbroadError> { - let dist = match (left, right) { + /// Calculate a new distribution for the `Union` or `UnionAll` output tuple. + fn union(left: &Distribution, right: &Distribution) -> Distribution { + match (left, right) { ( Distribution::Global | Distribution::Any, Distribution::Any | Distribution::Segment { .. }, @@ -189,10 +203,9 @@ impl Distribution { Ok(dist) } - /// Calculate a new distribution for the `Except` and `UnionAll` output tuple. - /// Single - fn except(left: &Distribution, right: &Distribution) -> Result<Distribution, SbroadError> { - let dist = match (left, right) { + /// Calculate a new distribution for the `Except` output tuple. + fn except(left: &Distribution, right: &Distribution) -> Distribution { + match (left, right) { (Distribution::Global, _) => right.clone(), (_, Distribution::Global) => left.clone(), (Distribution::Single, _) | (_, Distribution::Single) => { @@ -235,7 +248,7 @@ impl Distribution { Some(format_smolstr!("join child has unexpected distribution Single. Left: {left:?}, right: {right:?}")))); } (Distribution::Global, Distribution::Global) => { - // this case is handled by `dist_from_subqueries`` + // This case is handled by `dist_from_subqueries`. Distribution::Global } (Distribution::Global, _) | (Distribution::Any, Distribution::Segment { .. }) => { @@ -278,6 +291,7 @@ impl Distribution { } } +/// Nodes referred by relational operator output (ids of its children). enum ReferredNodes { None, Single(NodeId), @@ -318,6 +332,19 @@ impl ReferredNodes { } } +/// Helper structure to get the column position +/// in the child node. +#[derive(Debug, Eq, Hash, PartialEq)] +struct ChildColumnReference { + /// Child node id. + node_id: NodeId, + /// Column position in the child node. + column_position: usize, +} + +type ParentColumnPosition = usize; + +/// Set of the relational nodes referred by references under the row. struct ReferenceInfo { referred_children: ReferredNodes, child_column_to_parent_col: AHashMap<ChildColumnReference, ParentColumnPosition>, @@ -392,16 +419,6 @@ impl Iterator for ReferredNodes { } } -/// Helper structure to get the column position -/// in the child node. -#[derive(Debug, Eq, Hash, PartialEq)] -struct ChildColumnReference { - /// Child node id. - node_id: NodeId, - /// Column position in the child node. - column_position: usize, -} - impl From<(NodeId, usize)> for ChildColumnReference { fn from((node_id, column_position): (NodeId, usize)) -> Self { ChildColumnReference { @@ -411,10 +428,9 @@ impl From<(NodeId, usize)> for ChildColumnReference { } } -type ParentColumnPosition = usize; - impl Plan { /// Sets distribution for output tuple of projection. + /// Applied in case two stage aggregation is not present. /// /// # Errors /// - node is not projection @@ -469,6 +485,8 @@ impl Plan { } /// Calculate and set tuple distribution. + /// In comparison with `set_dist` it automatically + /// derives distribution from children nodes. /// /// # Errors /// Returns `SbroadError` when current expression is not a `Row` or contains broken references. @@ -497,10 +515,9 @@ impl Plan { }; let parent = self.get_relation_node(parent_id)?; - // References in the branch node. let children = parent.children(); if children.is_empty() { - // References in the leaf (relation scan) node. + // Working with a leaf node (ScanRelation). let tbl_name = self.get_scan_relation(parent_id)?; let tbl = self.get_relation_or_error(tbl_name)?; if tbl.is_global() { @@ -524,8 +541,6 @@ impl Plan { table_map.insert(*position, pos); } } - // We don't handle a case with the ValueRow (distribution would be set to Replicated in Value node). - // So, we should care only about relation scan nodes. let sk = tbl.get_sk()?; let mut new_key: Key = Key::new(Vec::with_capacity(sk.len())); let all_found = sk.iter().all(|pos| { @@ -545,7 +560,7 @@ impl Plan { } } } else { - // Set of the referred relational nodes in the row. + // Working with all other nodes. let ref_info = ReferenceInfo::new(row_id, self, &children)?; match ref_info.referred_children { @@ -589,18 +604,14 @@ impl Plan { Ok(()) } - /// Join, Having or Selection - /// may have references that refer only to one - /// child (or two in case of Join), - /// but their final distribution also depends - /// on subqueries. Currently this dependency - /// arises only when non-sq children (required children) - /// have `Distribution::Global`. + /// Each relational node have non-sq (required) and sq (additional) children. + /// In case required children have `Distribution::Global` we can copy sq distribution + /// as far as required children data is stored on each replicaset. /// - /// This method checks whether node is Join, Selection or Having, - /// and if all required children have Global distribution, it computes - /// the correct distribution in case there are any subqueries. Otherwise, - /// it returns `None`. + /// In case all required children have Global distribution it improves + /// Global distribution based on subqueries in case there are any (note that `ValuesRow` has + /// not required children). + /// Otherwise, it returns `None`. /// /// # Errors /// - node is not relational @@ -612,15 +623,10 @@ impl Plan { ) -> Result<Option<Distribution>, SbroadError> { let node = self.get_relation_node(node_id)?; - if !matches!( - node, - Relational::Join(_) | Relational::Having(_) | Relational::Selection(_) - ) { - return Ok(None); - } - - let required_children_len = self.get_required_children_len(node_id)?; - // check all required children have Global distribution + let required_children_len = self + .get_required_children_len(node_id)? + .unwrap_or_else(|| panic!("Unexpected node to get required children number: {node:?}")); + // Check all required children have Global distribution. for child_idx in 0..required_children_len { let child_id = self.get_relational_child(node_id, child_idx)?; let child_dist = self.get_rel_distribution(child_id)?; @@ -638,17 +644,13 @@ impl Plan { suggested_dist = Some(Distribution::Any); } Distribution::Any { .. } => { - // Earier when resolving conflicts for subqueries we must have - // inserted Motion(Full) for subquery with Any - // distribution. - return Err(SbroadError::Invalid( - Entity::Distribution, - Some(format_smolstr!( - "expected Motion(Full) for subquery child ({sq_id:?})" - )), - )); + // Earlier when resolving conflicts for subqueries we must have + // inserted Motion(Full) for subquery with Any distribution. + panic!("Expected Motion(Full) for subquery child ({sq_id}).") + } + Distribution::Single | Distribution::Global => { + // TODO: In case we have a single sq can we improve Global to Single? } - Distribution::Single | Distribution::Global => {} } } @@ -723,8 +725,11 @@ impl Plan { /// Sets the `Distribution` of row to given one /// /// # Errors - /// - supplied node is `Row` - pub fn set_dist(&mut self, row_id: NodeId, dist: Distribution) -> Result<(), SbroadError> { + /// - Unable to get node. + /// + /// # Panics + /// - Supplied node is `Row`. + pub fn set_dist(&mut self, row_id: usize, dist: Distribution) -> Result<(), SbroadError> { if let MutExpression::Row(Row { ref mut distribution, .. @@ -733,10 +738,7 @@ impl Plan { *distribution = Some(dist); return Ok(()); } - Err(SbroadError::Invalid( - Entity::Expression, - Some("the node is not a row type".to_smolstr()), - )) + panic!("The node is not a Row."); } fn set_two_children_node_dist( @@ -803,41 +805,9 @@ impl Plan { /// /// # Errors /// - Node is not of a row type. - pub fn get_distribution(&self, row_id: NodeId) -> Result<&Distribution, SbroadError> { - match self.get_node(row_id)? { - Node::Expression(_) => self.distribution(row_id), - Node::Relational(_) => Err(SbroadError::Invalid( - Entity::Distribution, - Some( - "Failed to get distribution for a relational node (try its row output tuple)." - .to_smolstr(), - ), - )), - Node::Parameter(..) => Err(SbroadError::Invalid( - Entity::Distribution, - Some("Failed to get distribution for a parameter node.".to_smolstr()), - )), - Node::Ddl(_) => Err(SbroadError::Invalid( - Entity::Distribution, - Some("Failed to get distribution for a DDL node.".to_smolstr()), - )), - Node::Acl(_) => Err(SbroadError::Invalid( - Entity::Distribution, - Some("Failed to get distribution for a ACL node.".to_smolstr()), - )), - Node::Block(_) => Err(SbroadError::Invalid( - Entity::Distribution, - Some("Failed to get distribution for a code block node.".to_smolstr()), - )), - Node::Invalid(_) => Err(SbroadError::Invalid( - Entity::Distribution, - Some("Failed to get distribution for an invalid node.".to_smolstr()), - )), - Node::Plugin(_) => Err(SbroadError::Invalid( - Entity::Distribution, - Some("Failed to get distribution for a PLUGIN block node.".to_smolstr()), - )), - } + pub fn get_distribution(&self, row_id: usize) -> Result<&Distribution, SbroadError> { + let expr = self.get_expression_node(row_id)?; + Ok(expr.distribution()) } /// Gets distribution of the relational node. diff --git a/sbroad-core/src/ir/explain.rs b/sbroad-core/src/ir/explain.rs index 80413f2e1..257f8779b 100644 --- a/sbroad-core/src/ir/explain.rs +++ b/sbroad-core/src/ir/explain.rs @@ -386,7 +386,7 @@ impl Projection { let alias_list = plan.get_expression_node(output_id)?; - for col_node_id in alias_list.get_row_list()? { + for col_node_id in alias_list.get_row_list() { let col = ColExpr::new(plan, *col_node_id, sq_ref_map)?; result.cols.push(col); @@ -436,7 +436,7 @@ impl GroupBy { result.gr_cols.push(col); } let alias_list = plan.get_expression_node(output_id)?; - for col_node_id in alias_list.get_row_list()? { + for col_node_id in alias_list.get_row_list() { let col = ColExpr::new(plan, *col_node_id, sq_ref_map)?; result.output_cols.push(col); } @@ -736,20 +736,14 @@ impl Row { if let Relational::ScanSubQuery { .. } | Relational::Motion { .. } = rel_node { - let sq_offset = sq_ref_map.get(&rel_id).ok_or_else(|| { - SbroadError::NotFound( - Entity::SubQuery, - format_smolstr!("with index {rel_id} in the map"), - ) - })?; + let sq_offset = sq_ref_map.get(&rel_id).unwrap_or_else(|| { + panic!("Not found subquery with index {rel_id} in the map.") + }); row.add_col(RowVal::SqRef(Ref::new(*sq_offset))); } else { - return Err(SbroadError::Invalid( - Entity::Plan, - Some(format_smolstr!( - "additional child ({rel_id}) is not SQ or Motion: {rel_node:?}" - )), - )); + panic!( + "Additional child ({rel_id}) is not SQ or Motion: {rel_node:?}." + ); } } else { let col = ColExpr::new(plan, expr_id, sq_ref_map)?; @@ -1160,7 +1154,7 @@ impl FullExplain { let explain_node = match &node { Relational::Selection { .. } => ExplainNode::Selection(selection), Relational::Having { .. } => ExplainNode::Having(selection), - _ => return Err(SbroadError::DoSkip), + _ => panic!("Expected Selection or Having node."), }; Some(explain_node) } @@ -1207,8 +1201,7 @@ impl FullExplain { })?; let child_output_id = ir.get_relation_node(*child_id)?.output(); - let child_node = ir.get_expression_node(child_output_id)?; - let col_list = child_node.get_row_list()?; + let col_list = ir.get_expression_node(child_output_id)?.get_row_list(); let targets = (s.targets) .iter() diff --git a/sbroad-core/src/ir/expression.rs b/sbroad-core/src/ir/expression.rs index 6bb43457a..3c9e904f2 100644 --- a/sbroad-core/src/ir/expression.rs +++ b/sbroad-core/src/ir/expression.rs @@ -648,7 +648,7 @@ impl ColumnPositionMap { pub(crate) fn new(plan: &Plan, rel_id: NodeId) -> Result<Self, SbroadError> { let rel_node = plan.get_relation_node(rel_id)?; let output = plan.get_expression_node(rel_node.output())?; - let alias_ids = output.get_row_list()?; + let alias_ids = output.get_row_list(); let mut map = BTreeMap::new(); let mut max_name = None; @@ -1199,7 +1199,6 @@ impl Plan { } /// Project all the columns from the child's subquery node. - /// /// New columns don't have aliases. /// /// # Errors @@ -1483,34 +1482,6 @@ impl Plan { Ok(false) } - /// Extract `Const` value from `Row` by index - /// - /// # Errors - /// - node is not a row - /// - row doesn't have const - /// - const value is invalid - #[allow(dead_code)] - pub fn get_child_const_from_row( - &self, - row_id: NodeId, - child_num: usize, - ) -> Result<Value, SbroadError> { - let node = self.get_expression_node(row_id)?; - if let Expression::Row(Row { list, .. }) = node { - let const_node_id = list.get(child_num).ok_or_else(|| { - SbroadError::NotFound(Entity::Node, format_smolstr!("{child_num}")) - })?; - - let v = self.get_expression_node(*const_node_id)?.as_const_value()?; - - return Ok(v); - } - Err(SbroadError::Invalid( - Entity::Node, - Some("node is not Row type".into()), - )) - } - /// Replace parent for all references in the expression subtree of the current node. /// /// # Errors diff --git a/sbroad-core/src/ir/operator.rs b/sbroad-core/src/ir/operator.rs index e4477d3b4..547e6f3b0 100644 --- a/sbroad-core/src/ir/operator.rs +++ b/sbroad-core/src/ir/operator.rs @@ -357,10 +357,7 @@ impl Plan { pub fn add_except(&mut self, left: NodeId, right: NodeId) -> Result<NodeId, SbroadError> { let child_row_len = |child: NodeId, plan: &Plan| -> Result<usize, SbroadError> { let child_output = plan.get_relation_node(child)?.output(); - Ok(plan - .get_expression_node(child_output)? - .get_row_list()? - .len()) + Ok(plan.get_expression_node(child_output)?.get_row_list().len()) }; let left_row_len = child_row_len(left, self)?; @@ -906,8 +903,11 @@ impl Plan { /// Adds motion node. /// /// # Errors - /// - child node is not relational - /// - child output tuple is invalid + /// - Unable to get node. + /// + /// # Panics + /// - Child node is not relational. + /// - Child output tuple is invalid. pub fn add_motion( &mut self, child_id: NodeId, @@ -925,12 +925,9 @@ impl Plan { let output = self.add_row_for_output(child_id, &[], true, None)?; match policy { MotionPolicy::None => { - return Err(SbroadError::Invalid( - Entity::Motion, - Some(format_smolstr!( - "add_motion: got MotionPolicy::None for child_id: {child_id:?}" - )), - )) + panic!( + "None policy is not expected for `add_motion` method for child_id: {child_id}." + ); } MotionPolicy::Segment(key) | MotionPolicy::LocalSegment(key) => { if let Ok(keyset) = KeySet::try_from(key) { @@ -1200,7 +1197,7 @@ impl Plan { let child_columns = self .get_expression_node(child_output_id) .expect("output row") - .clone_row_list()?; + .clone_row_list(); if child_columns.len() != columns.len() { return Err(SbroadError::UnexpectedNumberOfValues(format_smolstr!( "expected {} columns in CTE, got {}", @@ -1246,10 +1243,7 @@ impl Plan { ) -> Result<NodeId, SbroadError> { let child_row_len = |child: NodeId, plan: &Plan| -> Result<usize, SbroadError> { let child_output = plan.get_relation_node(child)?.output(); - Ok(plan - .get_expression_node(child_output)? - .get_row_list()? - .len()) + Ok(plan.get_expression_node(child_output)?.get_row_list().len()) }; let left_row_len = child_row_len(left, self)?; @@ -1305,10 +1299,10 @@ impl Plan { /// - Row node is not of a row type pub fn add_values_row( &mut self, - row_id: NodeId, + expr_row_id: usize, col_idx: &mut usize, ) -> Result<NodeId, SbroadError> { - let row = self.get_expression_node(row_id)?; + let row = self.get_expression_node(expr_row_id)?; let columns = row.clone_row_list()?; let mut aliases: Vec<NodeId> = Vec::with_capacity(columns.len()); for col_id in columns { @@ -1323,11 +1317,11 @@ impl Plan { let values_row = ValuesRow { output, - data: row_id, + data: expr_row_id, children: vec![], }; let values_row_id = self.add_relational(values_row.into())?; - self.replace_parent_in_subtree(row_id, None, Some(values_row_id))?; + self.replace_parent_in_subtree(expr_row_id, None, Some(values_row_id))?; Ok(values_row_id) } @@ -1383,7 +1377,7 @@ impl Plan { // Generate a row of aliases referencing all the children. let mut aliases: Vec<NodeId> = Vec::with_capacity(names.len()); - let columns = last_output.clone_row_list()?; + let columns = last_output.clone_row_list(); for (pos, name) in names.iter().enumerate() { let col_id = *columns.get(pos).ok_or_else(|| { SbroadError::UnexpectedNumberOfValues(format_smolstr!( @@ -1417,12 +1411,9 @@ impl Plan { /// /// # Errors /// - node is not relational - pub fn get_relational_output(&self, rel_id: NodeId) -> Result<NodeId, SbroadError> { - if let Node::Relational(rel) = self.get_node(rel_id)? { - Ok(rel.output()) - } else { - Err(SbroadError::Invalid(Entity::Relational, None)) - } + pub fn get_relational_output(&self, rel_id: usize) -> Result<usize, SbroadError> { + let rel_node = self.get_relation_node(rel_id)?; + Ok(rel_node.output()) } /// Gets list of aliases in output tuple of `rel_id`. @@ -1452,44 +1443,33 @@ impl Plan { } /// Gets children from relational node. - /// - /// # Errors - /// - node is not relational pub fn get_relational_children(&self, rel_id: NodeId) -> Result<Children<'_>, SbroadError> { if let Node::Relational(_) = self.get_node(rel_id)? { Ok(self.children(rel_id)) } else { - Err(SbroadError::Invalid( - Entity::Node, - Some("invalid relational node".into()), - )) + panic!("Expected relational node for getting children.") } } /// Gets child with specified index /// /// # Errors - /// - node is not relational - /// - node does not have child with specified index + /// - Unable to get node. + /// + /// # Panics + /// - Node is not relational. + /// - Node does not have child with specified index. pub fn get_relational_child( &self, rel_id: NodeId, child_idx: usize, ) -> Result<NodeId, SbroadError> { if let Node::Relational(rel) = self.get_node(rel_id)? { - return Ok(*rel.children().get(child_idx).ok_or_else(|| { - SbroadError::Invalid( - Entity::Relational, - Some(format_smolstr!( - "rel node {rel:?} has no child with idx ({child_idx})" - )), - ) - })?); + return Ok(*rel.children().get(child_idx).unwrap_or_else(|| { + panic!("Rel node {rel:?} has no child with idx ({child_idx}).") + })); } - Err(SbroadError::NotFound( - Entity::Relational, - format_smolstr!("with id ({rel_id})"), - )) + panic!("Relational node with id {rel_id} is not found.") } /// Some nodes (like Having, Selection, Join), @@ -1666,20 +1646,12 @@ impl Plan { } /// Sets children for relational node - /// - /// # Errors - /// - node is not relational - /// - node is scan (always a leaf node) - pub fn set_relational_children( - &mut self, - rel_id: NodeId, - children: Vec<NodeId>, - ) -> Result<(), SbroadError> { - if let MutNode::Relational(ref mut rel) = self.get_mut_node(rel_id)? { + pub fn set_relational_children(&mut self, rel_id: NodeId, children: Vec<NodeId>) { + if let MutNode::Relational(ref mut rel) = self.get_mut_node(rel_id) { rel.set_children(children); - return Ok(()); + } else { + panic!("Expected relational node for {rel_id}."); } - Err(SbroadError::Invalid(Entity::Relational, None)) } /// Get relational Scan name that given `output_alias_position` (`Expression::Alias`) @@ -1765,21 +1737,17 @@ impl Plan { /// # Errors /// - Failed to get plan top /// - Node returned by the relational iterator is not relational (bug) - pub fn is_additional_child(&self, node_id: NodeId) -> Result<bool, SbroadError> { - for node in &self.nodes.arena64 { - match node { - Node64::Selection(Selection { children, .. }) - | Node64::Having(Having { children, .. }) => { - if children.iter().skip(1).any(|&c| c == node_id) { - return Ok(true); - } - } - Node64::Join(Join { children, .. }) => { - if children.iter().skip(2).any(|&c| c == node_id) { - return Ok(true); - } + pub fn is_additional_child(&self, node_id: usize) -> Result<bool, SbroadError> { + for (id, node) in self.nodes.iter().enumerate() { + if let Node::Relational(rel_node) = node { + let children = rel_node.children(); + let to_skip = self.get_required_children_len(id)?; + let Some(to_skip) = to_skip else { + continue; + }; + if children.iter().skip(to_skip).any(|&c| c == node_id) { + return Ok(true); } - _ => {} } } Ok(false) diff --git a/sbroad-core/src/ir/operator/tests.rs b/sbroad-core/src/ir/operator/tests.rs index 48bade8fa..e1ef35589 100644 --- a/sbroad-core/src/ir/operator/tests.rs +++ b/sbroad-core/src/ir/operator/tests.rs @@ -48,7 +48,7 @@ fn scan_rel() { if let Node::Expression(expr) = row { let keys: HashSet<_, RepeatableState> = collection! { Key::new(vec![1, 0]) }; assert_eq!( - expr.distribution().unwrap(), + expr.distribution(), &Distribution::Segment { keys: keys.into() } ); } else { diff --git a/sbroad-core/src/ir/transformation/bool_in.rs b/sbroad-core/src/ir/transformation/bool_in.rs index 9fa275082..89548eee8 100644 --- a/sbroad-core/src/ir/transformation/bool_in.rs +++ b/sbroad-core/src/ir/transformation/bool_in.rs @@ -60,7 +60,7 @@ impl Plan { return Ok((top_id, top_id)); } - let right_columns = self.get_expression_node(right_id)?.clone_row_list()?; + let right_columns = self.get_expression_node(right_id)?.clone_row_list(); if let Some((first_id, other)) = right_columns.split_first() { let new_left_id = left_id; diff --git a/sbroad-core/src/ir/transformation/redistribution.rs b/sbroad-core/src/ir/transformation/redistribution.rs index 29c1c4953..ce1aa7224 100644 --- a/sbroad-core/src/ir/transformation/redistribution.rs +++ b/sbroad-core/src/ir/transformation/redistribution.rs @@ -579,6 +579,8 @@ impl Plan { } /// Create Motions from the strategy map. + /// Note: If we call this function, every child of strategy `parent_id` must be presented in the map. + /// For such a case we have `MotionPolicy::None` that won't create additional Motion nodes. fn create_motion_nodes(&mut self, mut strategy: Strategy) -> Result<(), SbroadError> { let parent_id = strategy.parent_id; let children = self.get_relational_children(parent_id)?; @@ -597,14 +599,12 @@ impl Plan { .iter() .all(|(node, _)| children_set.contains(node)) { - return Err(SbroadError::FailedTo( - Action::Add, - Some(Entity::Motion), - "trying to add motions for non-existing children in relational node".into(), - )); + panic!("Trying to add motions for non-existing children in relational node"); } // Add motions. + + // Children nodes covered with Motions (if needed from `strategy`). let mut children_with_motions: Vec<NodeId> = Vec::new(); let children_owned = children.to_vec(); for child in children_owned { @@ -619,7 +619,7 @@ impl Plan { children_with_motions.push(child); } } - self.set_relational_children(parent_id, children_with_motions)?; + self.set_relational_children(parent_id, children_with_motions); Ok(()) } @@ -752,10 +752,7 @@ impl Plan { if let Expression::Unary(UnaryExpr { child, .. }) = not_node { not_nodes_children.insert(*child); } else { - return Err(SbroadError::Invalid( - Entity::Expression, - Some(format_smolstr!("Expected Not operator, got {not_node:?}")), - )); + panic!("Expected Not operator, got {not_node:?}"); } } @@ -769,8 +766,8 @@ impl Plan { for LevelNode(_, bool_node) in &bool_nodes { let strategies = self.get_sq_node_strategies_for_bool_op(select_id, *bool_node)?; for (id, policy) in strategies { - // In case we faced with `not ... in ...`, we - // have to change motion policy to Full. + // In case NOT operator is covering expression with subquery like in + // `not (... in ...)` expression, we have to change motion policy to Full. if not_nodes_children.contains(bool_node) { strategy.add_child(id, MotionPolicy::Full, Program::default()); } else { @@ -865,7 +862,7 @@ impl Plan { /// /// # Errors /// - If the node is not a row node. - fn build_row_map(&self, row_id: NodeId) -> Result<HashMap<usize, NodeId>, SbroadError> { + fn build_row_map(&self, row_id: NodeId) -> Result<HashMap<usize, usize>, SbroadError> { let columns = self.get_row_list(row_id)?; let mut map: HashMap<usize, NodeId> = HashMap::new(); for (pos, col) in columns.iter().enumerate() { @@ -1313,7 +1310,7 @@ impl Plan { (outer_dist, inner_dist), (Distribution::Global, _) | (_, Distribution::Global) ) { - // if at least one child is global, the join can be done without any motions + // If at least one child is global, the join can be done without any motions. strategy.add_child(inner_child, MotionPolicy::None, Program::default()); strategy.add_child(outer_child, MotionPolicy::None, Program::default()); self.fix_sq_strategy_for_global_tbl(rel_id, cond_id, &mut strategy)?; @@ -1329,9 +1326,9 @@ impl Plan { /// then default motions assigned in `choose_strategy_for_bool_op_inner_sq` /// function are wrong. /// - /// By default if subquery has `Segment` distribution, we assign + /// By default, if subquery has `Segment` distribution, we assign /// `Motion(None)` to it in `choose_strategy_for_bool_op_inner_sq`. - /// But in the case described above it can lead to wrong results: + /// But in the case described below it can lead to wrong results: /// /// ```sql /// select a, b from global @@ -1772,7 +1769,7 @@ impl Plan { MotionOpcode::ReshardIfNeeded, ]; - // Delete node alway produce a local segment policy + // Delete node always produce a local segment policy // (i.e. materialization without bucket calculation). map.add_child(child_id, MotionPolicy::Local, Program(program)); @@ -1858,20 +1855,16 @@ impl Plan { Ok(map) } - // Helper function to check whether except is done between - // sharded tables that both contain the bucket_id column - // at the same position in their outputs. In such case - // except can be done locally. - // - // Example: - // select "bucket_id" as a from t1 - // except - // select "bucket_id" as b from t1 - fn is_except_on_bucket_id( - &self, - left_id: NodeId, - right_id: NodeId, - ) -> Result<bool, SbroadError> { + /// Helper function to check whether except is done between + /// sharded tables that both contain the `bucket_id` column + /// at the same position in their outputs. In such case + /// except can be done locally. + /// + /// Example: + /// select `bucket_id` as a from t1 + /// except + /// select `bucket_id` as b from t1 + fn is_except_on_bucket_id(&self, left_id: NodeId, right_id: NodeId) -> Result<bool, SbroadError> { let mut context = self.context_mut(); let Some(left_shard_positions) = context.get_shard_columns_positions(left_id, self)?.copied() @@ -2152,7 +2145,11 @@ impl Plan { PostOrder::with_capacity(|node| self.nodes.rel_iter(node), REL_CAPACITY); post_tree.populate_nodes(top); let nodes = post_tree.take_nodes(); + // Set of already visited nodes. Used for the case of BETWEEN where two expressions may + // refer to the same relational node. let mut visited = AHashSet::with_capacity(nodes.len()); + // Map of { old relational child -> new relational child } + // used to fix Union nodes. let mut old_new: AHashMap<NodeId, NodeId> = AHashMap::new(); for LevelNode(_, id) in nodes { @@ -2160,6 +2157,7 @@ impl Plan { continue; } + // We clone it because immutable reference won't allow us to change plan later. let node = self.get_relation_node(id)?.get_rel_owned(); // Some transformations (Union) need to add new nodes above @@ -2244,6 +2242,8 @@ impl Plan { self.get_relational_output(self.get_relational_child(id, 0)?)?, )?; if matches!(child_dist, Distribution::Single | Distribution::Global) { + // Note on why we skip `add_two_stage_aggregation` call below in case of + // Single or Global distribution: // If child has Single or Global distribution and this Projection // contains aggregates or there is GroupBy, // then we don't need two stage transformation, @@ -2324,7 +2324,7 @@ impl Plan { // Possible, current CTE subtree has already been resolved and we // can just copy the corresponding motion node. if let Some(motion_id) = cte_motions.get(&child) { - self.set_relational_children(id, vec![*motion_id])?; + self.set_relational_children(id, vec![*motion_id]); } else { let strategy = self.resolve_cte_conflicts(id)?; self.create_motion_nodes(strategy)?; diff --git a/sbroad-core/src/ir/transformation/redistribution/dml.rs b/sbroad-core/src/ir/transformation/redistribution/dml.rs index 96a901a73..0388af48a 100644 --- a/sbroad-core/src/ir/transformation/redistribution/dml.rs +++ b/sbroad-core/src/ir/transformation/redistribution/dml.rs @@ -6,17 +6,13 @@ use crate::ir::relation::{Column, Table}; use crate::ir::transformation::redistribution::MotionOpcode; use crate::ir::Plan; use ahash::AHashMap; -use smol_str::{format_smolstr, ToSmolStr}; +use smol_str::format_smolstr; use super::{MotionKey, Target}; impl Plan { /// Return first child of `Insert` node - /// - /// # Errors - /// - node is not `Insert` - /// - `Insert` has 0 or more than 1 child - pub fn dml_child_id(&self, dml_node_id: NodeId) -> Result<NodeId, SbroadError> { + pub fn dml_child_id(&self, dml_node_id: usize) -> Result<usize, SbroadError> { let dml_node = self.get_relation_node(dml_node_id)?; if let Relational::Insert(Insert { children, .. }) | Relational::Update(Update { children, .. }) @@ -25,15 +21,9 @@ impl Plan { if let (Some(child), None) = (children.first(), children.get(1)) { return Ok(*child); } - return Err(SbroadError::Unsupported( - Entity::Operator, - Some("Insert/Update must have exactly a single child node".to_smolstr()), - )); + panic!("DML node must have exactly a single child node."); } - Err(SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!("dml node with id {dml_node_id:?}")), - )) + panic!("Expected DML node to get child from. Got {dml_node:?}."); } /// Return `ConflictStrategy` for given insert node @@ -73,7 +63,7 @@ impl Plan { // output columns. let child_id = self.dml_child_id(insert_id)?; let child_output_id = self.get_relation_node(child_id)?.output(); - let child_row = self.get_row_list(child_output_id)?; + let child_row = self.get_expression_node(child_output_id)?.get_row_list(); if columns.len() != child_row.len() { return Err(SbroadError::Invalid( Entity::Node, @@ -128,10 +118,7 @@ impl Plan { } /// Return the table for given `Insert` node - /// - /// # Errors - /// - Node is not an `Insert` - pub fn dml_node_table(&self, node_id: NodeId) -> Result<&Table, SbroadError> { + pub fn dml_node_table(&self, node_id: usize) -> Result<&Table, SbroadError> { let node = self.get_relation_node(node_id)?; if let Relational::Insert(Insert { relation, .. }) | Relational::Update(Update { relation, .. }) @@ -139,10 +126,7 @@ impl Plan { { return self.get_relation_or_error(relation); } - Err(SbroadError::Invalid( - Entity::Node, - Some(format_smolstr!("DML node with id {node_id:?}")), - )) + panic!("Expected DML node to get table from. Got {node:?}.") } /// Set the length of tuple to delete for diff --git a/sbroad-core/src/ir/transformation/redistribution/tests.rs b/sbroad-core/src/ir/transformation/redistribution/tests.rs index ce8c407b1..ddfec14b2 100644 --- a/sbroad-core/src/ir/transformation/redistribution/tests.rs +++ b/sbroad-core/src/ir/transformation/redistribution/tests.rs @@ -161,6 +161,8 @@ fn inner_join_local_policy_sq_with_union_all_in_filter() { #[test] fn join_inner_and_local_full_policies() { + // "hash_testing" sharding_key = ("identification_number", "product_code") + // "hash_testing_hist" sharding_key = the same let query = r#"SELECT * FROM "hash_testing" AS "t1" INNER JOIN (SELECT "identification_number" as "id", "product_code" as "pc" FROM "hash_testing_hist") AS "t2" diff --git a/sbroad-core/src/ir/tree/tests.rs b/sbroad-core/src/ir/tree/tests.rs index 2a36ff61a..cccc7ac78 100644 --- a/sbroad-core/src/ir/tree/tests.rs +++ b/sbroad-core/src/ir/tree/tests.rs @@ -239,8 +239,7 @@ fn subtree_dfs_post() { let row_children = plan .get_expression_node(proj_row_id) .unwrap() - .clone_row_list() - .unwrap(); + .clone_row_list(); let alias_id = row_children.first().unwrap(); let Expression::Alias(Alias { child: c_ref_id, .. -- GitLab