From f9d584e9322c18ebbaf2865ac62170afde96d905 Mon Sep 17 00:00:00 2001 From: Denis Smirnov <sd@picodata.io> Date: Wed, 13 Apr 2022 17:38:01 +0700 Subject: [PATCH] feat: add benchmarks and improve performance --- Cargo.toml | 9 +- benches/engine.rs | 336 ++++++++++++++++++ benches/parse.rs | 244 +++++++++++++ src/executor.rs | 11 +- src/executor/bucket.rs | 9 +- .../engine/cartridge/backend/sql/ir.rs | 1 + .../engine/cartridge/backend/sql/tree.rs | 14 +- src/executor/engine/mock.rs | 21 +- src/executor/ir.rs | 10 +- src/executor/result.rs | 1 + src/executor/vtable.rs | 4 + src/frontend/sql/grammar.pest | 9 +- src/frontend/sql/ir.rs | 16 +- src/ir.rs | 10 +- src/ir/distribution.rs | 13 +- src/ir/expression.rs | 90 +++-- src/ir/operator.rs | 32 +- src/ir/transformation.rs | 10 +- src/ir/transformation/dnf.rs | 27 +- src/ir/transformation/equality_propagation.rs | 8 +- src/ir/transformation/merge_tuples.rs | 28 +- src/ir/transformation/redistribution.rs | 24 +- src/lib.rs | 2 +- 23 files changed, 786 insertions(+), 143 deletions(-) create mode 100644 benches/engine.rs create mode 100644 benches/parse.rs diff --git a/Cargo.toml b/Cargo.toml index a2f5f82156..250d63d066 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +ahash = "0.7" decimal = "2.1.0" hash32 = "0.2" itertools = "0.10.3" @@ -24,6 +25,12 @@ rmp-serde = "0.14" [dev-dependencies] pretty_assertions = "1.0.0" +criterion = "0.3" [lib] -crate-type = ["cdylib"] +name = "sbroad" +crate-type = ["cdylib", "rlib"] + +[[bench]] +name = "parse" +harness = false diff --git a/benches/engine.rs b/benches/engine.rs new file mode 100644 index 0000000000..94621db025 --- /dev/null +++ b/benches/engine.rs @@ -0,0 +1,336 @@ +extern crate sbroad; + +use sbroad::errors::QueryPlannerError; +use sbroad::executor::bucket::Buckets; +use sbroad::executor::engine::cartridge::hash::str_to_bucket_id; +use sbroad::executor::engine::Engine; +use sbroad::executor::ir::ExecutionPlan; +use sbroad::executor::result::{BoxExecuteFormat, Value}; +use sbroad::executor::vtable::VirtualTable; +use sbroad::executor::Metadata; +use sbroad::ir::relation::{Column, Table, Type}; +use sbroad::ir::value::Value as IrValue; +use std::collections::HashMap; + +#[allow(clippy::module_name_repetitions)] +#[derive(Debug, Clone)] +pub struct MetadataMock { + schema: HashMap<String, Vec<String>>, + tables: HashMap<String, Table>, + bucket_count: usize, +} + +impl Metadata for MetadataMock { + fn get_table_segment(&self, table_name: &str) -> Result<Table, QueryPlannerError> { + let name = Self::to_name(table_name); + match self.tables.get(&name) { + Some(v) => Ok(v.clone()), + None => Err(QueryPlannerError::SpaceNotFound), + } + } + + fn get_exec_waiting_timeout(&self) -> u64 { + 0 + } + + fn get_sharding_key_by_space(&self, space: &str) -> Result<Vec<&str>, QueryPlannerError> { + Ok(self + .schema + .get(space) + .map(|v| v.iter().map(String::as_str).collect::<Vec<&str>>()) + .unwrap()) + } +} + +impl Default for MetadataMock { + fn default() -> Self { + Self::new() + } +} + +impl MetadataMock { + /// Mock engine constructor. + /// + /// # Panics + /// - If schema is invalid. + #[allow(clippy::too_many_lines)] + #[must_use] + pub fn new() -> Self { + let mut tables = HashMap::new(); + + let columns = vec![ + Column::new("\"vehicleguid\"", Type::Number), + Column::new("\"reestrid\"", Type::Number), + Column::new("\"reestrstatus\"", Type::Number), + Column::new("\"vehicleregno\"", Type::Number), + Column::new("\"vehiclevin\"", Type::Number), + Column::new("\"vehiclevin2\"", Type::Number), + Column::new("\"vehiclechassisnum\"", Type::Number), + Column::new("\"vehiclereleaseyear\"", Type::Number), + Column::new("\"operationregdoctypename\"", Type::Number), + Column::new("\"operationregdoc\"", Type::Number), + Column::new("\"operationregdocissuedate\"", Type::Number), + Column::new("\"operationregdoccomments\"", Type::Number), + Column::new("\"vehicleptstypename\"", Type::Number), + Column::new("\"vehicleptsnum\"", Type::Number), + Column::new("\"vehicleptsissuedate\"", Type::Number), + Column::new("\"vehicleptsissuer\"", Type::Number), + Column::new("\"vehicleptscomments\"", Type::Number), + Column::new("\"vehiclebodycolor\"", Type::Number), + Column::new("\"vehiclebrand\"", Type::Number), + Column::new("\"vehiclemodel\"", Type::Number), + Column::new("\"vehiclebrandmodel\"", Type::Number), + Column::new("\"vehiclebodynum\"", Type::Number), + Column::new("\"vehiclecost\"", Type::Number), + Column::new("\"vehiclegasequip\"", Type::Number), + Column::new("\"vehicleproducername\"", Type::Number), + Column::new("\"vehiclegrossmass\"", Type::Number), + Column::new("\"vehiclemass\"", Type::Number), + Column::new("\"vehiclesteeringwheeltypeid\"", Type::Number), + Column::new("\"vehiclekpptype\"", Type::Number), + Column::new("\"vehicletransmissiontype\"", Type::Number), + Column::new("\"vehicletypename\"", Type::Number), + Column::new("\"vehiclecategory\"", Type::Number), + Column::new("\"vehicletypeunit\"", Type::Number), + Column::new("\"vehicleecoclass\"", Type::Number), + Column::new("\"vehiclespecfuncname\"", Type::Number), + Column::new("\"vehicleenclosedvolume\"", Type::Number), + Column::new("\"vehicleenginemodel\"", Type::Number), + Column::new("\"vehicleenginenum\"", Type::Number), + Column::new("\"vehicleenginepower\"", Type::Number), + Column::new("\"vehicleenginepowerkw\"", Type::Number), + Column::new("\"vehicleenginetype\"", Type::Number), + Column::new("\"holdrestrictiondate\"", Type::Number), + Column::new("\"approvalnum\"", Type::Number), + Column::new("\"approvaldate\"", Type::Number), + Column::new("\"approvaltype\"", Type::Number), + Column::new("\"utilizationfeename\"", Type::Number), + Column::new("\"customsdoc\"", Type::Number), + Column::new("\"customsdocdate\"", Type::Number), + Column::new("\"customsdocissue\"", Type::Number), + Column::new("\"customsdocrestriction\"", Type::Number), + Column::new("\"customscountryremovalid\"", Type::Number), + Column::new("\"customscountryremovalname\"", Type::Number), + Column::new("\"ownerorgname\"", Type::Number), + Column::new("\"ownerinn\"", Type::Number), + Column::new("\"ownerogrn\"", Type::Number), + Column::new("\"ownerkpp\"", Type::Number), + Column::new("\"ownerpersonlastname\"", Type::Number), + Column::new("\"ownerpersonfirstname\"", Type::Number), + Column::new("\"ownerpersonmiddlename\"", Type::Number), + Column::new("\"ownerpersonbirthdate\"", Type::Number), + Column::new("\"ownerbirthplace\"", Type::Number), + Column::new("\"ownerpersonogrnip\"", Type::Number), + Column::new("\"owneraddressindex\"", Type::Number), + Column::new("\"owneraddressmundistrict\"", Type::Number), + Column::new("\"owneraddresssettlement\"", Type::Number), + Column::new("\"owneraddressstreet\"", Type::Number), + Column::new("\"ownerpersoninn\"", Type::Number), + Column::new("\"ownerpersondoccode\"", Type::Number), + Column::new("\"ownerpersondocnum\"", Type::Number), + Column::new("\"ownerpersondocdate\"", Type::Number), + Column::new("\"operationname\"", Type::Number), + Column::new("\"operationdate\"", Type::Number), + Column::new("\"operationdepartmentname\"", Type::Number), + Column::new("\"operationattorney\"", Type::Number), + Column::new("\"operationlising\"", Type::Number), + Column::new("\"holdertypeid\"", Type::Number), + Column::new("\"holderpersondoccode\"", Type::Number), + Column::new("\"holderpersondocnum\"", Type::Number), + Column::new("\"holderpersondocdate\"", Type::Number), + Column::new("\"holderpersondocissuer\"", Type::Number), + Column::new("\"holderpersonlastname\"", Type::Number), + Column::new("\"holderpersonfirstname\"", Type::Number), + Column::new("\"holderpersonmiddlename\"", Type::Number), + Column::new("\"holderpersonbirthdate\"", Type::Number), + Column::new("\"holderpersonbirthregionid\"", Type::Number), + Column::new("\"holderpersonsex\"", Type::Number), + Column::new("\"holderpersonbirthplace\"", Type::Number), + Column::new("\"holderpersoninn\"", Type::Number), + Column::new("\"holderpersonsnils\"", Type::Number), + Column::new("\"holderpersonogrnip\"", Type::Number), + Column::new("\"holderaddressguid\"", Type::Number), + Column::new("\"holderaddressregionid\"", Type::Number), + Column::new("\"holderaddressregionname\"", Type::Number), + Column::new("\"holderaddressdistrict\"", Type::Number), + Column::new("\"holderaddressmundistrict\"", Type::Number), + Column::new("\"holderaddresssettlement\"", Type::Number), + Column::new("\"holderaddressstreet\"", Type::Number), + Column::new("\"holderaddressbuilding\"", Type::Number), + Column::new("\"holderaddressstructureid\"", Type::Number), + Column::new("\"holderaddressstructurename\"", Type::Number), + Column::new("\"holderaddressstructure\"", Type::Number), + Column::new("\"sys_from\"", Type::Number), + Column::new("\"sys_to\"", Type::Number), + ]; + let sharding_key: &[&str] = &["\"reestrid\""]; + tables.insert( + "\"test__gibdd_db__vehicle_reg_and_res100_actual\"".to_string(), + Table::new_seg( + "\"test__gibdd_db__vehicle_reg_and_res100_actual\"", + columns.clone(), + sharding_key, + ) + .unwrap(), + ); + let sharding_key: &[&str] = &["\"reestrid\""]; + tables.insert( + "\"test__gibdd_db__vehicle_reg_and_res100_history\"".to_string(), + Table::new_seg( + "\"test__gibdd_db__vehicle_reg_and_res100_history\"", + columns, + sharding_key, + ) + .unwrap(), + ); + + MetadataMock { + schema: [ + ("EMPLOYEES".into(), vec!["ID".into()]), + ( + "hash_testing".into(), + vec!["identification_number".into(), "product_code".into()], + ), + ] + .into_iter() + .collect(), + tables, + bucket_count: 10000, + } + } +} + +#[allow(clippy::module_name_repetitions)] +#[derive(Debug, Clone)] +pub struct EngineMock { + metadata: MetadataMock, + virtual_tables: HashMap<usize, VirtualTable>, +} + +impl Engine for EngineMock { + type Metadata = MetadataMock; + + fn metadata(&self) -> &Self::Metadata + where + Self: Sized, + { + &self.metadata + } + + fn has_metadata(&self) -> bool { + self.metadata.tables.is_empty() + } + + fn clear_metadata(&mut self) { + self.metadata.tables.clear(); + } + + fn load_metadata(&mut self) -> Result<(), QueryPlannerError> { + self.metadata = MetadataMock::new(); + Ok(()) + } + + fn materialize_motion( + &self, + _plan: &mut ExecutionPlan, + motion_node_id: usize, + _buckets: &Buckets, + ) -> Result<VirtualTable, QueryPlannerError> { + if let Some(virtual_table) = self.virtual_tables.get(&motion_node_id) { + Ok(virtual_table.clone()) + } else { + Err(QueryPlannerError::CustomError( + "No virtual table found for motion node".to_string(), + )) + } + } + + fn exec( + &self, + plan: &mut ExecutionPlan, + top_id: usize, + buckets: &Buckets, + ) -> Result<BoxExecuteFormat, QueryPlannerError> { + let mut result = BoxExecuteFormat::new(); + let sql = plan.subtree_as_sql(top_id)?; + + match buckets { + Buckets::All => { + result.extend(cluster_exec_query(&sql))?; + } + Buckets::Filtered(list) => { + for bucket in list { + let temp_result = bucket_exec_query(*bucket, &sql); + result.extend(temp_result)?; + } + } + } + + // Sort results to make tests reproducible. + result.rows.sort_by_key(|k| k[0].to_string()); + Ok(result) + } + + fn extract_sharding_keys( + &self, + space: String, + args: HashMap<String, IrValue>, + ) -> Result<Vec<IrValue>, QueryPlannerError> { + Ok(self + .metadata() + .get_sharding_key_by_space(&space) + .unwrap() + .iter() + .fold(Vec::new(), |mut acc: Vec<IrValue>, &v| { + acc.push(args.get(v).unwrap().clone()); + acc + })) + } + + fn determine_bucket_id(&self, s: &str) -> u64 { + str_to_bucket_id(s, self.metadata.bucket_count) + } +} + +impl Default for EngineMock { + fn default() -> Self { + Self::new() + } +} + +impl EngineMock { + #[allow(dead_code)] + #[must_use] + pub fn new() -> Self { + EngineMock { + metadata: MetadataMock::new(), + virtual_tables: HashMap::new(), + } + } + + #[allow(dead_code)] + pub fn add_virtual_table(&mut self, id: usize, table: VirtualTable) { + self.virtual_tables.insert(id, table); + } +} + +fn bucket_exec_query(bucket: u64, query: &str) -> BoxExecuteFormat { + let mut result = BoxExecuteFormat::new(); + + result.rows.push(vec![ + Value::String(format!("Execute query on a bucket [{}]", bucket)), + Value::String(String::from(query)), + ]); + + result +} + +fn cluster_exec_query(query: &str) -> BoxExecuteFormat { + let mut result = BoxExecuteFormat::new(); + + result.rows.push(vec![ + Value::String(String::from("Execute query on all buckets")), + Value::String(String::from(query)), + ]); + result +} diff --git a/benches/parse.rs b/benches/parse.rs new file mode 100644 index 0000000000..8c390b1de0 --- /dev/null +++ b/benches/parse.rs @@ -0,0 +1,244 @@ +extern crate sbroad; + +use criterion::{criterion_group, criterion_main, Criterion}; +use engine::EngineMock; +use sbroad::executor::Query; + +fn query1() { + let sql = r#"SELECT + * + FROM + ( + SELECT + "vehicleguid", + "reestrid", + "reestrstatus", + "vehicleregno", + "vehiclevin", + "vehiclevin2", + "vehiclechassisnum", + "vehiclereleaseyear", + "operationregdoctypename", + "operationregdoc", + "operationregdocissuedate", + "operationregdoccomments", + "vehicleptstypename", + "vehicleptsnum", + "vehicleptsissuedate", + "vehicleptsissuer", + "vehicleptscomments", + "vehiclebodycolor", + "vehiclebrand", + "vehiclemodel", + "vehiclebrandmodel", + "vehiclebodynum", + "vehiclecost", + "vehiclegasequip", + "vehicleproducername", + "vehiclegrossmass", + "vehiclemass", + "vehiclesteeringwheeltypeid", + "vehiclekpptype", + "vehicletransmissiontype", + "vehicletypename", + "vehiclecategory", + "vehicletypeunit", + "vehicleecoclass", + "vehiclespecfuncname", + "vehicleenclosedvolume", + "vehicleenginemodel", + "vehicleenginenum", + "vehicleenginepower", + "vehicleenginepowerkw", + "vehicleenginetype", + "holdrestrictiondate", + "approvalnum", + "approvaldate", + "approvaltype", + "utilizationfeename", + "customsdoc", + "customsdocdate", + "customsdocissue", + "customsdocrestriction", + "customscountryremovalid", + "customscountryremovalname", + "ownerorgname", + "ownerinn", + "ownerogrn", + "ownerkpp", + "ownerpersonlastname", + "ownerpersonfirstname", + "ownerpersonmiddlename", + "ownerpersonbirthdate", + "ownerbirthplace", + "ownerpersonogrnip", + "owneraddressindex", + "owneraddressmundistrict", + "owneraddresssettlement", + "owneraddressstreet", + "ownerpersoninn", + "ownerpersondoccode", + "ownerpersondocnum", + "ownerpersondocdate", + "operationname", + "operationdate", + "operationdepartmentname", + "operationattorney", + "operationlising", + "holdertypeid", + "holderpersondoccode", + "holderpersondocnum", + "holderpersondocdate", + "holderpersondocissuer", + "holderpersonlastname", + "holderpersonfirstname", + "holderpersonmiddlename", + "holderpersonbirthdate", + "holderpersonbirthregionid", + "holderpersonsex", + "holderpersonbirthplace", + "holderpersoninn", + "holderpersonsnils", + "holderpersonogrnip", + "holderaddressguid", + "holderaddressregionid", + "holderaddressregionname", + "holderaddressdistrict", + "holderaddressmundistrict", + "holderaddresssettlement", + "holderaddressstreet", + "holderaddressbuilding", + "holderaddressstructureid", + "holderaddressstructurename", + "holderaddressstructure" + FROM + "test__gibdd_db__vehicle_reg_and_res100_history" + WHERE + "sys_from" <= 332 + AND "sys_to" >= 332 + UNION + ALL + SELECT + "vehicleguid", + "reestrid", + "reestrstatus", + "vehicleregno", + "vehiclevin", + "vehiclevin2", + "vehiclechassisnum", + "vehiclereleaseyear", + "operationregdoctypename", + "operationregdoc", + "operationregdocissuedate", + "operationregdoccomments", + "vehicleptstypename", + "vehicleptsnum", + "vehicleptsissuedate", + "vehicleptsissuer", + "vehicleptscomments", + "vehiclebodycolor", + "vehiclebrand", + "vehiclemodel", + "vehiclebrandmodel", + "vehiclebodynum", + "vehiclecost", + "vehiclegasequip", + "vehicleproducername", + "vehiclegrossmass", + "vehiclemass", + "vehiclesteeringwheeltypeid", + "vehiclekpptype", + "vehicletransmissiontype", + "vehicletypename", + "vehiclecategory", + "vehicletypeunit", + "vehicleecoclass", + "vehiclespecfuncname", + "vehicleenclosedvolume", + "vehicleenginemodel", + "vehicleenginenum", + "vehicleenginepower", + "vehicleenginepowerkw", + "vehicleenginetype", + "holdrestrictiondate", + "approvalnum", + "approvaldate", + "approvaltype", + "utilizationfeename", + "customsdoc", + "customsdocdate", + "customsdocissue", + "customsdocrestriction", + "customscountryremovalid", + "customscountryremovalname", + "ownerorgname", + "ownerinn", + "ownerogrn", + "ownerkpp", + "ownerpersonlastname", + "ownerpersonfirstname", + "ownerpersonmiddlename", + "ownerpersonbirthdate", + "ownerbirthplace", + "ownerpersonogrnip", + "owneraddressindex", + "owneraddressmundistrict", + "owneraddresssettlement", + "owneraddressstreet", + "ownerpersoninn", + "ownerpersondoccode", + "ownerpersondocnum", + "ownerpersondocdate", + "operationname", + "operationdate", + "operationdepartmentname", + "operationattorney", + "operationlising", + "holdertypeid", + "holderpersondoccode", + "holderpersondocnum", + "holderpersondocdate", + "holderpersondocissuer", + "holderpersonlastname", + "holderpersonfirstname", + "holderpersonmiddlename", + "holderpersonbirthdate", + "holderpersonbirthregionid", + "holderpersonsex", + "holderpersonbirthplace", + "holderpersoninn", + "holderpersonsnils", + "holderpersonogrnip", + "holderaddressguid", + "holderaddressregionid", + "holderaddressregionname", + "holderaddressdistrict", + "holderaddressmundistrict", + "holderaddresssettlement", + "holderaddressstreet", + "holderaddressbuilding", + "holderaddressstructureid", + "holderaddressstructurename", + "holderaddressstructure" + FROM + "test__gibdd_db__vehicle_reg_and_res100_actual" + WHERE + "sys_from" <= 332 + ) AS "t3" + WHERE + "reestrid" = 452842574"#; + let engine = EngineMock::new(); + let mut query = Query::new(engine, sql).unwrap(); + let top_id = query.get_exec_plan().get_ir_plan().get_top().unwrap(); + query.bucket_discovery(top_id).unwrap(); + query.get_exec_plan().subtree_as_sql(top_id).unwrap(); +} + +fn bench_query1(c: &mut Criterion) { + c.bench_function("query1", |b| b.iter(|| query1())); +} + +criterion_group!(benches, bench_query1); +criterion_main!(benches); + +mod engine; diff --git a/src/executor.rs b/src/executor.rs index cc6bcbf6df..f9aecc7f70 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -33,11 +33,11 @@ use crate::frontend::sql::ast::AbstractSyntaxTree; use crate::ir::Plan; use std::collections::HashMap; -mod bucket; +pub mod bucket; pub mod engine; -pub(crate) mod ir; +pub mod ir; pub mod result; -mod vtable; +pub mod vtable; impl Plan { /// Apply optimization rules to the plan. @@ -92,6 +92,11 @@ where Ok(query) } + /// Get the execution plan of the query. + pub fn get_exec_plan(&self) -> &ExecutionPlan { + &self.exec_plan + } + /// Execute distributed query. /// /// # Errors diff --git a/src/executor/bucket.rs b/src/executor/bucket.rs index 39492ff518..30d5001940 100644 --- a/src/executor/bucket.rs +++ b/src/executor/bucket.rs @@ -21,16 +21,19 @@ pub enum Buckets { impl Buckets { /// Get all buckets in the cluster. + #[must_use] pub fn new_all() -> Self { Buckets::All } /// Get a filtered set of buckets. + #[must_use] pub fn new_filtered(buckets: HashSet<u64>) -> Self { Buckets::Filtered(buckets) } /// Disjunction of two sets of buckets. + #[must_use] pub fn disjunct(&self, buckets: &Buckets) -> Buckets { match (self, buckets) { (Buckets::All, Buckets::All) => Buckets::All, @@ -44,6 +47,7 @@ impl Buckets { } /// Conjunction of two sets of buckets. + #[must_use] pub fn conjunct(&self, buckets: &Buckets) -> Buckets { match (self, buckets) { (Buckets::All, _) | (_, Buckets::All) => Buckets::All, @@ -182,12 +186,9 @@ where /// - Relational nodes contain invalid children. #[allow(clippy::too_many_lines)] pub fn bucket_discovery(&mut self, top_id: usize) -> Result<Buckets, QueryPlannerError> { - let mut nodes: Vec<usize> = Vec::new(); let ir_plan = self.exec_plan.get_ir_plan(); let rel_tree = DftPost::new(&top_id, |node| ir_plan.nodes.rel_iter(node)); - for (_, node_id) in rel_tree { - nodes.push(*node_id); - } + let nodes: Vec<usize> = rel_tree.map(|(_, id)| *id).collect(); for node_id in nodes { if self.bucket_map.get(&node_id).is_some() { diff --git a/src/executor/engine/cartridge/backend/sql/ir.rs b/src/executor/engine/cartridge/backend/sql/ir.rs index d83bb383dd..9b31fdfdbe 100644 --- a/src/executor/engine/cartridge/backend/sql/ir.rs +++ b/src/executor/engine/cartridge/backend/sql/ir.rs @@ -54,6 +54,7 @@ impl ExecutionPlan { #[allow(clippy::too_many_lines)] pub fn subtree_as_sql(&self, node_id: usize) -> Result<String, QueryPlannerError> { let mut sql = String::new(); + let nodes = self.get_sql_order(node_id)?; let delim = " "; diff --git a/src/executor/engine/cartridge/backend/sql/tree.rs b/src/executor/engine/cartridge/backend/sql/tree.rs index 13aad6aca4..020060091f 100644 --- a/src/executor/engine/cartridge/backend/sql/tree.rs +++ b/src/executor/engine/cartridge/backend/sql/tree.rs @@ -222,11 +222,11 @@ impl SyntaxNodes { self.arena.len() } - /// Constructor - pub fn new() -> Self { + /// Constructor with pre-allocated memory + pub fn with_capacity(capacity: usize) -> Self { SyntaxNodes { - arena: Vec::new(), - map: HashMap::new(), + arena: Vec::with_capacity(capacity), + map: HashMap::with_capacity(capacity), } } } @@ -492,7 +492,7 @@ impl<'p> SyntaxPlan<'p> { Ok(self.nodes.push_syntax_node(sn)) } Relational::Motion { .. } => { - let vtable = self.plan.get_motion_vtable(id)?; + let vtable = self.plan.get_motion_vtable(id)?.clone(); let mut children = Vec::from([ self.nodes.push_syntax_node(SyntaxNode::new_open()), self.nodes @@ -524,7 +524,7 @@ impl<'p> SyntaxPlan<'p> { Expression::Row { list, .. } => { if let Some(motion_id) = ir_plan.get_motion_from_row(id)? { // Replace motion node to virtual table node - let vtable = self.plan.get_motion_vtable(motion_id)?; + let vtable = self.plan.get_motion_vtable(motion_id)?.clone(); if vtable.get_alias().is_none() { let sn = SyntaxNode::new_pointer( id, @@ -694,7 +694,7 @@ impl<'p> SyntaxPlan<'p> { fn empty(plan: &'p ExecutionPlan) -> Self { SyntaxPlan { - nodes: SyntaxNodes::new(), + nodes: SyntaxNodes::with_capacity(plan.get_ir_plan().next_id()), top: None, plan, } diff --git a/src/executor/engine/mock.rs b/src/executor/engine/mock.rs index 8092b16f34..322fc695f8 100644 --- a/src/executor/engine/mock.rs +++ b/src/executor/engine/mock.rs @@ -36,12 +36,24 @@ impl Metadata for MetadataMock { Ok(self .schema .get(space) - .map(|v| v.iter().map(|s| s.as_str()).collect::<Vec<&str>>()) + .map(|v| v.iter().map(String::as_str).collect::<Vec<&str>>()) .unwrap()) } } +impl Default for MetadataMock { + fn default() -> Self { + Self::new() + } +} + impl MetadataMock { + /// Mock engine constructor. + /// + /// # Panics + /// - If schema is invalid. + #[allow(clippy::too_many_lines)] + #[must_use] pub fn new() -> Self { let mut tables = HashMap::new(); @@ -220,8 +232,15 @@ impl Engine for EngineMock { } } +impl Default for EngineMock { + fn default() -> Self { + Self::new() + } +} + impl EngineMock { #[allow(dead_code)] + #[must_use] pub fn new() -> Self { EngineMock { metadata: MetadataMock::new(), diff --git a/src/executor/ir.rs b/src/executor/ir.rs index 2a5c6535c2..d333564d6e 100644 --- a/src/executor/ir.rs +++ b/src/executor/ir.rs @@ -23,6 +23,7 @@ impl From<Plan> for ExecutionPlan { } impl ExecutionPlan { + #[must_use] pub fn get_ir_plan(&self) -> &Plan { &self.plan } @@ -59,15 +60,18 @@ impl ExecutionPlan { } /// Get motion virtual table - pub fn get_motion_vtable(&self, motion_id: usize) -> Result<VirtualTable, QueryPlannerError> { + /// + /// # Errors + /// - Failed to find a virtual table for the motion node. + pub fn get_motion_vtable(&self, motion_id: usize) -> Result<&VirtualTable, QueryPlannerError> { if let Some(vtable) = &self.vtables { if let Some(result) = vtable.get(&motion_id) { - return Ok(result.clone()); + return Ok(result); } } Err(QueryPlannerError::CustomError(format!( - "Motion node ({}) not found in the virtual table", + "Motion node ({}) doesn't have a corresponding virtual table", motion_id ))) } diff --git a/src/executor/result.rs b/src/executor/result.rs index efedaa6926..b496cb7bd2 100644 --- a/src/executor/result.rs +++ b/src/executor/result.rs @@ -179,6 +179,7 @@ impl Default for BoxExecuteFormat { impl BoxExecuteFormat { /// Create empty query result set + #[allow(dead_code)] #[must_use] pub fn new() -> Self { BoxExecuteFormat { diff --git a/src/executor/vtable.rs b/src/executor/vtable.rs index 490e3a794f..6d911d4061 100644 --- a/src/executor/vtable.rs +++ b/src/executor/vtable.rs @@ -109,6 +109,9 @@ impl VirtualTable { } /// Set vtable alias name + /// + /// # Errors + /// - Try to set an empty alias name to the virtual table. pub fn set_alias(&mut self, name: &str) -> Result<(), QueryPlannerError> { if name.is_empty() { return Err(QueryPlannerError::CustomError( @@ -121,6 +124,7 @@ impl VirtualTable { } /// Get vtable alias name + #[must_use] pub fn get_alias(&self) -> Option<String> { self.name.clone() } diff --git a/src/frontend/sql/grammar.pest b/src/frontend/sql/grammar.pest index c5fb172e2c..3e80cfa99e 100644 --- a/src/frontend/sql/grammar.pest +++ b/src/frontend/sql/grammar.pest @@ -20,7 +20,7 @@ Query = _{ UnionAll | Select | Values | Insert } Table = @{ Name } InnerJoin = { Scan } Condition = { Expr+ } - UnionAll = { (SubQuery | Select) ~ ^"union" ~ ^"all" ~ (UnionAll | SubQuery | Select) } + UnionAll = { (SubQuery | Select) ~ ^"union" ~ ^"all" ~ (SubQuery | Select) } SubQuery = { "(" ~ (UnionAll | Select | Values) ~ ")" } Insert = { ^"insert" ~ ^"into" ~ Table ~ Values } Values = { ^"values" ~ Row ~ ("," ~ Row)*?} @@ -58,12 +58,7 @@ Expr = _{ Or | And | Cmp | Primary | Parentheses } OrRight = _{ Or | OrLeft } String = @{ !(WHITESPACE* ~ Keyword ~ WHITESPACE) ~ ('A' .. 'Z' | 'a'..'z' | "_" | ASCII_DIGIT)+ } - Keyword = { - ^"all" | ^"and" | ^"as" | ^"false" | ^"from" - | ^"in" | ^"inner" | ^"insert" | ^"into" | ^"join" | ^"null" - | ^"on" | ^"or" | ^"row" | ^"select" | ^"true" | ^"union" - | ^"where" | ^"values" - } + Keyword = { ^"union" | ^"where" } Number = @{ Int ~ ("." ~ ASCII_DIGIT*)? ~ (^"e" ~ Int)? } Int = @{ ("+" | "-")? ~ ASCII_DIGIT+ } diff --git a/src/frontend/sql/ir.rs b/src/frontend/sql/ir.rs index d66bb39845..545216c0ea 100644 --- a/src/frontend/sql/ir.rs +++ b/src/frontend/sql/ir.rs @@ -61,9 +61,9 @@ struct Translation { } impl Translation { - fn new() -> Self { + fn with_capacity(capacity: usize) -> Self { Translation { - map: HashMap::new(), + map: HashMap::with_capacity(capacity), } } @@ -105,8 +105,8 @@ impl AbstractSyntaxTree { None => return Err(QueryPlannerError::InvalidAst), }; let dft_post = DftPost::new(&top, |node| self.nodes.ast_iter(node)); - let mut map = Translation::new(); - let mut rows: HashSet<usize> = HashSet::new(); + let mut map = Translation::with_capacity(self.nodes.next_id()); + let mut rows: HashSet<usize> = HashSet::with_capacity(self.nodes.next_id()); for (_, id) in dft_post { let node = self.nodes.get_node(*id)?.clone(); @@ -230,7 +230,7 @@ impl AbstractSyntaxTree { let left_col_map = plan .get_relation_node(*plan_left_id)? .output_alias_position_map(&plan.nodes)?; - if left_col_map.get(&col_name).is_some() { + if left_col_map.get(&col_name.as_str()).is_some() { let ref_id = plan.add_row_from_left_branch( *plan_left_id, *plan_right_id, @@ -248,7 +248,7 @@ impl AbstractSyntaxTree { let right_col_map = plan .get_relation_node(*plan_right_id)? .output_alias_position_map(&plan.nodes)?; - if right_col_map.get(&col_name).is_some() { + if right_col_map.get(&col_name.as_str()).is_some() { let ref_id = plan.add_row_from_right_branch( *plan_left_id, *plan_right_id, @@ -281,7 +281,7 @@ impl AbstractSyntaxTree { let left_col_map = plan .get_relation_node(*plan_left_id)? .output_alias_position_map(&plan.nodes)?; - if left_col_map.get(&col_name).is_some() { + if left_col_map.get(&col_name.as_str()).is_some() { let ref_id = plan.add_row_from_left_branch( *plan_left_id, *plan_right_id, @@ -293,7 +293,7 @@ impl AbstractSyntaxTree { let right_col_map = plan .get_relation_node(*plan_right_id)? .output_alias_position_map(&plan.nodes)?; - if right_col_map.get(&col_name).is_some() { + if right_col_map.get(&col_name.as_str()).is_some() { let ref_id = plan.add_row_from_right_branch( *plan_left_id, *plan_right_id, diff --git a/src/ir.rs b/src/ir.rs index f501aa2ec4..3598da767d 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -322,7 +322,7 @@ impl Plan { pub fn get_alias_from_reference_node( &self, node: &Expression, - ) -> Result<String, QueryPlannerError> { + ) -> Result<&str, QueryPlannerError> { if let Expression::Reference { targets, position, @@ -364,9 +364,11 @@ impl Plan { QueryPlannerError::CustomError("Invalid position in row list".into()) })?; - let name = self.get_expression_node(col_alias_idx)?.get_alias_name()?; - - return Ok(name); + let col_alias_node = self.get_expression_node(col_alias_idx)?; + match col_alias_node { + Expression::Alias { name, .. } => return Ok(name), + _ => return Err(QueryPlannerError::CustomError("Expected alias node".into())), + } } } diff --git a/src/ir/distribution.rs b/src/ir/distribution.rs index 5193e1fa3c..5ed8f6535e 100644 --- a/src/ir/distribution.rs +++ b/src/ir/distribution.rs @@ -128,8 +128,17 @@ impl Plan { /// # Errors /// Returns `QueryPlannerError` when current expression is not a `Row` or contains broken references. pub fn set_distribution(&mut self, row_id: usize) -> Result<(), QueryPlannerError> { - let mut child_set: HashSet<usize> = HashSet::new(); - let mut child_pos_map: HashMap<(usize, usize), usize> = HashMap::new(); + let row_expr = self.get_expression_node(row_id)?; + let capacity = match row_expr { + Expression::Row { list, .. } => list.len(), + _ => { + return Err(QueryPlannerError::CustomError( + "Expected Row expression".to_string(), + )) + } + }; + let mut child_set: HashSet<usize> = HashSet::with_capacity(capacity); + let mut child_pos_map: HashMap<(usize, usize), usize> = HashMap::with_capacity(capacity); let mut table_set: HashSet<String> = HashSet::new(); let mut table_pos_map: HashMap<usize, usize> = HashMap::default(); let mut parent_node: Option<usize> = None; diff --git a/src/ir/expression.rs b/src/ir/expression.rs index bf21651ee2..f3fa56b01e 100644 --- a/src/ir/expression.rs +++ b/src/ir/expression.rs @@ -6,6 +6,7 @@ //! - the order of the columns (and we can get their types as well) //! - distribution of the data in the tuple +use ahash::RandomState; use std::collections::{HashMap, HashSet}; use serde::{Deserialize, Serialize}; @@ -252,7 +253,7 @@ impl Nodes { list: Vec<usize>, distribution: Option<Distribution>, ) -> Result<usize, QueryPlannerError> { - let mut names: HashSet<String> = HashSet::new(); + let mut names: HashSet<String> = HashSet::with_capacity(list.len()); for alias_node in &list { if let Node::Expression(Expression::Alias { name, .. }) = self @@ -318,7 +319,7 @@ impl Plan { } } - let mut result: Vec<usize> = Vec::new(); + let mut result: Vec<usize> = Vec::with_capacity(col_names.len()); if col_names.is_empty() { let required_targets = if is_join { targets } else { &targets[0..1] }; @@ -399,40 +400,64 @@ impl Plan { )); }; - let map = if let Node::Relational(relational_op) = self.get_node(child_node)? { - relational_op.output_alias_position_map(&self.nodes)? - } else { - return Err(QueryPlannerError::InvalidNode); - }; - - let mut result: Vec<usize> = Vec::new(); - let all_found = col_names.iter().all(|col| { - map.get(*col).map_or(false, |pos| { - let new_targets: Vec<usize> = targets.to_vec(); - // Adds new references and aliases to arena (if we need them). - let r_id = self.nodes.add_ref(None, Some(new_targets), *pos); - if need_aliases { - if let Ok(a_id) = self.nodes.add_alias(col, r_id) { - result.push(a_id); - true - } else { - false + let map: HashMap<&str, usize, RandomState> = + if let Node::Relational(relational_op) = self.get_node(child_node)? { + let output_id = relational_op.output(); + let output = self.get_expression_node(output_id)?; + if let Expression::Row { list, .. } = output { + let state = RandomState::new(); + let mut map: HashMap<&str, usize, RandomState> = + HashMap::with_capacity_and_hasher(list.len(), state); + for (pos, col_id) in list.iter().enumerate() { + let alias = self.get_expression_node(*col_id)?; + if let Expression::Alias { ref name, .. } = alias { + if map.insert(name, pos).is_some() { + return Err(QueryPlannerError::CustomError(format!( + "Duplicate column name {} at position {}", + name, pos + ))); + } + } else { + return Err(QueryPlannerError::CustomError( + "Child node is not an alias".into(), + )); + } } + map } else { - result.push(r_id); - true + return Err(QueryPlannerError::CustomError( + "Relational output tuple is not a row".into(), + )); } + } else { + return Err(QueryPlannerError::InvalidNode); + }; + + let mut refs: Vec<(&str, Vec<usize>, usize)> = Vec::with_capacity(col_names.len()); + let all_found = col_names.iter().all(|col| { + map.get(col).map_or(false, |pos| { + refs.push((col, targets.to_vec(), *pos)); + true }) }); + if !all_found { + return Err(QueryPlannerError::CustomError(format!( + "Some of the columns {:?} were not found in the table", + col_names, + ))); + } - if all_found { - return Ok(result); + for (col, new_targets, pos) in refs { + let r_id = self.nodes.add_ref(None, Some(new_targets), pos); + if need_aliases { + let a_id = self.nodes.add_alias(col, r_id)?; + result.push(a_id); + } else { + result.push(r_id); + } } - Err(QueryPlannerError::CustomError(format!( - "Some of the columns {:?} were not found in the table", - col_names, - ))) + Ok(result) } /// New output for a single child node (with aliases). @@ -739,11 +764,8 @@ impl Plan { /// - Internal errors during the expression tree copy. pub fn expr_clone(&mut self, expr_id: usize) -> Result<usize, QueryPlannerError> { let subtree = DftPost::new(&expr_id, |node| self.nodes.expr_iter(node, false)); - let mut nodes: Vec<usize> = Vec::new(); - for (_, id) in subtree { - nodes.push(*id); - } - let mut map: HashMap<usize, usize> = HashMap::new(); + let nodes: Vec<usize> = subtree.map(|(_, id)| *id).collect(); + let mut map: HashMap<usize, usize> = HashMap::with_capacity(nodes.len()); for id in nodes { let expr = self.get_expression_node(id)?.clone(); let new_id = match expr { @@ -766,7 +788,7 @@ impl Plan { self.nodes.add_bool(new_left_id, op.clone(), new_right_id)? } Expression::Row { list, distribution } => { - let mut new_list: Vec<usize> = Vec::new(); + let mut new_list: Vec<usize> = Vec::with_capacity(list.len()); for column_id in list { let new_column_id = *map.get(&column_id).ok_or_else(|| { QueryPlannerError::CustomError(format!( diff --git a/src/ir/operator.rs b/src/ir/operator.rs index 495aac8046..5dc94985b9 100644 --- a/src/ir/operator.rs +++ b/src/ir/operator.rs @@ -2,6 +2,7 @@ //! //! Contains operator nodes that transform the tuples in IR tree. +use ahash::RandomState; use std::collections::HashMap; use std::fmt::{Display, Formatter}; @@ -157,21 +158,22 @@ impl Relational { /// /// # Errors /// Returns `QueryPlannerError` when the output tuple is invalid. - pub fn output_alias_position_map( - &self, - nodes: &Nodes, - ) -> Result<HashMap<String, usize>, QueryPlannerError> { - let mut map: HashMap<String, usize> = HashMap::new(); - + pub fn output_alias_position_map<'rel_op, 'nodes>( + &'rel_op self, + nodes: &'nodes Nodes, + ) -> Result<HashMap<&'nodes str, usize, RandomState>, QueryPlannerError> { if let Some(Node::Expression(Expression::Row { list, .. })) = nodes.arena.get(self.output()) { + let state = RandomState::new(); + let mut map: HashMap<&str, usize, RandomState> = + HashMap::with_capacity_and_hasher(list.len(), state); let valid = list.iter().enumerate().all(|(pos, item)| { // Checks that expressions in the row list are all aliases if let Some(Node::Expression(Expression::Alias { ref name, .. })) = nodes.arena.get(*item) { // Populates the map and checks for duplicates - if map.insert(String::from(name), pos).is_none() { + if map.insert(name, pos).is_none() { return true; } } @@ -180,9 +182,13 @@ impl Relational { if valid { return Ok(map); } - return Err(QueryPlannerError::InvalidPlan); + return Err(QueryPlannerError::CustomError( + "Invalid output tuple".to_string(), + )); } - Err(QueryPlannerError::ValueOutOfRange) + Err(QueryPlannerError::CustomError( + "Failed to find an output tuple node in the arena".to_string(), + )) } /// Gets output tuple node index in plan node arena. @@ -201,14 +207,14 @@ impl Relational { // Gets a copy of the children nodes. #[must_use] - pub fn children(&self) -> Option<Vec<usize>> { + pub fn children(&self) -> Option<&[usize]> { match self { Relational::InnerJoin { children, .. } | Relational::Motion { children, .. } | Relational::Projection { children, .. } | Relational::ScanSubQuery { children, .. } | Relational::Selection { children, .. } - | Relational::UnionAll { children, .. } => Some(children.clone()), + | Relational::UnionAll { children, .. } => Some(children), Relational::ScanRelation { .. } => None, } } @@ -342,7 +348,7 @@ impl Plan { if let Some(relations) = &self.relations { if let Some(rel) = relations.get(table) { - let mut refs: Vec<usize> = Vec::new(); + let mut refs: Vec<usize> = Vec::with_capacity(rel.columns.len()); for (pos, col) in rel.columns.iter().enumerate() { let r_id = nodes.add_ref(None, None, pos); let col_alias_id = nodes.add_alias(&col.name, r_id)?; @@ -593,7 +599,7 @@ impl Plan { pub fn get_relational_children( &self, rel_id: usize, - ) -> Result<Option<Vec<usize>>, QueryPlannerError> { + ) -> Result<Option<&[usize]>, QueryPlannerError> { if let Node::Relational(rel) = self.get_node(rel_id)? { Ok(rel.children()) } else { diff --git a/src/ir/transformation.rs b/src/ir/transformation.rs index 50fa482fe3..5a290db6f7 100644 --- a/src/ir/transformation.rs +++ b/src/ir/transformation.rs @@ -77,12 +77,9 @@ impl Plan { &mut self, f: &dyn Fn(&mut Plan, usize) -> Result<usize, QueryPlannerError>, ) -> Result<(), QueryPlannerError> { - let mut nodes: Vec<usize> = Vec::new(); let top_id = self.get_top()?; let ir_tree = DftPost::new(&top_id, |node| self.nodes.rel_iter(node)); - for (_, id) in ir_tree { - nodes.push(*id); - } + let nodes: Vec<usize> = ir_tree.map(|(_, id)| *id).collect(); for id in &nodes { let rel = self.get_relation_node(*id)?; let new_tree_id = match rel { @@ -131,11 +128,8 @@ impl Plan { ops: &[Bool], ) -> Result<usize, QueryPlannerError> { let mut map: HashMap<usize, usize> = HashMap::new(); - let mut nodes: Vec<usize> = Vec::new(); let subtree = DftPost::new(&top_id, |node| self.nodes.expr_iter(node, false)); - for (_, id) in subtree { - nodes.push(*id); - } + let nodes: Vec<usize> = subtree.map(|(_, id)| *id).collect(); for id in &nodes { let expr = self.get_expression_node(*id)?; if let Expression::Bool { op, .. } = expr { diff --git a/src/ir/transformation/dnf.rs b/src/ir/transformation/dnf.rs index 95afd3f2a7..21c44e45ac 100644 --- a/src/ir/transformation/dnf.rs +++ b/src/ir/transformation/dnf.rs @@ -11,7 +11,7 @@ //! chain in every "OR" node, so "OR" is treated as a "fork" node. //! //! For example: -//! ``` +//! ```text //! ((a = 1) and (b = 2) or (a = 3)) and (c = 4) //! ``` //! can be converted to a tree: @@ -37,13 +37,13 @@ //! ``` //! //! Here is the list of all the chains ("paths") from the top to the bottom: -//! ``` +//! ```text //! 1. (c = 4) and (a = 1) and (b = 2) //! 2. (c = 4) and (a = 3) //! ``` //! //! To build the DNF we unite the chains with the "OR" node: -//! ``` +//! ```text //! ((c = 4) and (a = 1) and (b = 2)) or ((c = 4) and (a = 3)) //! ``` //! @@ -73,7 +73,7 @@ use crate::errors::QueryPlannerError; use crate::ir::expression::Expression; use crate::ir::operator::Bool; -use crate::ir::Plan; +use crate::ir::{Node, Plan}; use std::collections::VecDeque; /// A chain of the trivalents (boolean or NULL expressions) concatenated by AND. @@ -83,9 +83,9 @@ pub struct Chain { } impl Chain { - fn new() -> Self { + fn with_capacity(capacity: usize) -> Self { Chain { - nodes: VecDeque::new(), + nodes: VecDeque::with_capacity(capacity), } } @@ -163,10 +163,19 @@ impl Plan { /// - If the expression tree is not a trivalent expression. /// - Failed to append node to the AND chain. pub fn get_dnf_chains(&self, top_id: usize) -> Result<VecDeque<Chain>, QueryPlannerError> { - let mut result: VecDeque<Chain> = VecDeque::new(); - let mut stack: Vec<Chain> = Vec::new(); + let capacity: usize = self.nodes.arena.iter().fold(0_usize, |acc, node| { + acc + match node { + Node::Expression(Expression::Bool { + op: Bool::And | Bool::Or, + .. + }) => 1, + _ => 0, + } + }); + let mut result: VecDeque<Chain> = VecDeque::with_capacity(capacity); + let mut stack: Vec<Chain> = Vec::with_capacity(capacity); - let mut top_chain = Chain::new(); + let mut top_chain = Chain::with_capacity(capacity); top_chain.push(top_id, self)?; stack.push(top_chain); diff --git a/src/ir/transformation/equality_propagation.rs b/src/ir/transformation/equality_propagation.rs index ed216f600d..66e192f6fa 100644 --- a/src/ir/transformation/equality_propagation.rs +++ b/src/ir/transformation/equality_propagation.rs @@ -1,7 +1,7 @@ //! Equality propagation deduces new equality expressions in join conditions //! and selection filters. //! For example: -//! ``` +//! ```text //! (a = 1) and (b = 1) => (a = b) //! ``` //! @@ -14,7 +14,7 @@ //! containing references (at the moment only rows with a single column of //! the reference type are supported). Constants are replicated and do not //! produce distribution conflicts. -//! ``` +//! ```text //! select * from (select t1.a, t2.b from t1 join t2 on row(t1.c) = row(t2.d)) //! where row(a) = 1 and row(b) = 1 //! => @@ -35,7 +35,7 @@ //! //! # Implementation //! Let's look on an example: -//! ``` +//! ```sql //! select * from t where //! (a) = 1 and (c) = (e) and (b) = 1 and (d) = 1 and (e) = 4 and (f) = 1 and (a) = (f) //! or (e) = 3 @@ -53,7 +53,7 @@ //! //! 2. Each chain may produce multiple equality classes (where all the //! element are equal to each other). For example, the first chain -//! ``` +//! ```text //! (a) = 1 and (c) = (e) and (b) = 1 and (d) = 1 and (e) = 4 and (f) = 1 //! and (a) = (f) //! ``` diff --git a/src/ir/transformation/merge_tuples.rs b/src/ir/transformation/merge_tuples.rs index 7bb12dec0f..5249b9f210 100644 --- a/src/ir/transformation/merge_tuples.rs +++ b/src/ir/transformation/merge_tuples.rs @@ -43,18 +43,12 @@ pub struct Chain { other: Vec<usize>, } -impl Default for Chain { - fn default() -> Self { - Self::new() - } -} - impl Chain { /// Create a new chain. #[must_use] - pub fn new() -> Self { + pub fn with_capacity(capacity: usize) -> Self { Self { - grouped: HashMap::new(), + grouped: HashMap::with_capacity(capacity), other: Vec::new(), } } @@ -220,9 +214,9 @@ impl Plan { &mut self, nodes: &[usize], ) -> Result<HashMap<usize, Chain, RepeatableState>, QueryPlannerError> { - let mut visited: HashSet<usize> = HashSet::new(); + let mut visited: HashSet<usize> = HashSet::with_capacity(self.nodes.next_id()); let mut chains: HashMap<usize, Chain, RepeatableState> = - HashMap::with_hasher(RepeatableState); + HashMap::with_capacity_and_hasher(nodes.len(), RepeatableState); for id in nodes { if visited.contains(id) { @@ -231,12 +225,8 @@ impl Plan { visited.insert(*id); let tree_and = Bft::new(id, |node| self.nodes.and_iter(node)); - let mut nodes_and: Vec<usize> = Vec::new(); - for (_, and_id) in tree_and { - nodes_and.push(*and_id); - } - let mut chain = Chain::new(); - let mut nodes_for_chain: Vec<usize> = Vec::new(); + let nodes_and: Vec<usize> = tree_and.map(|(_, id)| *id).collect(); + let mut nodes_for_chain: Vec<usize> = Vec::with_capacity(nodes_and.len()); for and_id in nodes_and { let expr = self.get_expression_node(and_id)?; if let Expression::Bool { @@ -261,6 +251,7 @@ impl Plan { } } } + let mut chain = Chain::with_capacity(nodes_for_chain.len()); for node_id in nodes_for_chain { chain.insert(self, node_id)?; } @@ -291,11 +282,8 @@ impl Plan { >, f_to_plan: &dyn Fn(&Chain, &mut Plan) -> Result<usize, QueryPlannerError>, ) -> Result<usize, QueryPlannerError> { - let mut nodes: Vec<usize> = Vec::new(); let tree = Bft::new(&expr_id, |node| self.nodes.expr_iter(node, false)); - for (_, id) in tree { - nodes.push(*id); - } + let nodes: Vec<usize> = tree.map(|(_, id)| *id).collect(); let chains = f_build_chains(self, &nodes)?; // Replace nodes' children with the merged tuples. diff --git a/src/ir/transformation/redistribution.rs b/src/ir/transformation/redistribution.rs index dd0c518803..9b438db280 100644 --- a/src/ir/transformation/redistribution.rs +++ b/src/ir/transformation/redistribution.rs @@ -101,12 +101,8 @@ impl Plan { /// - plan doesn't contain the top node fn get_relational_nodes_dfs_post(&self) -> Result<Vec<usize>, QueryPlannerError> { let top = self.get_top()?; - let mut nodes: Vec<usize> = Vec::new(); - let post_tree = DftPost::new(&top, |node| self.nodes.rel_iter(node)); - for (_, node) in post_tree { - nodes.push(*node); - } + let nodes: Vec<usize> = post_tree.map(|(_, id)| *id).collect(); Ok(nodes) } @@ -251,8 +247,8 @@ impl Plan { rel_id: usize, strategy: &HashMap<usize, MotionPolicy>, ) -> Result<(), QueryPlannerError> { - let children = if let Some(children) = self.get_relational_children(rel_id)? { - children + let children: Vec<usize> = if let Some(children) = self.get_relational_children(rel_id)? { + children.to_vec() } else { return Err(QueryPlannerError::CustomError(String::from( "Trying to add motions under the leaf relational node.", @@ -272,15 +268,15 @@ impl Plan { // Add motions. let mut children_with_motions: Vec<usize> = Vec::new(); - for child in &children { - if let Some(policy) = strategy.get(child) { + for child in children { + if let Some(policy) = strategy.get(&child) { if let MotionPolicy::Local = policy { - children_with_motions.push(*child); + children_with_motions.push(child); } else { - children_with_motions.push(self.add_motion(*child, policy)?); + children_with_motions.push(self.add_motion(child, policy)?); } } else { - children_with_motions.push(*child); + children_with_motions.push(child); } } self.set_relational_children(rel_id, children_with_motions)?; @@ -377,7 +373,7 @@ impl Plan { /// # Errors /// - If the node is not a join node. /// - Join node has no children. - fn get_join_children(&self, join_id: usize) -> Result<Vec<usize>, QueryPlannerError> { + fn get_join_children(&self, join_id: usize) -> Result<&[usize], QueryPlannerError> { let join = self.get_relation_node(join_id)?; if let Relational::InnerJoin { .. } = join { } else { @@ -474,7 +470,7 @@ impl Plan { })?; for key in keys { - let child = self.get_join_child_by_key(key, row_map, &children)?; + let child = self.get_join_child_by_key(key, row_map, children)?; if child == outer_child { outer_keys.push(key.clone()); } else if child == inner_child { diff --git a/src/lib.rs b/src/lib.rs index 80006c78a8..bfa889f69c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,7 @@ #[macro_use] extern crate pest_derive; -mod errors; +pub mod errors; pub mod executor; pub mod frontend; pub mod ir; -- GitLab