diff --git a/Cargo.toml b/Cargo.toml index 250d63d066b2509ee7224b55f5665545435e36f7..54682ad0b0d6a3a2873ef9b6956afff2d129ef86 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ edition = "2021" [dependencies] ahash = "0.7" +base64ct = { version = "1.5", features = ["alloc"] } decimal = "2.1.0" hash32 = "0.2" itertools = "0.10.3" @@ -18,6 +19,7 @@ pest = "2.0" pest_derive = "2.0" serde = { version = "1.0", features = ["derive"] } serde_yaml = "0.8" +sha2 = "0.10" tarantool = { git = "https://sbroad-cargo-token:t-nZyqJVVuhGQv17BX6v@gitlab.com/picodata/picodata/tarantool-module.git", rev="d8921ec6"} traversal = "0.1.2" yaml-rust = "0.4.1" diff --git a/benches/engine.rs b/benches/engine.rs index 2aea2e0518692f254be98ef3f44c407f76966686..65b13421635dbdb604f302e1a7d883c74b4bf79d 100644 --- a/benches/engine.rs +++ b/benches/engine.rs @@ -2,14 +2,16 @@ extern crate sbroad; use sbroad::errors::QueryPlannerError; use sbroad::executor::bucket::Buckets; +use sbroad::executor::engine::cartridge::cache::lru::{LRUCache, DEFAULT_CAPACITY}; use sbroad::executor::engine::cartridge::hash::str_to_bucket_id; -use sbroad::executor::engine::{Engine, LocalMetadata}; +use sbroad::executor::engine::{Engine, LocalMetadata, Metadata, QueryCache}; use sbroad::executor::ir::ExecutionPlan; use sbroad::executor::result::{BoxExecuteFormat, Value}; use sbroad::executor::vtable::VirtualTable; -use sbroad::executor::Metadata; +use sbroad::frontend::sql::ast::AbstractSyntaxTree; use sbroad::ir::relation::{Column, Table, Type}; use sbroad::ir::value::Value as IrValue; +use std::cell::RefCell; use std::collections::HashMap; #[allow(clippy::module_name_repetitions)] @@ -205,10 +207,22 @@ impl MetadataMock { pub struct EngineMock { metadata: MetadataMock, virtual_tables: HashMap<usize, VirtualTable>, + query_cache: RefCell<LRUCache<String, AbstractSyntaxTree>>, } impl Engine for EngineMock { type Metadata = MetadataMock; + type Ast = AbstractSyntaxTree; + type QueryCache = LRUCache<String, AbstractSyntaxTree>; + + fn clear_query_cache(&self, capacity: usize) -> Result<(), QueryPlannerError> { + *self.query_cache.borrow_mut() = Self::QueryCache::new(capacity)?; + Ok(()) + } + + fn query_cache_rc(&self) -> &RefCell<Self::QueryCache> { + &self.query_cache + } fn metadata(&self) -> &Self::Metadata where @@ -310,9 +324,11 @@ impl EngineMock { #[allow(dead_code)] #[must_use] pub fn new() -> Self { + let cache: LRUCache<String, AbstractSyntaxTree> = LRUCache::new(DEFAULT_CAPACITY).unwrap(); EngineMock { metadata: MetadataMock::new(), virtual_tables: HashMap::new(), + query_cache: RefCell::new(cache), } } diff --git a/benches/parse.rs b/benches/parse.rs index 557914c503954b706892c12b8b17d916c10c1d55..847298cb315d5fbbcfb1386874f0a5420e96dffe 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -4,238 +4,244 @@ use criterion::{criterion_group, criterion_main, Criterion}; use engine::EngineMock; use sbroad::executor::Query; -fn query1() { +fn query1_sql() -> String { let sql = r#"SELECT - * - FROM - ( - SELECT - "vehicleguid", - "reestrid", - "reestrstatus", - "vehicleregno", - "vehiclevin", - "vehiclevin2", - "vehiclechassisnum", - "vehiclereleaseyear", - "operationregdoctypename", - "operationregdoc", - "operationregdocissuedate", - "operationregdoccomments", - "vehicleptstypename", - "vehicleptsnum", - "vehicleptsissuedate", - "vehicleptsissuer", - "vehicleptscomments", - "vehiclebodycolor", - "vehiclebrand", - "vehiclemodel", - "vehiclebrandmodel", - "vehiclebodynum", - "vehiclecost", - "vehiclegasequip", - "vehicleproducername", - "vehiclegrossmass", - "vehiclemass", - "vehiclesteeringwheeltypeid", - "vehiclekpptype", - "vehicletransmissiontype", - "vehicletypename", - "vehiclecategory", - "vehicletypeunit", - "vehicleecoclass", - "vehiclespecfuncname", - "vehicleenclosedvolume", - "vehicleenginemodel", - "vehicleenginenum", - "vehicleenginepower", - "vehicleenginepowerkw", - "vehicleenginetype", - "holdrestrictiondate", - "approvalnum", - "approvaldate", - "approvaltype", - "utilizationfeename", - "customsdoc", - "customsdocdate", - "customsdocissue", - "customsdocrestriction", - "customscountryremovalid", - "customscountryremovalname", - "ownerorgname", - "ownerinn", - "ownerogrn", - "ownerkpp", - "ownerpersonlastname", - "ownerpersonfirstname", - "ownerpersonmiddlename", - "ownerpersonbirthdate", - "ownerbirthplace", - "ownerpersonogrnip", - "owneraddressindex", - "owneraddressmundistrict", - "owneraddresssettlement", - "owneraddressstreet", - "ownerpersoninn", - "ownerpersondoccode", - "ownerpersondocnum", - "ownerpersondocdate", - "operationname", - "operationdate", - "operationdepartmentname", - "operationattorney", - "operationlising", - "holdertypeid", - "holderpersondoccode", - "holderpersondocnum", - "holderpersondocdate", - "holderpersondocissuer", - "holderpersonlastname", - "holderpersonfirstname", - "holderpersonmiddlename", - "holderpersonbirthdate", - "holderpersonbirthregionid", - "holderpersonsex", - "holderpersonbirthplace", - "holderpersoninn", - "holderpersonsnils", - "holderpersonogrnip", - "holderaddressguid", - "holderaddressregionid", - "holderaddressregionname", - "holderaddressdistrict", - "holderaddressmundistrict", - "holderaddresssettlement", - "holderaddressstreet", - "holderaddressbuilding", - "holderaddressstructureid", - "holderaddressstructurename", - "holderaddressstructure" - FROM - "test__gibdd_db__vehicle_reg_and_res100_history" - WHERE - "sys_from" <= 332 - AND "sys_to" >= 332 - UNION - ALL - SELECT - "vehicleguid", - "reestrid", - "reestrstatus", - "vehicleregno", - "vehiclevin", - "vehiclevin2", - "vehiclechassisnum", - "vehiclereleaseyear", - "operationregdoctypename", - "operationregdoc", - "operationregdocissuedate", - "operationregdoccomments", - "vehicleptstypename", - "vehicleptsnum", - "vehicleptsissuedate", - "vehicleptsissuer", - "vehicleptscomments", - "vehiclebodycolor", - "vehiclebrand", - "vehiclemodel", - "vehiclebrandmodel", - "vehiclebodynum", - "vehiclecost", - "vehiclegasequip", - "vehicleproducername", - "vehiclegrossmass", - "vehiclemass", - "vehiclesteeringwheeltypeid", - "vehiclekpptype", - "vehicletransmissiontype", - "vehicletypename", - "vehiclecategory", - "vehicletypeunit", - "vehicleecoclass", - "vehiclespecfuncname", - "vehicleenclosedvolume", - "vehicleenginemodel", - "vehicleenginenum", - "vehicleenginepower", - "vehicleenginepowerkw", - "vehicleenginetype", - "holdrestrictiondate", - "approvalnum", - "approvaldate", - "approvaltype", - "utilizationfeename", - "customsdoc", - "customsdocdate", - "customsdocissue", - "customsdocrestriction", - "customscountryremovalid", - "customscountryremovalname", - "ownerorgname", - "ownerinn", - "ownerogrn", - "ownerkpp", - "ownerpersonlastname", - "ownerpersonfirstname", - "ownerpersonmiddlename", - "ownerpersonbirthdate", - "ownerbirthplace", - "ownerpersonogrnip", - "owneraddressindex", - "owneraddressmundistrict", - "owneraddresssettlement", - "owneraddressstreet", - "ownerpersoninn", - "ownerpersondoccode", - "ownerpersondocnum", - "ownerpersondocdate", - "operationname", - "operationdate", - "operationdepartmentname", - "operationattorney", - "operationlising", - "holdertypeid", - "holderpersondoccode", - "holderpersondocnum", - "holderpersondocdate", - "holderpersondocissuer", - "holderpersonlastname", - "holderpersonfirstname", - "holderpersonmiddlename", - "holderpersonbirthdate", - "holderpersonbirthregionid", - "holderpersonsex", - "holderpersonbirthplace", - "holderpersoninn", - "holderpersonsnils", - "holderpersonogrnip", - "holderaddressguid", - "holderaddressregionid", - "holderaddressregionname", - "holderaddressdistrict", - "holderaddressmundistrict", - "holderaddresssettlement", - "holderaddressstreet", - "holderaddressbuilding", - "holderaddressstructureid", - "holderaddressstructurename", - "holderaddressstructure" - FROM - "test__gibdd_db__vehicle_reg_and_res100_actual" - WHERE - "sys_from" <= 332 - ) AS "t3" - WHERE - "reestrid" = 452842574"#; - let engine = EngineMock::new(); - let mut query = Query::new(&engine, sql).unwrap(); + * + FROM + ( + SELECT + "vehicleguid", + "reestrid", + "reestrstatus", + "vehicleregno", + "vehiclevin", + "vehiclevin2", + "vehiclechassisnum", + "vehiclereleaseyear", + "operationregdoctypename", + "operationregdoc", + "operationregdocissuedate", + "operationregdoccomments", + "vehicleptstypename", + "vehicleptsnum", + "vehicleptsissuedate", + "vehicleptsissuer", + "vehicleptscomments", + "vehiclebodycolor", + "vehiclebrand", + "vehiclemodel", + "vehiclebrandmodel", + "vehiclebodynum", + "vehiclecost", + "vehiclegasequip", + "vehicleproducername", + "vehiclegrossmass", + "vehiclemass", + "vehiclesteeringwheeltypeid", + "vehiclekpptype", + "vehicletransmissiontype", + "vehicletypename", + "vehiclecategory", + "vehicletypeunit", + "vehicleecoclass", + "vehiclespecfuncname", + "vehicleenclosedvolume", + "vehicleenginemodel", + "vehicleenginenum", + "vehicleenginepower", + "vehicleenginepowerkw", + "vehicleenginetype", + "holdrestrictiondate", + "approvalnum", + "approvaldate", + "approvaltype", + "utilizationfeename", + "customsdoc", + "customsdocdate", + "customsdocissue", + "customsdocrestriction", + "customscountryremovalid", + "customscountryremovalname", + "ownerorgname", + "ownerinn", + "ownerogrn", + "ownerkpp", + "ownerpersonlastname", + "ownerpersonfirstname", + "ownerpersonmiddlename", + "ownerpersonbirthdate", + "ownerbirthplace", + "ownerpersonogrnip", + "owneraddressindex", + "owneraddressmundistrict", + "owneraddresssettlement", + "owneraddressstreet", + "ownerpersoninn", + "ownerpersondoccode", + "ownerpersondocnum", + "ownerpersondocdate", + "operationname", + "operationdate", + "operationdepartmentname", + "operationattorney", + "operationlising", + "holdertypeid", + "holderpersondoccode", + "holderpersondocnum", + "holderpersondocdate", + "holderpersondocissuer", + "holderpersonlastname", + "holderpersonfirstname", + "holderpersonmiddlename", + "holderpersonbirthdate", + "holderpersonbirthregionid", + "holderpersonsex", + "holderpersonbirthplace", + "holderpersoninn", + "holderpersonsnils", + "holderpersonogrnip", + "holderaddressguid", + "holderaddressregionid", + "holderaddressregionname", + "holderaddressdistrict", + "holderaddressmundistrict", + "holderaddresssettlement", + "holderaddressstreet", + "holderaddressbuilding", + "holderaddressstructureid", + "holderaddressstructurename", + "holderaddressstructure" + FROM + "test__gibdd_db__vehicle_reg_and_res100_history" + WHERE + "sys_from" <= 332 + AND "sys_to" >= 332 + UNION + ALL + SELECT + "vehicleguid", + "reestrid", + "reestrstatus", + "vehicleregno", + "vehiclevin", + "vehiclevin2", + "vehiclechassisnum", + "vehiclereleaseyear", + "operationregdoctypename", + "operationregdoc", + "operationregdocissuedate", + "operationregdoccomments", + "vehicleptstypename", + "vehicleptsnum", + "vehicleptsissuedate", + "vehicleptsissuer", + "vehicleptscomments", + "vehiclebodycolor", + "vehiclebrand", + "vehiclemodel", + "vehiclebrandmodel", + "vehiclebodynum", + "vehiclecost", + "vehiclegasequip", + "vehicleproducername", + "vehiclegrossmass", + "vehiclemass", + "vehiclesteeringwheeltypeid", + "vehiclekpptype", + "vehicletransmissiontype", + "vehicletypename", + "vehiclecategory", + "vehicletypeunit", + "vehicleecoclass", + "vehiclespecfuncname", + "vehicleenclosedvolume", + "vehicleenginemodel", + "vehicleenginenum", + "vehicleenginepower", + "vehicleenginepowerkw", + "vehicleenginetype", + "holdrestrictiondate", + "approvalnum", + "approvaldate", + "approvaltype", + "utilizationfeename", + "customsdoc", + "customsdocdate", + "customsdocissue", + "customsdocrestriction", + "customscountryremovalid", + "customscountryremovalname", + "ownerorgname", + "ownerinn", + "ownerogrn", + "ownerkpp", + "ownerpersonlastname", + "ownerpersonfirstname", + "ownerpersonmiddlename", + "ownerpersonbirthdate", + "ownerbirthplace", + "ownerpersonogrnip", + "owneraddressindex", + "owneraddressmundistrict", + "owneraddresssettlement", + "owneraddressstreet", + "ownerpersoninn", + "ownerpersondoccode", + "ownerpersondocnum", + "ownerpersondocdate", + "operationname", + "operationdate", + "operationdepartmentname", + "operationattorney", + "operationlising", + "holdertypeid", + "holderpersondoccode", + "holderpersondocnum", + "holderpersondocdate", + "holderpersondocissuer", + "holderpersonlastname", + "holderpersonfirstname", + "holderpersonmiddlename", + "holderpersonbirthdate", + "holderpersonbirthregionid", + "holderpersonsex", + "holderpersonbirthplace", + "holderpersoninn", + "holderpersonsnils", + "holderpersonogrnip", + "holderaddressguid", + "holderaddressregionid", + "holderaddressregionname", + "holderaddressdistrict", + "holderaddressmundistrict", + "holderaddresssettlement", + "holderaddressstreet", + "holderaddressbuilding", + "holderaddressstructureid", + "holderaddressstructurename", + "holderaddressstructure" + FROM + "test__gibdd_db__vehicle_reg_and_res100_actual" + WHERE + "sys_from" <= 332 + ) AS "t3" + WHERE + "reestrid" = 452842574"#; + + sql.into() +} + +fn query1(sql: &str, engine: &mut EngineMock) { + let mut query = Query::new(engine, sql).unwrap(); let top_id = query.get_exec_plan().get_ir_plan().get_top().unwrap(); query.bucket_discovery(top_id).unwrap(); query.get_exec_plan().subtree_as_sql(top_id).unwrap(); } fn bench_query1(c: &mut Criterion) { - c.bench_function("query1", |b| b.iter(|| query1())); + let mut engine = EngineMock::new(); + let sql = query1_sql(); + c.bench_function("query1", |b| b.iter(|| query1(&sql, &mut engine))); } criterion_group!(benches, bench_query1); diff --git a/src/executor.rs b/src/executor.rs index 0b9bb35b5d1a520419d8b87e26f211de40245f3e..d30c197327e86b68507fe3ba5f646a598e06331a 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -28,11 +28,13 @@ use std::collections::HashMap; use crate::errors::QueryPlannerError; use crate::executor::bucket::Buckets; use crate::executor::engine::Engine; -pub use crate::executor::engine::Metadata; +use crate::executor::engine::{Metadata, QueryCache}; use crate::executor::ir::ExecutionPlan; use crate::executor::result::BoxExecuteFormat; -use crate::frontend::sql::ast::AbstractSyntaxTree; +use crate::frontend::Ast; use crate::ir::Plan; +use base64ct::{Base64, Encoding}; +use sha2::{Digest, Sha256}; pub mod bucket; pub mod engine; @@ -55,21 +57,21 @@ impl Plan { } /// Query to execute. -pub struct Query<'a, T> +pub struct Query<'a, E> where - T: Engine, + E: Engine, { /// Execution plan exec_plan: ExecutionPlan, /// Execution engine - engine: &'a T, + engine: &'a E, /// Bucket map bucket_map: HashMap<usize, Buckets>, } -impl<'a, T> Query<'a, T> +impl<'a, E> Query<'a, E> where - T: Engine, + E: Engine, { /// Create a new query. /// @@ -78,11 +80,26 @@ where /// - Failed to build AST. /// - Failed to build IR plan. /// - Failed to apply optimizing transformations to IR plan. - pub fn new(engine: &'a T, sql: &str) -> Result<Self, QueryPlannerError> + pub fn new(engine: &'a E, sql: &str) -> Result<Self, QueryPlannerError> where - T::Metadata: Metadata, + E::Metadata: Metadata, + E::QueryCache: QueryCache<String, E::Ast>, + E::Ast: Ast, { - let ast = AbstractSyntaxTree::new(sql)?; + let hash = Sha256::digest(sql.as_bytes()); + let key = Base64::encode_string(&hash); + + let query_cache = engine.query_cache_rc(); + let mut ast = Ast::empty(); + if let Some(cached_ast) = query_cache.borrow_mut().get(&key)? { + ast = cached_ast; + } + if ast.is_empty() { + query_cache.borrow_mut().put(key.clone(), Ast::new(sql)?)?; + ast = query_cache.borrow_mut().get(&key)?.ok_or_else(|| { + QueryPlannerError::CustomError("Failed to get AST from the query cache".to_string()) + })?; + } let mut plan = ast.to_ir(engine.metadata())?; plan.optimize()?; let query = Query { diff --git a/src/executor/engine.rs b/src/executor/engine.rs index c415a095daf278b5ea5df3f9eb083eb0b42ce4e6..97d9e0e9e2302874fcdea809ca473beb01d6ccdb 100644 --- a/src/executor/engine.rs +++ b/src/executor/engine.rs @@ -2,6 +2,7 @@ //! //! Traits that define an execution engine interface. +use std::cell::RefCell; use std::collections::HashMap; use crate::errors::QueryPlannerError; @@ -13,6 +14,28 @@ use crate::ir::value::Value as IrValue; pub mod cartridge; +pub trait QueryCache<Key, Value> { + /// Builds a new cache with the given capacity. + /// + /// # Errors + /// - Capacity is not valid (zero). + fn new(capacity: usize) -> Result<Self, QueryPlannerError> + where + Self: Sized; + + /// Returns a value from the cache. + /// + /// # Errors + /// - Internal error (should never happen). + fn get(&mut self, key: &Key) -> Result<Option<Value>, QueryPlannerError>; + + /// Inserts a key-value pair into the cache. + /// + /// # Errors + /// - Internal error (should never happen). + fn put(&mut self, key: Key, value: Value) -> Result<(), QueryPlannerError>; +} + /// A metadata storage trait of the cluster. pub trait Metadata { /// Get a table by name. @@ -56,6 +79,8 @@ pub struct LocalMetadata { /// An execution engine trait. pub trait Engine { type Metadata; + type QueryCache; + type Ast; /// Return object of metadata storage fn metadata(&self) -> &Self::Metadata @@ -80,6 +105,16 @@ pub trait Engine { /// - Failed to update metadata information (invalid metadata). fn update_metadata(&mut self, metadata: LocalMetadata) -> Result<(), QueryPlannerError>; + /// Flush the query cache. + /// + /// # Errors + /// - Invalid capacity (zero). + fn clear_query_cache(&self, capacity: usize) -> Result<(), QueryPlannerError>; + + fn query_cache_rc(&self) -> &RefCell<Self::QueryCache> + where + Self: Sized; + /// Materialize result motion node to virtual table /// /// # Errors diff --git a/src/executor/engine/cartridge.rs b/src/executor/engine/cartridge.rs index 598f52422a873318a2e403c2f6b33a0ca61cf32b..735367aba19e9bb0db31465600620e8dabf4b1f5 100644 --- a/src/executor/engine/cartridge.rs +++ b/src/executor/engine/cartridge.rs @@ -1,5 +1,6 @@ //! Tarantool cartridge engine module. +use std::cell::RefCell; use std::collections::HashMap; use std::convert::TryInto; @@ -8,13 +9,15 @@ use tarantool::tlua::LuaFunction; use crate::errors::QueryPlannerError; use crate::executor::bucket::Buckets; +use crate::executor::engine::cartridge::cache::lru::{LRUCache, DEFAULT_CAPACITY}; use crate::executor::engine::cartridge::cache::ClusterAppConfig; use crate::executor::engine::cartridge::hash::str_to_bucket_id; use crate::executor::engine::{Engine, LocalMetadata}; use crate::executor::ir::ExecutionPlan; use crate::executor::result::BoxExecuteFormat; use crate::executor::vtable::VirtualTable; -use crate::executor::Metadata; +use crate::executor::{Metadata, QueryCache}; +use crate::frontend::sql::ast::AbstractSyntaxTree; use crate::ir::value::Value as IrValue; mod backend; @@ -24,20 +27,33 @@ pub mod hash; /// Tarantool cartridge metadata and topology. #[derive(Debug, Clone)] pub struct Runtime { + // query_cache: metadata: ClusterAppConfig, bucket_count: usize, + query_cache: RefCell<LRUCache<String, AbstractSyntaxTree>>, } /// Implements `Engine` interface for tarantool cartridge application impl Engine for Runtime { type Metadata = ClusterAppConfig; + type Ast = AbstractSyntaxTree; + type QueryCache = LRUCache<String, AbstractSyntaxTree>; + + fn clear_query_cache(&self, capacity: usize) -> Result<(), QueryPlannerError> { + *self.query_cache.borrow_mut() = Self::QueryCache::new(capacity)?; + Ok(()) + } + + fn query_cache_rc(&self) -> &RefCell<Self::QueryCache> { + &self.query_cache + } fn metadata(&self) -> &Self::Metadata { &self.metadata } fn clear_metadata(&mut self) { - self.metadata = ClusterAppConfig::new(); + self.metadata = Self::Metadata::new(); } fn is_metadata_empty(&self) -> bool { @@ -161,9 +177,11 @@ impl Runtime { /// # Errors /// - Failed to detect the correct amount of buckets. pub fn new() -> Result<Self, QueryPlannerError> { + let cache: LRUCache<String, AbstractSyntaxTree> = LRUCache::new(DEFAULT_CAPACITY)?; let mut result = Runtime { metadata: ClusterAppConfig::new(), bucket_count: 0, + query_cache: RefCell::new(cache), }; result.set_bucket_count()?; diff --git a/src/executor/engine/cartridge/backend/sql/ir/tests.rs b/src/executor/engine/cartridge/backend/sql/ir/tests.rs index 0623ede977a9add98fbca01d2fcbb4864d4a6699..31a849b78e8e2e37f9687eb1ed99ad4d751b2699 100644 --- a/src/executor/engine/cartridge/backend/sql/ir/tests.rs +++ b/src/executor/engine/cartridge/backend/sql/ir/tests.rs @@ -3,6 +3,7 @@ use pretty_assertions::assert_eq; use crate::executor::engine::mock::MetadataMock; use crate::executor::ir::ExecutionPlan; use crate::frontend::sql::ast::AbstractSyntaxTree; +use crate::frontend::Ast; #[test] fn one_table_projection() { diff --git a/src/executor/engine/cartridge/cache.rs b/src/executor/engine/cartridge/cache.rs index 02547089e86e9b7363399d9300bd70f3c1027cef..e37619aeab7cefbdb2b7a46cb1c746a32a32ecfb 100644 --- a/src/executor/engine/cartridge/cache.rs +++ b/src/executor/engine/cartridge/cache.rs @@ -172,5 +172,6 @@ impl Metadata for ClusterAppConfig { } } +pub mod lru; #[cfg(test)] mod tests; diff --git a/src/executor/engine/cartridge/cache/lru.rs b/src/executor/engine/cartridge/cache/lru.rs new file mode 100644 index 0000000000000000000000000000000000000000..e8271f8d5d0072a3638543b8fd02da25c0814968 --- /dev/null +++ b/src/executor/engine/cartridge/cache/lru.rs @@ -0,0 +1,203 @@ +use crate::errors::QueryPlannerError; +use crate::executor::{Ast, QueryCache}; +use std::collections::{hash_map::Entry, HashMap}; + +pub const DEFAULT_CAPACITY: usize = 50; + +#[derive(Clone, Debug)] +struct LRUNode<Key, Value> +where + Value: Ast, +{ + /// The value of the node. + value: Value, + /// Next node key in a hash map. + next: Option<Key>, + /// Previous node key in a hash map. + prev: Option<Key>, +} + +impl<Key, Value> LRUNode<Key, Value> +where + Value: Ast, +{ + fn new(value: Value) -> Self { + LRUNode { + value, + next: None, + prev: None, + } + } + + fn sentinel() -> Self { + LRUNode::new(Value::empty()) + } + + fn replace_next(&mut self, next: Option<Key>) { + self.next = next; + } + + fn replace_prev(&mut self, prev: Option<Key>) { + self.prev = prev; + } +} + +#[derive(Clone, Debug)] +pub struct LRUCache<Key, Value> +where + Value: Ast, +{ + /// The capacity of the cache. + capacity: usize, + /// Actual amount of nodes in the cache. + size: usize, + /// `None` key is reserved for the LRU sentinel head. + map: HashMap<Option<Key>, LRUNode<Key, Value>>, +} + +impl<Key, Value> LRUCache<Key, Value> +where + Value: Ast, + Key: Clone + Eq + std::hash::Hash + std::fmt::Debug, +{ + fn get_node_or_none(&self, key: &Option<Key>) -> Option<&LRUNode<Key, Value>> { + self.map.get(key) + } + + fn get_node(&self, key: &Option<Key>) -> Result<&LRUNode<Key, Value>, QueryPlannerError> { + self.map.get(key).ok_or_else(|| { + QueryPlannerError::CustomError(format!("LRU node with key {:?} not found", key)) + }) + } + + fn get_node_mut( + &mut self, + key: &Option<Key>, + ) -> Result<&mut LRUNode<Key, Value>, QueryPlannerError> { + self.map.get_mut(key).ok_or_else(|| { + QueryPlannerError::CustomError(format!("Mutable LRU node with key {:?} not found", key)) + }) + } + + fn add_first(&mut self, key: Key, value: Value) -> Result<(), QueryPlannerError> { + let new_node = LRUNode::new(value); + self.map.insert(Some(key.clone()), new_node); + self.size += 1; + let head_node = self.get_node(&None)?; + let head_next_id = head_node.next.clone(); + self.link_node(key, &None, &head_next_id)?; + Ok(()) + } + + fn make_first(&mut self, key: &Key) -> Result<(), QueryPlannerError> { + self.unlink_node(&Some(key.clone()))?; + let head_node = self.get_node(&None)?; + let head_next_id = head_node.next.clone(); + self.link_node(key.clone(), &None, &head_next_id)?; + Ok(()) + } + + fn is_first(&self, key: &Key) -> Result<bool, QueryPlannerError> { + let head_node = self.get_node(&None)?; + Ok(head_node.next == Some(key.clone())) + } + + fn link_node( + &mut self, + key: Key, + prev: &Option<Key>, + next: &Option<Key>, + ) -> Result<(), QueryPlannerError> { + let node = self.get_node_mut(&Some(key.clone()))?; + node.replace_prev(prev.clone()); + node.replace_next(next.clone()); + let prev_node = self.get_node_mut(prev)?; + prev_node.replace_next(Some(key.clone())); + let next_node = self.get_node_mut(next)?; + next_node.replace_prev(Some(key)); + Ok(()) + } + + fn unlink_node(&mut self, key: &Option<Key>) -> Result<(), QueryPlannerError> { + // We don't want to remove sentinel. + if key.is_none() { + return Ok(()); + } + + let node = self.get_node_mut(key)?; + let prev_id = node.prev.take(); + let next_id = node.next.take(); + let prev_node = self.get_node_mut(&prev_id)?; + prev_node.replace_next(next_id.clone()); + let next_node = self.get_node_mut(&next_id)?; + next_node.replace_prev(prev_id); + Ok(()) + } + + fn remove_last(&mut self) -> Result<(), QueryPlannerError> { + let head_node = self.get_node(&None)?; + let head_prev_id = head_node.prev.clone(); + if head_prev_id.is_none() { + return Ok(()); + } + self.unlink_node(&head_prev_id.clone())?; + if self.map.remove(&head_prev_id).is_some() { + self.size -= 1; + } + Ok(()) + } +} + +impl<Key, Value> QueryCache<Key, Value> for LRUCache<Key, Value> +where + Value: Ast + Clone, + Key: Clone + Eq + std::hash::Hash + std::fmt::Debug, +{ + fn new(capacity: usize) -> Result<Self, QueryPlannerError> { + if capacity == 0 { + return Err(QueryPlannerError::CustomError( + "LRU cache capacity must be greater than zero".to_string(), + )); + } + let head = LRUNode::sentinel(); + let mut map: HashMap<Option<Key>, LRUNode<Key, Value>> = + HashMap::with_capacity((capacity + 2) as usize); + map.insert(None, head); + + Ok(LRUCache { + capacity: capacity as usize, + size: 0, + map, + }) + } + + fn get(&mut self, key: &Key) -> Result<Option<Value>, QueryPlannerError> { + let value = if let Some(node) = self.get_node_or_none(&Some(key.clone())) { + node.value.clone() + } else { + return Ok(None); + }; + + if self.is_first(key)? { + return Ok(Some(value)); + } + + self.make_first(key)?; + Ok(Some(value)) + } + + fn put(&mut self, key: Key, value: Value) -> Result<(), QueryPlannerError> { + if let Entry::Occupied(mut entry) = self.map.entry(Some(key.clone())) { + let node = entry.get_mut(); + node.value = value; + self.make_first(&key)?; + return Ok(()); + } + + self.add_first(key, value)?; + if self.size > self.capacity { + self.remove_last()?; + } + Ok(()) + } +} diff --git a/src/executor/engine/mock.rs b/src/executor/engine/mock.rs index b1d8e24adf459b1ce0a1da5d96731dd6f536807d..be24921e8dadc9c5c9d28957560845a41df48ed9 100644 --- a/src/executor/engine/mock.rs +++ b/src/executor/engine/mock.rs @@ -3,12 +3,14 @@ use std::collections::HashMap; use crate::errors::QueryPlannerError; use crate::executor::bucket::Buckets; +use crate::executor::engine::cartridge::cache::lru::{LRUCache, DEFAULT_CAPACITY}; use crate::executor::engine::cartridge::hash::str_to_bucket_id; use crate::executor::engine::{Engine, LocalMetadata}; use crate::executor::ir::ExecutionPlan; use crate::executor::result::{BoxExecuteFormat, Value}; use crate::executor::vtable::VirtualTable; -use crate::executor::Metadata; +use crate::executor::{Metadata, QueryCache}; +use crate::frontend::sql::ast::AbstractSyntaxTree; use crate::ir::relation::{Column, Table, Type}; use crate::ir::value::Value as IrValue; @@ -146,10 +148,22 @@ impl MetadataMock { pub struct EngineMock { metadata: MetadataMock, virtual_tables: RefCell<HashMap<usize, VirtualTable>>, + query_cache: RefCell<LRUCache<String, AbstractSyntaxTree>>, } impl Engine for EngineMock { type Metadata = MetadataMock; + type Ast = AbstractSyntaxTree; + type QueryCache = LRUCache<String, AbstractSyntaxTree>; + + fn clear_query_cache(&self, capacity: usize) -> Result<(), QueryPlannerError> { + *self.query_cache.borrow_mut() = Self::QueryCache::new(capacity)?; + Ok(()) + } + + fn query_cache_rc(&self) -> &RefCell<Self::QueryCache> { + &self.query_cache + } fn metadata(&self) -> &Self::Metadata where @@ -251,9 +265,11 @@ impl EngineMock { #[allow(dead_code)] #[must_use] pub fn new() -> Self { + let cache: LRUCache<String, AbstractSyntaxTree> = LRUCache::new(DEFAULT_CAPACITY).unwrap(); EngineMock { metadata: MetadataMock::new(), virtual_tables: RefCell::new(HashMap::new()), + query_cache: RefCell::new(cache), } } diff --git a/src/frontend.rs b/src/frontend.rs index a0339ff799574fdde03975b226349eb7eb1ebc64..74b45ef1d5d4c6c37bc90ad8b8d54d8d5a81516c 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -3,4 +3,34 @@ //! A list of different frontend implementations //! to build the intermediate representation (IR). +use crate::errors::QueryPlannerError; +use crate::executor::engine::Metadata; +use crate::ir::Plan; + +pub trait Ast { + fn empty() -> Self + where + Self: Sized; + + /// Builds abstract syntax tree (AST) from SQL query. + /// + /// # Errors + /// - SQL query is not valid or not supported. + fn new(query: &str) -> Result<Self, QueryPlannerError> + where + Self: Sized; + + /// AST is empty. + fn is_empty(&self) -> bool; + + /// Builds the intermediate representation (IR) from the AST. + /// + /// # Errors + /// - The AST doesn't represent a valid SQL query. + /// - AST contains objects not present in the metadata. + fn to_ir<M>(&self, metadata: &M) -> Result<Plan, QueryPlannerError> + where + M: Metadata; +} + pub mod sql; diff --git a/src/frontend/sql.rs b/src/frontend/sql.rs index b9810f1601497847d90eca810a47b9407026b2bd..7619c8fb75944b3dd33a8d1797947bc9085094e0 100644 --- a/src/frontend/sql.rs +++ b/src/frontend/sql.rs @@ -3,6 +3,614 @@ //! Parses an SQL statement to the abstract syntax tree (AST) //! and builds the intermediate representation (IR). +use pest::Parser; +use std::collections::{HashMap, HashSet}; +use traversal::DftPost; + +use crate::errors::QueryPlannerError; +use crate::executor::engine::Metadata; +use crate::frontend::sql::ast::{ + AbstractSyntaxTree, ParseNode, ParseNodes, ParseTree, Rule, StackParseNode, Type, +}; +use crate::frontend::sql::ir::{to_name, Translation}; +use crate::frontend::Ast; +use crate::ir::expression::Expression; +use crate::ir::operator::Bool; +use crate::ir::value::Value; +use crate::ir::{Node, Plan}; + +impl Ast for AbstractSyntaxTree { + /// Build an empty AST. + fn empty() -> Self { + AbstractSyntaxTree { + nodes: ParseNodes::new(), + top: None, + map: HashMap::new(), + } + } + + /// Constructor. + /// Builds a tree (nodes are in postorder reverse). + /// + /// # Errors + /// - Failed to parse an SQL query. + fn new(query: &str) -> Result<Self, QueryPlannerError> { + let mut ast = AbstractSyntaxTree::empty(); + + let mut command_pair = match ParseTree::parse(Rule::Command, query) { + Ok(p) => p, + Err(e) => { + return Err(QueryPlannerError::CustomError(format!( + "Parsing error: {:?}", + e + ))) + } + }; + let top_pair = command_pair.next().ok_or_else(|| { + QueryPlannerError::CustomError("No query found in the parse tree.".to_string()) + })?; + let top = StackParseNode::new(top_pair, None); + + let mut stack: Vec<StackParseNode> = vec![top]; + + while !stack.is_empty() { + let stack_node: StackParseNode = match stack.pop() { + Some(n) => n, + None => break, + }; + + // Save node to AST + let node = ast.nodes.push_node(ParseNode::new( + stack_node.pair.as_rule(), + Some(String::from(stack_node.pair.as_str())), + )?); + + // Update parent's node children list + ast.nodes.add_child(stack_node.parent, node)?; + // Clean parent values (only leafs should contain data) + if let Some(parent) = stack_node.parent { + ast.nodes.update_value(parent, None)?; + } + + for parse_child in stack_node.pair.into_inner() { + stack.push(StackParseNode::new(parse_child, Some(node))); + } + } + + ast.set_top(0)?; + + ast.transform_select()?; + ast.add_aliases_to_projection()?; + ast.build_ref_to_relation_map()?; + + Ok(ast) + } + + fn is_empty(&self) -> bool { + self.nodes.arena.is_empty() + } + + /// Transform AST to IR plan tree. + /// + /// # Errors + /// - IR plan can't be built. + #[allow(dead_code)] + #[allow(clippy::too_many_lines)] + fn to_ir<T>(&self, metadata: &T) -> Result<Plan, QueryPlannerError> + where + T: Metadata, + { + let mut plan = Plan::new(); + + let top = match self.top { + Some(t) => t, + None => return Err(QueryPlannerError::InvalidAst), + }; + let dft_post = DftPost::new(&top, |node| self.nodes.ast_iter(node)); + let mut map = Translation::with_capacity(self.nodes.next_id()); + let mut rows: HashSet<usize> = HashSet::with_capacity(self.nodes.next_id()); + + for (_, id) in dft_post { + let node = self.nodes.get_node(*id)?.clone(); + match &node.rule { + Type::Scan => { + let ast_child_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Could not find child id in scan node".to_string(), + ) + })?; + let plan_child_id = map.get(*ast_child_id)?; + map.add(*id, plan_child_id); + if let Some(ast_scan_name_id) = node.children.get(1) { + let ast_scan_name = self.nodes.get_node(*ast_scan_name_id)?; + if let Type::ScanName = ast_scan_name.rule { + // Update scan name in the plan. + let scan = plan.get_mut_relation_node(plan_child_id)?; + scan.set_scan_name(ast_scan_name.value.as_ref().map(|s| to_name(s)))?; + } else { + return Err(QueryPlannerError::CustomError( + "Expected scan name AST node.".into(), + )); + } + } + } + Type::Table => { + if let Some(node_val) = &node.value { + let table = node_val.as_str(); + let t = metadata.get_table_segment(table)?; + plan.add_rel(t); + let scan_id = plan.add_scan(table, None)?; + map.add(*id, scan_id); + } else { + return Err(QueryPlannerError::CustomError( + "Table name is not found.".into(), + )); + } + } + Type::SubQuery => { + let ast_child_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Child node id is not found among sub-query children.".into(), + ) + })?; + let plan_child_id = map.get(*ast_child_id)?; + let alias_name: Option<String> = if let Some(ast_name_id) = node.children.get(1) + { + let ast_alias = self.nodes.get_node(*ast_name_id)?; + if let Type::SubQueryName = ast_alias.rule { + } else { + return Err(QueryPlannerError::CustomError(format!( + "Expected a sub-query name, got {:?}.", + ast_alias.rule + ))); + } + ast_alias.value.as_deref().map(to_name) + } else { + None + }; + let plan_sq_id = plan.add_sub_query(plan_child_id, alias_name.as_deref())?; + map.add(*id, plan_sq_id); + } + Type::Reference => { + let ast_rel_list = self.get_referred_relational_nodes(*id)?; + let mut plan_rel_list = Vec::new(); + for ast_id in ast_rel_list { + let plan_id = map.get(ast_id)?; + plan_rel_list.push(plan_id); + } + + let get_column_name = |ast_id: usize| -> Result<String, QueryPlannerError> { + let ast_col_name = self.nodes.get_node(ast_id)?; + if let Type::ColumnName = ast_col_name.rule { + let name: Option<String> = ast_col_name.value.as_deref().map(to_name); + Ok(name.ok_or_else(|| { + QueryPlannerError::CustomError("Empty AST column name".into()) + })?) + } else { + Err(QueryPlannerError::CustomError( + "Expected column name AST node.".into(), + )) + } + }; + + let get_scan_name = + |col_name: &str, + plan_id: usize| + -> Result<Option<String>, QueryPlannerError> { + let child = plan.get_relation_node(plan_id)?; + let col_position = child + .output_alias_position_map(&plan.nodes)? + .get(col_name) + .copied(); + match col_position { + Some(pos) => Ok(plan + .get_relation_node(plan_id)? + .scan_name(&plan, pos)? + .map(String::from)), + None => Ok(None), + } + }; + + // Reference to the join node. + if let (Some(plan_left_id), Some(plan_right_id)) = + (plan_rel_list.get(0), plan_rel_list.get(1)) + { + if let (Some(ast_scan_name_id), Some(ast_col_name_id)) = + (node.children.get(0), node.children.get(1)) + { + let ast_scan_name = self.nodes.get_node(*ast_scan_name_id)?; + if let Type::ScanName = ast_scan_name.rule { + // Get the column name and its positions in the output tuples. + let col_name = get_column_name(*ast_col_name_id)?; + let left_name = get_scan_name(&col_name, *plan_left_id)?; + let right_name = get_scan_name(&col_name, *plan_right_id)?; + // Check that the AST scan name matches to the children scan names in the plan join node. + let scan_name: Option<String> = + ast_scan_name.value.as_deref().map(to_name); + // Determine the referred side of the join (left or right). + if left_name == scan_name { + let left_col_map = plan + .get_relation_node(*plan_left_id)? + .output_alias_position_map(&plan.nodes)?; + if left_col_map.get(&col_name.as_str()).is_some() { + let ref_id = plan.add_row_from_left_branch( + *plan_left_id, + *plan_right_id, + &[&col_name], + )?; + rows.insert(ref_id); + map.add(*id, ref_id); + } else { + return Err(QueryPlannerError::CustomError(format!( + "Column '{}' not found in for the join left child '{:?}'.", + col_name, left_name + ))); + } + } else if right_name == scan_name { + let right_col_map = plan + .get_relation_node(*plan_right_id)? + .output_alias_position_map(&plan.nodes)?; + if right_col_map.get(&col_name.as_str()).is_some() { + let ref_id = plan.add_row_from_right_branch( + *plan_left_id, + *plan_right_id, + &[&col_name], + )?; + rows.insert(ref_id); + map.add(*id, ref_id); + } else { + return Err(QueryPlannerError::CustomError(format!( + "Column '{}' not found in for the join right child '{:?}'.", + col_name, right_name + ))); + } + } else { + return Err(QueryPlannerError::CustomError( + "Left and right plan nodes do not match the AST scan name." + .into(), + )); + } + } else { + return Err(QueryPlannerError::CustomError( + "Expected AST node to be a scan name.".into(), + )); + } + } else if let (Some(ast_col_name_id), None) = + (node.children.get(0), node.children.get(1)) + { + // Determine the referred side of the join (left or right). + let col_name = get_column_name(*ast_col_name_id)?; + let left_col_map = plan + .get_relation_node(*plan_left_id)? + .output_alias_position_map(&plan.nodes)?; + if left_col_map.get(&col_name.as_str()).is_some() { + let ref_id = plan.add_row_from_left_branch( + *plan_left_id, + *plan_right_id, + &[&col_name], + )?; + rows.insert(ref_id); + map.add(*id, ref_id); + } + let right_col_map = plan + .get_relation_node(*plan_right_id)? + .output_alias_position_map(&plan.nodes)?; + if right_col_map.get(&col_name.as_str()).is_some() { + let ref_id = plan.add_row_from_right_branch( + *plan_left_id, + *plan_right_id, + &[&col_name], + )?; + rows.insert(ref_id); + map.add(*id, ref_id); + } + return Err(QueryPlannerError::CustomError(format!( + "Column '{}' not found in for the join left or right children.", + col_name + ))); + } else { + return Err(QueryPlannerError::CustomError( + "Expected children nodes contain a column name.".into(), + )); + }; + + // Reference to a single child node. + } else if let (Some(plan_rel_id), None) = + (plan_rel_list.get(0), plan_rel_list.get(1)) + { + let col_name: String = if let ( + Some(ast_scan_name_id), + Some(ast_col_name_id), + ) = (node.children.get(0), node.children.get(1)) + { + // Get column name. + let col_name = get_column_name(*ast_col_name_id)?; + // Check that scan name in the reference matches to the one in scan node. + let ast_scan_name = self.nodes.get_node(*ast_scan_name_id)?; + if let Type::ScanName = ast_scan_name.rule { + let plan_scan_name = get_scan_name(&col_name, *plan_rel_id)?; + if plan_scan_name != ast_scan_name.value { + return Err(QueryPlannerError::CustomError( + format!("Scan name for the column {:?} doesn't match: expected {:?}, found {:?}", + get_column_name(*ast_col_name_id), plan_scan_name, ast_scan_name.value + ))); + } + } else { + return Err(QueryPlannerError::CustomError( + "Expected AST node to be a scan name.".into(), + )); + }; + col_name + } else if let (Some(ast_col_name_id), None) = + (node.children.get(0), node.children.get(1)) + { + // Get the column name. + get_column_name(*ast_col_name_id)? + } else { + return Err(QueryPlannerError::CustomError( + "No child node found in the AST reference.".into(), + )); + }; + + let ref_list = + plan.new_columns(&[*plan_rel_id], false, &[0], &[&col_name], false)?; + let ref_id = *ref_list.get(0).ok_or_else(|| { + QueryPlannerError::CustomError("Referred column is not found.".into()) + })?; + map.add(*id, ref_id); + } else { + return Err(QueryPlannerError::CustomError( + "Expected one or two referred relational nodes, got less or more." + .into(), + )); + } + } + Type::Number | Type::String | Type::Null | Type::True | Type::False => { + let val = Value::from_node(&node)?; + map.add(*id, plan.add_const(val)); + } + Type::Asterisk => { + // We can get an asterisk only in projection. + let ast_rel_list = self.get_referred_relational_nodes(*id)?; + let mut plan_rel_list = Vec::new(); + for ast_id in ast_rel_list { + let plan_id = map.get(ast_id)?; + plan_rel_list.push(plan_id); + } + if plan_rel_list.len() > 1 { + return Err(QueryPlannerError::CustomError( + "Joins are not implemented yet.".into(), + )); + } + let plan_rel_id = *plan_rel_list.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Referred relational node is not found.".into(), + ) + })?; + let plan_asterisk_id = plan.add_row_for_output(plan_rel_id, &[])?; + map.add(*id, plan_asterisk_id); + } + Type::Alias => { + let ast_ref_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Reference node id is not found among alias children.".into(), + ) + })?; + let plan_ref_id = map.get(*ast_ref_id)?; + let ast_name_id = node.children.get(1).ok_or_else(|| { + QueryPlannerError::CustomError( + "Alias name node id is not found among alias children.".into(), + ) + })?; + let name = self + .nodes + .get_node(*ast_name_id)? + .value + .as_ref() + .ok_or_else(|| { + QueryPlannerError::CustomError("Alias name is not found.".into()) + })?; + let plan_alias_id = plan.nodes.add_alias(&to_name(name), plan_ref_id)?; + map.add(*id, plan_alias_id); + } + Type::Column => { + let ast_child_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError("Column has no children.".into()) + })?; + let plan_child_id = map.get(*ast_child_id)?; + map.add(*id, plan_child_id); + } + Type::Row => { + let mut plan_col_list = Vec::new(); + for ast_child_id in node.children { + let plan_child_id = map.get(ast_child_id)?; + // If the child is a row that was generated by our + // reference-to-row logic in AST code, we should unwrap it back. + let plan_id = if rows.get(&plan_child_id).is_some() { + let plan_inner_expr = plan.get_expression_node(plan_child_id)?; + *plan_inner_expr.extract_row_list()?.get(0).ok_or_else(|| { + QueryPlannerError::CustomError("Row is empty.".into()) + })? + } else { + plan_child_id + }; + plan_col_list.push(plan_id); + } + let plan_row_id = plan.nodes.add_row(plan_col_list, None); + map.add(*id, plan_row_id); + } + Type::And + | Type::Or + | Type::Eq + | Type::In + | Type::Gt + | Type::GtEq + | Type::Lt + | Type::LtEq + | Type::NotEq => { + let mut to_row = |plan_id| -> Result<usize, QueryPlannerError> { + if let Node::Expression( + Expression::Reference { .. } | Expression::Constant { .. }, + ) = plan.get_node(plan_id)? + { + let row_id = plan.nodes.add_row(vec![plan_id], None); + rows.insert(row_id); + Ok(row_id) + } else { + Ok(plan_id) + } + }; + let ast_left_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Left node id is not found among comparison children.".into(), + ) + })?; + let plan_left_id = to_row(map.get(*ast_left_id)?)?; + let ast_right_id = node.children.get(1).ok_or_else(|| { + QueryPlannerError::CustomError( + "Right node id is not found among comparison children.".into(), + ) + })?; + let plan_right_id = to_row(map.get(*ast_right_id)?)?; + let op = Bool::from_node_type(&node.rule)?; + let cond_id = plan.add_cond(plan_left_id, op, plan_right_id)?; + map.add(*id, cond_id); + } + Type::Condition => { + let ast_child_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError("Condition has no children.".into()) + })?; + let plan_child_id = map.get(*ast_child_id)?; + map.add(*id, plan_child_id); + } + Type::InnerJoin => { + let ast_left_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Left node id is not found among join children.".into(), + ) + })?; + let plan_left_id = map.get(*ast_left_id)?; + let ast_right_id = node.children.get(1).ok_or_else(|| { + QueryPlannerError::CustomError( + "Right node id is not found among join children.".into(), + ) + })?; + let plan_right_id = map.get(*ast_right_id)?; + let ast_cond_id = node.children.get(2).ok_or_else(|| { + QueryPlannerError::CustomError( + "Condition node id is not found among join children.".into(), + ) + })?; + let plan_cond_id = map.get(*ast_cond_id)?; + let plan_join_id = plan.add_join(plan_left_id, plan_right_id, plan_cond_id)?; + map.add(*id, plan_join_id); + } + Type::Selection => { + let ast_child_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Child node id is not found among selection children.".into(), + ) + })?; + let plan_child_id = map.get(*ast_child_id)?; + let ast_filter_id = node.children.get(1).ok_or_else(|| { + QueryPlannerError::CustomError( + "Filter node id is not found among selection children.".into(), + ) + })?; + let plan_filter_id = map.get(*ast_filter_id)?; + let plan_selection_id = plan.add_select(&[plan_child_id], plan_filter_id)?; + map.add(*id, plan_selection_id); + } + Type::Projection => { + let ast_child_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Child node id is not found among projection children.".into(), + ) + })?; + let plan_child_id = map.get(*ast_child_id)?; + let mut columns: Vec<usize> = Vec::new(); + for ast_column_id in node.children.iter().skip(1) { + let ast_column = self.nodes.get_node(*ast_column_id)?; + match ast_column.rule { + Type::Column => { + let ast_alias_id = + *ast_column.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Alias node id is not found among column children." + .into(), + ) + })?; + let plan_alias_id = map.get(ast_alias_id)?; + columns.push(plan_alias_id); + } + Type::Asterisk => { + let plan_asterisk_id = map.get(*ast_column_id)?; + if let Node::Expression(Expression::Row { list, .. }) = + plan.get_node(plan_asterisk_id)? + { + for row_id in list { + columns.push(*row_id); + } + } else { + return Err(QueryPlannerError::CustomError( + "A plan node corresponding to asterisk is not a row." + .into(), + )); + } + } + _ => { + return Err(QueryPlannerError::CustomError(format!( + "Expected a column in projection, got {:?}.", + ast_column.rule + ))); + } + } + } + let projection_id = plan.add_proj_internal(plan_child_id, &columns)?; + map.add(*id, projection_id); + } + Type::UnionAll => { + let ast_left_id = node.children.get(0).ok_or_else(|| { + QueryPlannerError::CustomError( + "Left node id is not found among union all children.".into(), + ) + })?; + let plan_left_id = map.get(*ast_left_id)?; + let ast_right_id = node.children.get(1).ok_or_else(|| { + QueryPlannerError::CustomError( + "Right node id is not found among union all children.".into(), + ) + })?; + let plan_right_id = map.get(*ast_right_id)?; + let plan_union_all_id = plan.add_union_all(plan_left_id, plan_right_id)?; + map.add(*id, plan_union_all_id); + } + Type::AliasName + | Type::ColumnName + | Type::ScanName + | Type::Select + | Type::SubQueryName => {} + rule => { + return Err(QueryPlannerError::CustomError(format!( + "Not implements type: {:?}", + rule + ))); + } + } + } + + // get root node id + let plan_top_id = map.get( + self.top + .ok_or_else(|| QueryPlannerError::CustomError("No top in AST.".into()))?, + )?; + plan.set_top(plan_top_id)?; + plan.replace_sq_with_references()?; + + Ok(plan) + } +} + pub mod ast; mod ir; pub mod tree; diff --git a/src/frontend/sql/ast.rs b/src/frontend/sql/ast.rs index eabf4a9fbfd1e868698346a34d3ddb87dee9c08c..5cf6f9d3430e7d36ade075a05cbb87427e92ba42 100644 --- a/src/frontend/sql/ast.rs +++ b/src/frontend/sql/ast.rs @@ -9,7 +9,6 @@ use std::collections::{hash_map::Entry, HashMap, HashSet}; use std::mem::swap; use pest::iterators::Pair; -use pest::Parser; use serde::{Deserialize, Serialize}; use traversal::DftPost; @@ -17,8 +16,8 @@ use crate::errors::QueryPlannerError; /// Parse tree #[derive(Parser)] -#[grammar = "frontend/sql/grammar.pest"] -struct ParseTree; +#[grammar = "frontend/sql/query.pest"] +pub(super) struct ParseTree; /// A list of current rules from the actual grammar. /// When new tokens are added to the grammar they @@ -46,6 +45,7 @@ pub enum Type { Null, Number, Or, + Parameter, Parentheses, Primary, Projection, @@ -90,6 +90,7 @@ impl Type { Rule::Null => Ok(Type::Null), Rule::Number => Ok(Type::Number), Rule::Or => Ok(Type::Or), + Rule::Parameter => Ok(Type::Parameter), Rule::Parentheses => Ok(Type::Parentheses), Rule::Primary => Ok(Type::Primary), Rule::Projection => Ok(Type::Projection), @@ -121,7 +122,7 @@ pub struct ParseNode { #[allow(dead_code)] impl ParseNode { - fn new(rule: Rule, value: Option<String>) -> Result<Self, QueryPlannerError> { + pub(super) fn new(rule: Rule, value: Option<String>) -> Result<Self, QueryPlannerError> { Ok(ParseNode { children: vec![], rule: Type::from_rule(rule)?, @@ -223,24 +224,24 @@ impl ParseNodes { } /// A wrapper over the pair to keep its parent as well. -struct StackParseNode<'n> { - parent: Option<usize>, - pair: Pair<'n, Rule>, +pub(super) struct StackParseNode<'n> { + pub(super) parent: Option<usize>, + pub(super) pair: Pair<'n, Rule>, } impl<'n> StackParseNode<'n> { /// Constructor - fn new(pair: Pair<'n, Rule>, parent: Option<usize>) -> Self { + pub(super) fn new(pair: Pair<'n, Rule>, parent: Option<usize>) -> Self { StackParseNode { parent, pair } } } /// AST is a tree build on the top of the parse nodes arena. -#[derive(Serialize, Deserialize, PartialEq, Debug)] +#[derive(Serialize, Deserialize, PartialEq, Clone, Debug)] pub struct AbstractSyntaxTree { pub(in crate::frontend::sql) nodes: ParseNodes, pub(in crate::frontend::sql) top: Option<usize>, - map: HashMap<usize, Vec<usize>>, + pub(super) map: HashMap<usize, Vec<usize>>, } #[allow(dead_code)] @@ -276,70 +277,9 @@ impl AbstractSyntaxTree { Ok(ast) } - /// Constructor. - /// Builds a tree (nodes are in postorder reverse). - /// - /// # Errors - /// - Failed to parse an SQL query. - pub fn new(query: &str) -> Result<Self, QueryPlannerError> { - let mut ast = AbstractSyntaxTree { - nodes: ParseNodes::new(), - top: None, - map: HashMap::new(), - }; - - let mut command_pair = match ParseTree::parse(Rule::Command, query) { - Ok(p) => p, - Err(e) => { - return Err(QueryPlannerError::CustomError(format!( - "Parsing error: {:?}", - e - ))) - } - }; - let top_pair = command_pair.next().ok_or_else(|| { - QueryPlannerError::CustomError("No query found in the parse tree.".to_string()) - })?; - let top = StackParseNode::new(top_pair, None); - - let mut stack: Vec<StackParseNode> = vec![top]; - - while !stack.is_empty() { - let stack_node: StackParseNode = match stack.pop() { - Some(n) => n, - None => break, - }; - - // Save node to AST - let node = ast.nodes.push_node(ParseNode::new( - stack_node.pair.as_rule(), - Some(String::from(stack_node.pair.as_str())), - )?); - - // Update parent's node children list - ast.nodes.add_child(stack_node.parent, node)?; - // Clean parent values (only leafs should contain data) - if let Some(parent) = stack_node.parent { - ast.nodes.update_value(parent, None)?; - } - - for parse_child in stack_node.pair.into_inner() { - stack.push(StackParseNode::new(parse_child, Some(node))); - } - } - - ast.set_top(0)?; - - ast.transform_select()?; - ast.add_aliases_to_projection()?; - ast.build_ref_to_relation_map()?; - - Ok(ast) - } - /// `Select` node is not IR-friendly as it can have up to five children. /// Transform this node in IR-way (to a binary sub-tree). - fn transform_select(&mut self) -> Result<(), QueryPlannerError> { + pub(super) fn transform_select(&mut self) -> Result<(), QueryPlannerError> { let mut selects: HashSet<usize> = HashSet::new(); for (id, node) in self.nodes.arena.iter().enumerate() { if node.rule == Type::Select { @@ -645,7 +585,7 @@ impl AbstractSyntaxTree { /// /// # Errors /// - columns are invalid - fn add_aliases_to_projection(&mut self) -> Result<(), QueryPlannerError> { + pub(super) fn add_aliases_to_projection(&mut self) -> Result<(), QueryPlannerError> { let mut columns: Vec<(usize, Option<String>)> = Vec::new(); // Collect projection columns and their names. for (_, node) in self.nodes.arena.iter().enumerate() { @@ -731,7 +671,7 @@ impl AbstractSyntaxTree { /// /// # Errors /// - Projection, selection and inner join nodes don't have valid children. - fn build_ref_to_relation_map(&mut self) -> Result<(), QueryPlannerError> { + pub(super) fn build_ref_to_relation_map(&mut self) -> Result<(), QueryPlannerError> { let mut map: HashMap<usize, Vec<usize>> = HashMap::new(); // Traverse relational nodes in Post Order and then enter their subtrees // and map expressions to relational nodes. diff --git a/src/frontend/sql/ast/tests.rs b/src/frontend/sql/ast/tests.rs index 2968c6d355195c2eafa2a2133cf6a400f852c40a..4a5c574f4292631133e98904b945c5b3fe4008bc 100644 --- a/src/frontend/sql/ast/tests.rs +++ b/src/frontend/sql/ast/tests.rs @@ -1,4 +1,5 @@ use super::*; +use crate::frontend::Ast; use pretty_assertions::assert_eq; use std::fs; use std::path::Path; @@ -148,7 +149,7 @@ fn invalid_query() { format!( "{} {} {} {}", r#"Parsing error: Error { variant: ParsingError { positives:"#, - r#"[Alias, Asterisk, Number, True, False, Null, Row], negatives: [] },"#, + r#"[Alias, Asterisk, Number, True, False, Null, Row, Parameter], negatives: [] },"#, r#"location: Pos(7), line_col: Pos((1, 8)), path: None, line: "select a frAm t","#, r#"continued_line: None }"#, ), diff --git a/src/frontend/sql/ir.rs b/src/frontend/sql/ir.rs index 545216c0ea635dafdd74de598fe5286785c4cef6..2159894205e1800063c58dbcf95973a30e5c99c5 100644 --- a/src/frontend/sql/ir.rs +++ b/src/frontend/sql/ir.rs @@ -3,8 +3,7 @@ use std::collections::{HashMap, HashSet}; use traversal::DftPost; use crate::errors::QueryPlannerError; -use crate::executor::engine::Metadata; -use crate::frontend::sql::ast::{AbstractSyntaxTree, ParseNode, Type}; +use crate::frontend::sql::ast::{ParseNode, Type}; use crate::ir::expression::Expression; use crate::ir::operator::{Bool, Relational}; use crate::ir::value::Value; @@ -16,7 +15,7 @@ impl Bool { /// # Errors /// Returns `QueryPlannerError` when the operator is invalid. #[allow(dead_code)] - fn from_node_type(s: &Type) -> Result<Self, QueryPlannerError> { + pub(super) fn from_node_type(s: &Type) -> Result<Self, QueryPlannerError> { match s { Type::And => Ok(Bool::And), Type::Or => Ok(Bool::Or), @@ -38,7 +37,7 @@ impl Value { /// # Errors /// Returns `QueryPlannerError` when the operator is invalid. #[allow(dead_code)] - fn from_node(s: &ParseNode) -> Result<Self, QueryPlannerError> { + pub(super) fn from_node(s: &ParseNode) -> Result<Self, QueryPlannerError> { let val = match s.clone().value { Some(v) => v, None => "".into(), @@ -56,22 +55,22 @@ impl Value { } #[derive(Debug)] -struct Translation { +pub(super) struct Translation { map: HashMap<usize, usize>, } impl Translation { - fn with_capacity(capacity: usize) -> Self { + pub(super) fn with_capacity(capacity: usize) -> Self { Translation { map: HashMap::with_capacity(capacity), } } - fn add(&mut self, parse_id: usize, plan_id: usize) { + pub(super) fn add(&mut self, parse_id: usize, plan_id: usize) { self.map.insert(parse_id, plan_id); } - fn get(&self, old: usize) -> Result<usize, QueryPlannerError> { + pub(super) fn get(&self, old: usize) -> Result<usize, QueryPlannerError> { self.map.get(&old).copied().ok_or_else(|| { QueryPlannerError::CustomError( "Could not find parse node in translation map".to_string(), @@ -80,535 +79,13 @@ impl Translation { } } -fn to_name(s: &str) -> String { +pub(super) fn to_name(s: &str) -> String { if let (Some('"'), Some('"')) = (s.chars().next(), s.chars().last()) { return s.to_string(); } s.to_lowercase() } -impl AbstractSyntaxTree { - /// Transform AST to IR plan tree. - /// - /// # Errors - /// - IR plan can't be built. - #[allow(dead_code)] - #[allow(clippy::too_many_lines)] - pub fn to_ir<T>(&self, metadata: &T) -> Result<Plan, QueryPlannerError> - where - T: Metadata, - { - let mut plan = Plan::new(); - - let top = match self.top { - Some(t) => t, - None => return Err(QueryPlannerError::InvalidAst), - }; - let dft_post = DftPost::new(&top, |node| self.nodes.ast_iter(node)); - let mut map = Translation::with_capacity(self.nodes.next_id()); - let mut rows: HashSet<usize> = HashSet::with_capacity(self.nodes.next_id()); - - for (_, id) in dft_post { - let node = self.nodes.get_node(*id)?.clone(); - match &node.rule { - Type::Scan => { - let ast_child_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Could not find child id in scan node".to_string(), - ) - })?; - let plan_child_id = map.get(*ast_child_id)?; - map.add(*id, plan_child_id); - if let Some(ast_scan_name_id) = node.children.get(1) { - let ast_scan_name = self.nodes.get_node(*ast_scan_name_id)?; - if let Type::ScanName = ast_scan_name.rule { - // Update scan name in the plan. - let scan = plan.get_mut_relation_node(plan_child_id)?; - scan.set_scan_name(ast_scan_name.value.as_ref().map(|s| to_name(s)))?; - } else { - return Err(QueryPlannerError::CustomError( - "Expected scan name AST node.".into(), - )); - } - } - } - Type::Table => { - if let Some(node_val) = &node.value { - let table = node_val.as_str(); - let t = metadata.get_table_segment(table)?; - plan.add_rel(t); - let scan_id = plan.add_scan(table, None)?; - map.add(*id, scan_id); - } else { - return Err(QueryPlannerError::CustomError( - "Table name is not found.".into(), - )); - } - } - Type::SubQuery => { - let ast_child_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Child node id is not found among sub-query children.".into(), - ) - })?; - let plan_child_id = map.get(*ast_child_id)?; - let alias_name: Option<String> = if let Some(ast_name_id) = node.children.get(1) - { - let ast_alias = self.nodes.get_node(*ast_name_id)?; - if let Type::SubQueryName = ast_alias.rule { - } else { - return Err(QueryPlannerError::CustomError(format!( - "Expected a sub-query name, got {:?}.", - ast_alias.rule - ))); - } - ast_alias.value.as_deref().map(to_name) - } else { - None - }; - let plan_sq_id = plan.add_sub_query(plan_child_id, alias_name.as_deref())?; - map.add(*id, plan_sq_id); - } - Type::Reference => { - let ast_rel_list = self.get_referred_relational_nodes(*id)?; - let mut plan_rel_list = Vec::new(); - for ast_id in ast_rel_list { - let plan_id = map.get(ast_id)?; - plan_rel_list.push(plan_id); - } - - let get_column_name = |ast_id: usize| -> Result<String, QueryPlannerError> { - let ast_col_name = self.nodes.get_node(ast_id)?; - if let Type::ColumnName = ast_col_name.rule { - let name: Option<String> = ast_col_name.value.as_deref().map(to_name); - Ok(name.ok_or_else(|| { - QueryPlannerError::CustomError("Empty AST column name".into()) - })?) - } else { - Err(QueryPlannerError::CustomError( - "Expected column name AST node.".into(), - )) - } - }; - - let get_scan_name = - |col_name: &str, - plan_id: usize| - -> Result<Option<String>, QueryPlannerError> { - let child = plan.get_relation_node(plan_id)?; - let col_position = child - .output_alias_position_map(&plan.nodes)? - .get(col_name) - .copied(); - match col_position { - Some(pos) => Ok(plan - .get_relation_node(plan_id)? - .scan_name(&plan, pos)? - .map(String::from)), - None => Ok(None), - } - }; - - // Reference to the join node. - if let (Some(plan_left_id), Some(plan_right_id)) = - (plan_rel_list.get(0), plan_rel_list.get(1)) - { - if let (Some(ast_scan_name_id), Some(ast_col_name_id)) = - (node.children.get(0), node.children.get(1)) - { - let ast_scan_name = self.nodes.get_node(*ast_scan_name_id)?; - if let Type::ScanName = ast_scan_name.rule { - // Get the column name and its positions in the output tuples. - let col_name = get_column_name(*ast_col_name_id)?; - let left_name = get_scan_name(&col_name, *plan_left_id)?; - let right_name = get_scan_name(&col_name, *plan_right_id)?; - // Check that the AST scan name matches to the children scan names in the plan join node. - let scan_name: Option<String> = - ast_scan_name.value.as_deref().map(to_name); - // Determine the referred side of the join (left or right). - if left_name == scan_name { - let left_col_map = plan - .get_relation_node(*plan_left_id)? - .output_alias_position_map(&plan.nodes)?; - if left_col_map.get(&col_name.as_str()).is_some() { - let ref_id = plan.add_row_from_left_branch( - *plan_left_id, - *plan_right_id, - &[&col_name], - )?; - rows.insert(ref_id); - map.add(*id, ref_id); - } else { - return Err(QueryPlannerError::CustomError(format!( - "Column '{}' not found in for the join left child '{:?}'.", - col_name, left_name - ))); - } - } else if right_name == scan_name { - let right_col_map = plan - .get_relation_node(*plan_right_id)? - .output_alias_position_map(&plan.nodes)?; - if right_col_map.get(&col_name.as_str()).is_some() { - let ref_id = plan.add_row_from_right_branch( - *plan_left_id, - *plan_right_id, - &[&col_name], - )?; - rows.insert(ref_id); - map.add(*id, ref_id); - } else { - return Err(QueryPlannerError::CustomError(format!( - "Column '{}' not found in for the join right child '{:?}'.", - col_name, right_name - ))); - } - } else { - return Err(QueryPlannerError::CustomError( - "Left and right plan nodes do not match the AST scan name." - .into(), - )); - } - } else { - return Err(QueryPlannerError::CustomError( - "Expected AST node to be a scan name.".into(), - )); - } - } else if let (Some(ast_col_name_id), None) = - (node.children.get(0), node.children.get(1)) - { - // Determine the referred side of the join (left or right). - let col_name = get_column_name(*ast_col_name_id)?; - let left_col_map = plan - .get_relation_node(*plan_left_id)? - .output_alias_position_map(&plan.nodes)?; - if left_col_map.get(&col_name.as_str()).is_some() { - let ref_id = plan.add_row_from_left_branch( - *plan_left_id, - *plan_right_id, - &[&col_name], - )?; - rows.insert(ref_id); - map.add(*id, ref_id); - } - let right_col_map = plan - .get_relation_node(*plan_right_id)? - .output_alias_position_map(&plan.nodes)?; - if right_col_map.get(&col_name.as_str()).is_some() { - let ref_id = plan.add_row_from_right_branch( - *plan_left_id, - *plan_right_id, - &[&col_name], - )?; - rows.insert(ref_id); - map.add(*id, ref_id); - } - return Err(QueryPlannerError::CustomError(format!( - "Column '{}' not found in for the join left or right children.", - col_name - ))); - } else { - return Err(QueryPlannerError::CustomError( - "Expected children nodes contain a column name.".into(), - )); - }; - - // Reference to a single child node. - } else if let (Some(plan_rel_id), None) = - (plan_rel_list.get(0), plan_rel_list.get(1)) - { - let col_name: String = if let ( - Some(ast_scan_name_id), - Some(ast_col_name_id), - ) = (node.children.get(0), node.children.get(1)) - { - // Get column name. - let col_name = get_column_name(*ast_col_name_id)?; - // Check that scan name in the reference matches to the one in scan node. - let ast_scan_name = self.nodes.get_node(*ast_scan_name_id)?; - if let Type::ScanName = ast_scan_name.rule { - let plan_scan_name = get_scan_name(&col_name, *plan_rel_id)?; - if plan_scan_name != ast_scan_name.value { - return Err(QueryPlannerError::CustomError( - format!("Scan name for the column {:?} doesn't match: expected {:?}, found {:?}", - get_column_name(*ast_col_name_id), plan_scan_name, ast_scan_name.value - ))); - } - } else { - return Err(QueryPlannerError::CustomError( - "Expected AST node to be a scan name.".into(), - )); - }; - col_name - } else if let (Some(ast_col_name_id), None) = - (node.children.get(0), node.children.get(1)) - { - // Get the column name. - get_column_name(*ast_col_name_id)? - } else { - return Err(QueryPlannerError::CustomError( - "No child node found in the AST reference.".into(), - )); - }; - - let ref_list = - plan.new_columns(&[*plan_rel_id], false, &[0], &[&col_name], false)?; - let ref_id = *ref_list.get(0).ok_or_else(|| { - QueryPlannerError::CustomError("Referred column is not found.".into()) - })?; - map.add(*id, ref_id); - } else { - return Err(QueryPlannerError::CustomError( - "Expected one or two referred relational nodes, got less or more." - .into(), - )); - } - } - Type::Number | Type::String | Type::Null | Type::True | Type::False => { - let val = Value::from_node(&node)?; - map.add(*id, plan.add_const(val)); - } - Type::Asterisk => { - // We can get an asterisk only in projection. - let ast_rel_list = self.get_referred_relational_nodes(*id)?; - let mut plan_rel_list = Vec::new(); - for ast_id in ast_rel_list { - let plan_id = map.get(ast_id)?; - plan_rel_list.push(plan_id); - } - if plan_rel_list.len() > 1 { - return Err(QueryPlannerError::CustomError( - "Joins are not implemented yet.".into(), - )); - } - let plan_rel_id = *plan_rel_list.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Referred relational node is not found.".into(), - ) - })?; - let plan_asterisk_id = plan.add_row_for_output(plan_rel_id, &[])?; - map.add(*id, plan_asterisk_id); - } - Type::Alias => { - let ast_ref_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Reference node id is not found among alias children.".into(), - ) - })?; - let plan_ref_id = map.get(*ast_ref_id)?; - let ast_name_id = node.children.get(1).ok_or_else(|| { - QueryPlannerError::CustomError( - "Alias name node id is not found among alias children.".into(), - ) - })?; - let name = self - .nodes - .get_node(*ast_name_id)? - .value - .as_ref() - .ok_or_else(|| { - QueryPlannerError::CustomError("Alias name is not found.".into()) - })?; - let plan_alias_id = plan.nodes.add_alias(&to_name(name), plan_ref_id)?; - map.add(*id, plan_alias_id); - } - Type::Column => { - let ast_child_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError("Column has no children.".into()) - })?; - let plan_child_id = map.get(*ast_child_id)?; - map.add(*id, plan_child_id); - } - Type::Row => { - let mut plan_col_list = Vec::new(); - for ast_child_id in node.children { - let plan_child_id = map.get(ast_child_id)?; - // If the child is a row that was generated by our - // reference-to-row logic in AST code, we should unwrap it back. - let plan_id = if rows.get(&plan_child_id).is_some() { - let plan_inner_expr = plan.get_expression_node(plan_child_id)?; - *plan_inner_expr.extract_row_list()?.get(0).ok_or_else(|| { - QueryPlannerError::CustomError("Row is empty.".into()) - })? - } else { - plan_child_id - }; - plan_col_list.push(plan_id); - } - let plan_row_id = plan.nodes.add_row(plan_col_list, None); - map.add(*id, plan_row_id); - } - Type::And - | Type::Or - | Type::Eq - | Type::In - | Type::Gt - | Type::GtEq - | Type::Lt - | Type::LtEq - | Type::NotEq => { - let mut to_row = |plan_id| -> Result<usize, QueryPlannerError> { - if let Node::Expression( - Expression::Reference { .. } | Expression::Constant { .. }, - ) = plan.get_node(plan_id)? - { - let row_id = plan.nodes.add_row(vec![plan_id], None); - rows.insert(row_id); - Ok(row_id) - } else { - Ok(plan_id) - } - }; - let ast_left_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Left node id is not found among comparison children.".into(), - ) - })?; - let plan_left_id = to_row(map.get(*ast_left_id)?)?; - let ast_right_id = node.children.get(1).ok_or_else(|| { - QueryPlannerError::CustomError( - "Right node id is not found among comparison children.".into(), - ) - })?; - let plan_right_id = to_row(map.get(*ast_right_id)?)?; - let op = Bool::from_node_type(&node.rule)?; - let cond_id = plan.add_cond(plan_left_id, op, plan_right_id)?; - map.add(*id, cond_id); - } - Type::Condition => { - let ast_child_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError("Condition has no children.".into()) - })?; - let plan_child_id = map.get(*ast_child_id)?; - map.add(*id, plan_child_id); - } - Type::InnerJoin => { - let ast_left_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Left node id is not found among join children.".into(), - ) - })?; - let plan_left_id = map.get(*ast_left_id)?; - let ast_right_id = node.children.get(1).ok_or_else(|| { - QueryPlannerError::CustomError( - "Right node id is not found among join children.".into(), - ) - })?; - let plan_right_id = map.get(*ast_right_id)?; - let ast_cond_id = node.children.get(2).ok_or_else(|| { - QueryPlannerError::CustomError( - "Condition node id is not found among join children.".into(), - ) - })?; - let plan_cond_id = map.get(*ast_cond_id)?; - let plan_join_id = plan.add_join(plan_left_id, plan_right_id, plan_cond_id)?; - map.add(*id, plan_join_id); - } - Type::Selection => { - let ast_child_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Child node id is not found among selection children.".into(), - ) - })?; - let plan_child_id = map.get(*ast_child_id)?; - let ast_filter_id = node.children.get(1).ok_or_else(|| { - QueryPlannerError::CustomError( - "Filter node id is not found among selection children.".into(), - ) - })?; - let plan_filter_id = map.get(*ast_filter_id)?; - let plan_selection_id = plan.add_select(&[plan_child_id], plan_filter_id)?; - map.add(*id, plan_selection_id); - } - Type::Projection => { - let ast_child_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Child node id is not found among projection children.".into(), - ) - })?; - let plan_child_id = map.get(*ast_child_id)?; - let mut columns: Vec<usize> = Vec::new(); - for ast_column_id in node.children.iter().skip(1) { - let ast_column = self.nodes.get_node(*ast_column_id)?; - match ast_column.rule { - Type::Column => { - let ast_alias_id = - *ast_column.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Alias node id is not found among column children." - .into(), - ) - })?; - let plan_alias_id = map.get(ast_alias_id)?; - columns.push(plan_alias_id); - } - Type::Asterisk => { - let plan_asterisk_id = map.get(*ast_column_id)?; - if let Node::Expression(Expression::Row { list, .. }) = - plan.get_node(plan_asterisk_id)? - { - for row_id in list { - columns.push(*row_id); - } - } else { - return Err(QueryPlannerError::CustomError( - "A plan node corresponding to asterisk is not a row." - .into(), - )); - } - } - _ => { - return Err(QueryPlannerError::CustomError(format!( - "Expected a column in projection, got {:?}.", - ast_column.rule - ))); - } - } - } - let projection_id = plan.add_proj_internal(plan_child_id, &columns)?; - map.add(*id, projection_id); - } - Type::UnionAll => { - let ast_left_id = node.children.get(0).ok_or_else(|| { - QueryPlannerError::CustomError( - "Left node id is not found among union all children.".into(), - ) - })?; - let plan_left_id = map.get(*ast_left_id)?; - let ast_right_id = node.children.get(1).ok_or_else(|| { - QueryPlannerError::CustomError( - "Right node id is not found among union all children.".into(), - ) - })?; - let plan_right_id = map.get(*ast_right_id)?; - let plan_union_all_id = plan.add_union_all(plan_left_id, plan_right_id)?; - map.add(*id, plan_union_all_id); - } - Type::AliasName - | Type::ColumnName - | Type::ScanName - | Type::Select - | Type::SubQueryName => {} - rule => { - return Err(QueryPlannerError::CustomError(format!( - "Not implements type: {:?}", - rule - ))); - } - } - } - - // get root node id - let plan_top_id = map.get( - self.top - .ok_or_else(|| QueryPlannerError::CustomError("No top in AST.".into()))?, - )?; - plan.set_top(plan_top_id)?; - plan.replace_sq_with_references()?; - - Ok(plan) - } -} - #[derive(Hash, PartialEq, Debug)] struct SubQuery { relational: usize, @@ -665,7 +142,7 @@ impl Plan { } /// Replace sub-queries with references to the sub-query. - fn replace_sq_with_references(&mut self) -> Result<(), QueryPlannerError> { + pub(super) fn replace_sq_with_references(&mut self) -> Result<(), QueryPlannerError> { let set = self.gather_sq_for_replacement()?; for sq in set { // Append sub-query to relational node. diff --git a/src/frontend/sql/ir/tests.rs b/src/frontend/sql/ir/tests.rs index 17adf6188cff9ccd558e281e8043463968fb4d75..a0dec67cd6a6dca71f30e78276e7e930124eed2b 100644 --- a/src/frontend/sql/ir/tests.rs +++ b/src/frontend/sql/ir/tests.rs @@ -1,6 +1,7 @@ use crate::errors::QueryPlannerError; use crate::executor::engine::mock::MetadataMock; use crate::frontend::sql::ast::AbstractSyntaxTree; +use crate::frontend::Ast; use crate::ir::transformation::helpers::sql_to_sql; use crate::ir::Plan; use pretty_assertions::assert_eq; diff --git a/src/frontend/sql/grammar.pest b/src/frontend/sql/query.pest similarity index 96% rename from src/frontend/sql/grammar.pest rename to src/frontend/sql/query.pest index 3e80cfa99e31de4ac45544b24262527bb1903bb2..d935afe4efa1e1221c76ebce17917d2e49fdbc28 100644 --- a/src/frontend/sql/grammar.pest +++ b/src/frontend/sql/query.pest @@ -62,7 +62,7 @@ String = @{ !(WHITESPACE* ~ Keyword ~ WHITESPACE) ~ ('A' .. 'Z' | 'a'..'z' | "_" Number = @{ Int ~ ("." ~ ASCII_DIGIT*)? ~ (^"e" ~ Int)? } Int = @{ ("+" | "-")? ~ ASCII_DIGIT+ } -Value = _{ Row | True | False | Null | Number | SingleQuotedString } +Value = _{ Parameter | Row | True | False | Null | Number | SingleQuotedString } True = @{ ^"true" } False = @{ ^"false" } Null = @{ ^"null" } @@ -71,6 +71,7 @@ Value = _{ Row | True | False | Null | Number | SingleQuotedString } ("(" ~ (Value | Reference) ~ ("," ~ (Value | Reference))* ~ ")") | (^"row" ~ "(" ~ (Value | Reference) ~ ("," ~ (Value | Reference))* ~ ")") } + Parameter = @{ "?" } EOF = { EOI | ";" } WHITESPACE = _{ " " | "\t" | "\n" | "\n\r" } diff --git a/src/ir/transformation/helpers.rs b/src/ir/transformation/helpers.rs index dd6e28ff477938bf1b77fcfd25bf1af4b6b3b547..a053197cef9a3236a3932518d10173d0109d2266 100644 --- a/src/ir/transformation/helpers.rs +++ b/src/ir/transformation/helpers.rs @@ -3,6 +3,7 @@ use crate::executor::engine::mock::MetadataMock; use crate::executor::ir::ExecutionPlan; use crate::frontend::sql::ast::AbstractSyntaxTree; +use crate::frontend::Ast; use crate::ir::Plan; /// Compiles an SQL query to IR plan. diff --git a/src/ir/transformation/split_columns/tests.rs b/src/ir/transformation/split_columns/tests.rs index aca3593d4e46cf10d46c3bed7dc3c449b1c59fad..17b8aab1e3a6084092e55b1d788a888ec23d46e2 100644 --- a/src/ir/transformation/split_columns/tests.rs +++ b/src/ir/transformation/split_columns/tests.rs @@ -1,5 +1,6 @@ use crate::executor::engine::mock::MetadataMock; use crate::frontend::sql::ast::AbstractSyntaxTree; +use crate::frontend::Ast; use crate::ir::transformation::helpers::sql_to_sql; use crate::ir::Plan; use pretty_assertions::assert_eq;