From 3bf75270d60489988946fbaf56e389b8924612c9 Mon Sep 17 00:00:00 2001 From: Denis Smirnov <sd@picodata.io> Date: Fri, 8 Apr 2022 12:08:10 +0700 Subject: [PATCH] doc: improve module documentation --- src/executor.rs | 27 ++++++++++++++++++- src/executor/engine.rs | 8 ++++-- src/executor/engine/cartridge.rs | 3 +++ src/executor/engine/cartridge/cache.rs | 2 ++ src/executor/engine/cartridge/hash.rs | 2 ++ src/frontend.rs | 5 ++++ src/frontend/sql.rs | 7 ++++- src/frontend/sql/ast.rs | 13 ++++++--- src/frontend/sql/tree.rs | 4 +++ src/ir.rs | 2 +- src/ir/distribution.rs | 3 +++ src/ir/expression.rs | 5 ++-- src/ir/helpers.rs | 4 +++ src/ir/operator.rs | 4 ++- src/ir/transformation/bool_in.rs | 2 +- src/ir/transformation/equality_propagation.rs | 4 +-- src/ir/transformation/merge_tuples.rs | 12 +++++++++ src/ir/transformation/redistribution.rs | 5 ++-- src/ir/tree.rs | 3 +++ src/lib.rs | 6 ++--- 20 files changed, 102 insertions(+), 19 deletions(-) diff --git a/src/executor.rs b/src/executor.rs index 45e82d5cd6..078d8b4d73 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,3 +1,28 @@ +//! Executor module. +//! +//! The executor is located on the coordinator node in the cluster. +//! It collects all the intermediate results of the plan execution +//! in memory and executes the IR plan tree in the bottom-up manner. +//! It goes like this: +//! +//! 1. The executor collects all the motion nodes from the bottom layer. +//! In theory all the motions in the same layer can be executed in parallel +//! (this feature is yet to come). +//! 2. For every motion the executor: +//! - inspects the IR sub-tree and detects the buckets to execute the query for. +//! - builds a valid SQL query from the IR sub-tree. +//! - performs map-reduce for that SQL query (we send it to the shards deduced from the buckets). +//! - builds a virtual table with query results that correspond to the original motion. +//! 3. Moves to the next motion layer in the IR tree. +//! 4. 
For every motion the executor then: +//! - links the virtual table results of the motion from the previous layer we depend on. +//! - inspects the IR sub-tree and detects the buckets to execute the query. +//! - builds a valid SQL query from the IR sub-tree. +//! - performs map-reduce for that SQL query. +//! - builds a virtual table with query results that correspond to the original motion. +//! 5. Repeats step 3 till we are done with motion layers. +//! 6. Executes the final IR top subtree and returns the final result to the user. + use crate::errors::QueryPlannerError; use crate::executor::bucket::Buckets; use crate::executor::engine::Engine; @@ -28,7 +53,7 @@ impl Plan { } } -/// Query object for executing +/// Query to execute. pub struct Query<T> where T: Engine, diff --git a/src/executor/engine.rs b/src/executor/engine.rs index f99a8cbde6..be9e163536 100644 --- a/src/executor/engine.rs +++ b/src/executor/engine.rs @@ -1,3 +1,7 @@ +//! Engine module. +//! +//! Traits that define an execution engine interface. + use crate::errors::QueryPlannerError; use crate::executor::bucket::Buckets; use crate::executor::ir::ExecutionPlan; @@ -6,7 +10,7 @@ use crate::executor::vtable::VirtualTable; pub mod cartridge; -/// `Metadata` trait is interface for working with metadata storage, that was needed the query execute. +/// A metadata storage trait of the cluster. pub trait Metadata { fn get_table_segment( &self, @@ -26,7 +30,7 @@ pub trait Metadata { } } -/// `Engine` trait is interface for working with execution engine. +/// An execution engine trait. pub trait Engine { type Metadata; diff --git a/src/executor/engine/cartridge.rs b/src/executor/engine/cartridge.rs index 2fc66bac8e..c4d666212a 100644 --- a/src/executor/engine/cartridge.rs +++ b/src/executor/engine/cartridge.rs @@ -1,3 +1,5 @@ +//! Tarantool cartridge engine module. 
+ use std::convert::TryInto; use tarantool::log::{say, SayLevel}; @@ -17,6 +19,7 @@ mod backend; pub mod cache; pub mod hash; +/// Tarantool cartridge metadata and topology. #[derive(Debug, Clone)] pub struct Runtime { metadata: ClusterAppConfig, diff --git a/src/executor/engine/cartridge/cache.rs b/src/executor/engine/cartridge/cache.rs index 656e557e49..816e8a4cd3 100644 --- a/src/executor/engine/cartridge/cache.rs +++ b/src/executor/engine/cartridge/cache.rs @@ -1,3 +1,5 @@ +//! Metadata cache module. + extern crate yaml_rust; use std::collections::HashMap; diff --git a/src/executor/engine/cartridge/hash.rs b/src/executor/engine/cartridge/hash.rs index d747f1d523..5cf31af77a 100644 --- a/src/executor/engine/cartridge/hash.rs +++ b/src/executor/engine/cartridge/hash.rs @@ -1,3 +1,5 @@ +//! Bucket hash module. + use fasthash::{murmur3::Hasher32, FastHasher}; use std::hash::Hasher; diff --git a/src/frontend.rs b/src/frontend.rs index 2752f636bb..a0339ff799 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -1 +1,6 @@ +//! Frontend module. +//! +//! A list of different frontend implementations +//! to build the intermediate representation (IR). + pub mod sql; diff --git a/src/frontend/sql.rs b/src/frontend/sql.rs index c9be2bbaab..b9810f1601 100644 --- a/src/frontend/sql.rs +++ b/src/frontend/sql.rs @@ -1,3 +1,8 @@ +//! SQL frontend module. +//! +//! Parses an SQL statement to the abstract syntax tree (AST) +//! and builds the intermediate representation (IR). + pub mod ast; -pub mod ir; +mod ir; pub mod tree; diff --git a/src/frontend/sql/ast.rs b/src/frontend/sql/ast.rs index d96a97ca5a..e9517ab147 100644 --- a/src/frontend/sql/ast.rs +++ b/src/frontend/sql/ast.rs @@ -1,3 +1,8 @@ +//! Abstract syntax tree (AST) module. +//! +//! This module contains a definition of the abstract syntax tree +//! constructed from the nodes of the `pest` tree iterator. 
+ extern crate pest; use std::collections::{hash_map::Entry, HashMap, HashSet}; @@ -13,7 +18,7 @@ use crate::errors::QueryPlannerError; /// Parse tree #[derive(Parser)] #[grammar = "frontend/sql/grammar.pest"] -pub struct ParseTree; +struct ParseTree; /// A list of current rules from the actual grammar. /// When new tokens are added to the grammar they @@ -106,6 +111,7 @@ impl Type { } } +/// Parse node is a wrapper over the pest pair. #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] pub struct ParseNode { pub(in crate::frontend::sql) children: Vec<usize>, @@ -124,6 +130,8 @@ impl ParseNode { } } +/// A storage arena of the parse nodes +/// (a node position in the arena vector acts like a reference). #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] pub struct ParseNodes { pub(crate) arena: Vec<ParseNode>, @@ -207,8 +215,7 @@ impl<'n> StackParseNode<'n> { } } -/// AST where all the nodes are kept in a list. -/// Positions in a list act like references. +/// AST is a tree built on top of the parse nodes arena. #[derive(Serialize, Deserialize, PartialEq, Debug)] pub struct AbstractSyntaxTree { pub(in crate::frontend::sql) nodes: ParseNodes, diff --git a/src/frontend/sql/tree.rs b/src/frontend/sql/tree.rs index 5600b06f5e..559e9056d8 100644 --- a/src/frontend/sql/tree.rs +++ b/src/frontend/sql/tree.rs @@ -1,6 +1,9 @@ +//! AST traversal iterator module. + use crate::frontend::sql::ast::ParseNodes; use std::cell::RefCell; +/// AST traversal iterator. #[derive(Debug)] pub struct AstIterator<'n> { current: &'n usize, @@ -26,6 +29,7 @@ impl<'n> Iterator for AstIterator<'n> { } impl<'n> ParseNodes { + /// Returns an iterator over the children of the node. #[allow(dead_code)] pub fn ast_iter(&'n self, current: &'n usize) -> AstIterator<'n> { AstIterator { diff --git a/src/ir.rs b/src/ir.rs index ca245d2b98..f501aa2ec4 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -1,4 +1,4 @@ -//! Intermediate representation. +//! Intermediate representation (IR) module. 
//! //! Contains the logical plan tree and helpers. diff --git a/src/ir/distribution.rs b/src/ir/distribution.rs index 374a72dee3..9865b1afcf 100644 --- a/src/ir/distribution.rs +++ b/src/ir/distribution.rs @@ -1,3 +1,5 @@ +//! Tuple distribution module. + use std::collections::{HashMap, HashSet}; use serde::{Deserialize, Serialize}; @@ -33,6 +35,7 @@ impl Key { #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] pub enum Distribution { /// A tuple can be located on any data node. + /// Example: projection removes the segment key columns. Any, /// A tuple is located on all data nodes (constants). Replicated, diff --git a/src/ir/expression.rs b/src/ir/expression.rs index 578f669215..e5b6e2fa69 100644 --- a/src/ir/expression.rs +++ b/src/ir/expression.rs @@ -1,6 +1,7 @@ -//! Expressions are the building blocks of the tuple. +//! Expression module. //! -//! They provide us information about: +//! Expressions are the building blocks of the tuple. +//! They provide information about: //! - what input tuple's columns where used to build our tuple //! - the order of the columns (and we can get their types as well) //! - distribution of the data in the tuple diff --git a/src/ir/helpers.rs b/src/ir/helpers.rs index 08efcc423e..5f9ae993ed 100644 --- a/src/ir/helpers.rs +++ b/src/ir/helpers.rs @@ -1,6 +1,10 @@ +//! Helper module with functions and structures for the IR. + use std::collections::hash_map::DefaultHasher; use std::hash::BuildHasher; +/// A helper macro to build a hash map or set +/// from the list of arguments. #[macro_export] macro_rules! collection { // map-like diff --git a/src/ir/operator.rs b/src/ir/operator.rs index 22060d829a..ee52c613a4 100644 --- a/src/ir/operator.rs +++ b/src/ir/operator.rs @@ -1,4 +1,6 @@ -//! Operators for expression transformations. +//! Tuple operators module. +//! +//! Contains operator nodes that transform the tuples in IR tree. 
use std::collections::HashMap; use std::fmt::{Display, Formatter}; diff --git a/src/ir/transformation/bool_in.rs b/src/ir/transformation/bool_in.rs index 411fe7386b..67896d59cc 100644 --- a/src/ir/transformation/bool_in.rs +++ b/src/ir/transformation/bool_in.rs @@ -1,4 +1,4 @@ -//! Replace all boolean "IN: operators with a chian of equalities, +//! Replace all boolean "IN" operators with a chain of equalities, //! combined by "OR" operator. //! //! For example, the following query: diff --git a/src/ir/transformation/equality_propagation.rs b/src/ir/transformation/equality_propagation.rs index e10b3796cd..ed216f600d 100644 --- a/src/ir/transformation/equality_propagation.rs +++ b/src/ir/transformation/equality_propagation.rs @@ -27,8 +27,8 @@ //! and row(t1.a) = row(t2.b)) //! where row(a) = 1 and row(b) = 1 //! ``` -//! We don't produce (1) = (1) equality as it don't give any new information, -//! but row(a) = row(b) can be pushed down to the join condition. +//! We don't produce `(1) = (1)` equality as it doesn't give any new information, +//! but `row(a) = row(b)` can be pushed down to the join condition. //! //! Currently implementation produces new equalities only for constants, references //! and rows that contain a single reference column. diff --git a/src/ir/transformation/merge_tuples.rs b/src/ir/transformation/merge_tuples.rs index f95ea0c23e..7bb12dec0f 100644 --- a/src/ir/transformation/merge_tuples.rs +++ b/src/ir/transformation/merge_tuples.rs @@ -1,3 +1,15 @@ +//! Merge tuples in a conjunction of boolean expressions +//! into a single tuple. +//! +//! Example: +//! ```sql +//! select * from t where (a = 1) and (b = 2) and (c = 3) +//! ``` +//! is converted to: +//! ```sql +//! select * from t where (a, b, c) = (1, 2, 3) +//! 
``` + use crate::errors::QueryPlannerError; use crate::ir::expression::Expression; use crate::ir::helpers::RepeatableState; diff --git a/src/ir/transformation/redistribution.rs b/src/ir/transformation/redistribution.rs index 754753bfea..dd0c518803 100644 --- a/src/ir/transformation/redistribution.rs +++ b/src/ir/transformation/redistribution.rs @@ -1,3 +1,5 @@ +//! Resolve distribution conflicts and insert motion nodes to IR. + use std::cmp::Ordering; use std::collections::{hash_map::Entry, HashMap, HashSet}; @@ -10,8 +12,7 @@ use crate::ir::expression::Expression; use crate::ir::operator::{Bool, Relational}; use crate::ir::{Node, Plan}; -/// A motion policy determinate what portion of data to move -/// between data nodes. +/// Determines what portion of data to move between data nodes in the cluster. #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] pub enum MotionPolicy { /// Move all data. diff --git a/src/ir/tree.rs b/src/ir/tree.rs index 4330e44a50..e3cd9c1f8b 100644 --- a/src/ir/tree.rs +++ b/src/ir/tree.rs @@ -1,3 +1,5 @@ +//! IR tree traversal module. + use std::cell::RefCell; use std::cmp::Ordering; @@ -27,6 +29,7 @@ pub struct ExpressionIterator<'n> { make_row_leaf: bool, } +/// Expression and relational nodes iterator. #[derive(Debug)] pub struct SubtreeIterator<'n> { current: &'n usize, diff --git a/src/lib.rs b/src/lib.rs index 6a0144798a..80006c78a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,9 @@ -//! Tarantool planner for distributed SQL. +//! Tarantool planner and executor for distributed SQL. #[macro_use] extern crate pest_derive; mod errors; -mod executor; -mod frontend; +pub mod executor; +pub mod frontend; pub mod ir; mod parser; -- GitLab