From 3bf75270d60489988946fbaf56e389b8924612c9 Mon Sep 17 00:00:00 2001
From: Denis Smirnov <sd@picodata.io>
Date: Fri, 8 Apr 2022 12:08:10 +0700
Subject: [PATCH] doc: improve module documentation

---
 src/executor.rs                               | 27 ++++++++++++++++++-
 src/executor/engine.rs                        |  8 ++++--
 src/executor/engine/cartridge.rs              |  3 +++
 src/executor/engine/cartridge/cache.rs        |  2 ++
 src/executor/engine/cartridge/hash.rs         |  2 ++
 src/frontend.rs                               |  5 ++++
 src/frontend/sql.rs                           |  7 ++++-
 src/frontend/sql/ast.rs                       | 13 ++++++---
 src/frontend/sql/tree.rs                      |  4 +++
 src/ir.rs                                     |  2 +-
 src/ir/distribution.rs                        |  3 +++
 src/ir/expression.rs                          |  5 ++--
 src/ir/helpers.rs                             |  4 +++
 src/ir/operator.rs                            |  4 ++-
 src/ir/transformation/bool_in.rs              |  2 +-
 src/ir/transformation/equality_propagation.rs |  4 +--
 src/ir/transformation/merge_tuples.rs         | 12 +++++++++
 src/ir/transformation/redistribution.rs       |  5 ++--
 src/ir/tree.rs                                |  3 +++
 src/lib.rs                                    |  6 ++---
 20 files changed, 102 insertions(+), 19 deletions(-)

diff --git a/src/executor.rs b/src/executor.rs
index 45e82d5cd6..078d8b4d73 100644
--- a/src/executor.rs
+++ b/src/executor.rs
@@ -1,3 +1,28 @@
+//! Executor module.
+//!
+//! The executor is located on the coordinator node in the cluster.
+//! It collects all the intermediate results of the plan execution
+//! in memory and executes the IR plan tree in the bottom-up manner.
+//! It goes like this:
+//!
+//! 1. The executor collects all the motion nodes from the bottom layer.
+//!    In theory all the motions in the same layer can be executed in parallel
+//!    (this feature is yet to come).
+//! 2. For every motion the executor:
+//!    - inspects the IR sub-tree and detects the buckets to execute the query for.
+//!    - builds a valid SQL query from the IR sub-tree.
+//!    - performs map-reduce for that SQL query (we send it to the shards deduced from the buckets).
+//!    - builds a virtual table with query results that correspond to the original motion.
+//! 3. Moves to the next motion layer in the IR tree.
+//! 4. For every motion the executor then:
+//!    - links the virtual table results of the motion from the previous layer we depend on.
+//!    - inspects the IR sub-tree and detects the buckets to execute the query.
+//!    - builds a valid SQL query from the IR sub-tree.
+//!    - performs map-reduce for that SQL query.
+//!    - builds a virtual table with query results that correspond to the original motion.
+//! 5. Repeats steps 3 and 4 until we are done with motion layers.
+//! 6. Executes the final IR top subtree and returns the final result to the user.
+
 use crate::errors::QueryPlannerError;
 use crate::executor::bucket::Buckets;
 use crate::executor::engine::Engine;
@@ -28,7 +53,7 @@ impl Plan {
     }
 }
 
-/// Query object for executing
+/// Query to execute.
 pub struct Query<T>
 where
     T: Engine,
diff --git a/src/executor/engine.rs b/src/executor/engine.rs
index f99a8cbde6..be9e163536 100644
--- a/src/executor/engine.rs
+++ b/src/executor/engine.rs
@@ -1,3 +1,7 @@
+//! Engine module.
+//!
+//! Traits that define an execution engine interface.
+
 use crate::errors::QueryPlannerError;
 use crate::executor::bucket::Buckets;
 use crate::executor::ir::ExecutionPlan;
@@ -6,7 +10,7 @@ use crate::executor::vtable::VirtualTable;
 
 pub mod cartridge;
 
-/// `Metadata` trait is interface for working with metadata storage, that was needed the query execute.
+/// A metadata storage trait of the cluster.
 pub trait Metadata {
     fn get_table_segment(
         &self,
@@ -26,7 +30,7 @@ pub trait Metadata {
     }
 }
 
-/// `Engine` trait is interface for working with execution engine.
+/// An execution engine trait.
 pub trait Engine {
     type Metadata;
 
diff --git a/src/executor/engine/cartridge.rs b/src/executor/engine/cartridge.rs
index 2fc66bac8e..c4d666212a 100644
--- a/src/executor/engine/cartridge.rs
+++ b/src/executor/engine/cartridge.rs
@@ -1,3 +1,5 @@
+//! Tarantool cartridge engine module.
+
 use std::convert::TryInto;
 
 use tarantool::log::{say, SayLevel};
@@ -17,6 +19,7 @@ mod backend;
 pub mod cache;
 pub mod hash;
 
+/// Tarantool cartridge metadata and topology.
 #[derive(Debug, Clone)]
 pub struct Runtime {
     metadata: ClusterAppConfig,
diff --git a/src/executor/engine/cartridge/cache.rs b/src/executor/engine/cartridge/cache.rs
index 656e557e49..816e8a4cd3 100644
--- a/src/executor/engine/cartridge/cache.rs
+++ b/src/executor/engine/cartridge/cache.rs
@@ -1,3 +1,5 @@
+//! Metadata cache module.
+
 extern crate yaml_rust;
 
 use std::collections::HashMap;
diff --git a/src/executor/engine/cartridge/hash.rs b/src/executor/engine/cartridge/hash.rs
index d747f1d523..5cf31af77a 100644
--- a/src/executor/engine/cartridge/hash.rs
+++ b/src/executor/engine/cartridge/hash.rs
@@ -1,3 +1,5 @@
+//! Bucket hash module.
+
 use fasthash::{murmur3::Hasher32, FastHasher};
 use std::hash::Hasher;
 
diff --git a/src/frontend.rs b/src/frontend.rs
index 2752f636bb..a0339ff799 100644
--- a/src/frontend.rs
+++ b/src/frontend.rs
@@ -1 +1,6 @@
+//! Frontend module.
+//!
+//! A list of different frontend implementations
+//! to build the intermediate representation (IR).
+
 pub mod sql;
diff --git a/src/frontend/sql.rs b/src/frontend/sql.rs
index c9be2bbaab..b9810f1601 100644
--- a/src/frontend/sql.rs
+++ b/src/frontend/sql.rs
@@ -1,3 +1,8 @@
+//! SQL frontend module.
+//!
+//! Parses an SQL statement to the abstract syntax tree (AST)
+//! and builds the intermediate representation (IR).
+
 pub mod ast;
-pub mod ir;
+mod ir;
 pub mod tree;
diff --git a/src/frontend/sql/ast.rs b/src/frontend/sql/ast.rs
index d96a97ca5a..e9517ab147 100644
--- a/src/frontend/sql/ast.rs
+++ b/src/frontend/sql/ast.rs
@@ -1,3 +1,8 @@
+//! Abstract syntax tree (AST) module.
+//!
+//! This module contains a definition of the abstract syntax tree
+//! constructed from the nodes of the `pest` tree iterator.
+
 extern crate pest;
 
 use std::collections::{hash_map::Entry, HashMap, HashSet};
@@ -13,7 +18,7 @@ use crate::errors::QueryPlannerError;
 /// Parse tree
 #[derive(Parser)]
 #[grammar = "frontend/sql/grammar.pest"]
-pub struct ParseTree;
+struct ParseTree;
 
 /// A list of current rules from the actual grammar.
 /// When new tokens are added to the grammar they
@@ -106,6 +111,7 @@ impl Type {
     }
 }
 
+/// Parse node is a wrapper over the pest pair.
 #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
 pub struct ParseNode {
     pub(in crate::frontend::sql) children: Vec<usize>,
@@ -124,6 +130,8 @@ impl ParseNode {
     }
 }
 
+/// A storage arena of the parse nodes
+/// (a node position in the arena vector acts like a reference).
 #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
 pub struct ParseNodes {
     pub(crate) arena: Vec<ParseNode>,
@@ -207,8 +215,7 @@ impl<'n> StackParseNode<'n> {
     }
 }
 
-/// AST where all the nodes are kept in a list.
-/// Positions in a list act like references.
+/// AST is a tree built on top of the parse nodes arena.
 #[derive(Serialize, Deserialize, PartialEq, Debug)]
 pub struct AbstractSyntaxTree {
     pub(in crate::frontend::sql) nodes: ParseNodes,
diff --git a/src/frontend/sql/tree.rs b/src/frontend/sql/tree.rs
index 5600b06f5e..559e9056d8 100644
--- a/src/frontend/sql/tree.rs
+++ b/src/frontend/sql/tree.rs
@@ -1,6 +1,9 @@
+//! AST traversal iterator module.
+
 use crate::frontend::sql::ast::ParseNodes;
 use std::cell::RefCell;
 
+/// AST traversal iterator.
 #[derive(Debug)]
 pub struct AstIterator<'n> {
     current: &'n usize,
@@ -26,6 +29,7 @@ impl<'n> Iterator for AstIterator<'n> {
 }
 
 impl<'n> ParseNodes {
+    /// Returns an iterator over the children of the node.
     #[allow(dead_code)]
     pub fn ast_iter(&'n self, current: &'n usize) -> AstIterator<'n> {
         AstIterator {
diff --git a/src/ir.rs b/src/ir.rs
index ca245d2b98..f501aa2ec4 100644
--- a/src/ir.rs
+++ b/src/ir.rs
@@ -1,4 +1,4 @@
-//! Intermediate representation.
+//! Intermediate representation (IR) module.
 //!
 //! Contains the logical plan tree and helpers.
 
diff --git a/src/ir/distribution.rs b/src/ir/distribution.rs
index 374a72dee3..9865b1afcf 100644
--- a/src/ir/distribution.rs
+++ b/src/ir/distribution.rs
@@ -1,3 +1,5 @@
+//! Tuple distribution module.
+
 use std::collections::{HashMap, HashSet};
 
 use serde::{Deserialize, Serialize};
@@ -33,6 +35,7 @@ impl Key {
 #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
 pub enum Distribution {
     /// A tuple can be located on any data node.
+    /// Example: projection removes the segment key columns.
     Any,
     /// A tuple is located on all data nodes (constants).
     Replicated,
diff --git a/src/ir/expression.rs b/src/ir/expression.rs
index 578f669215..e5b6e2fa69 100644
--- a/src/ir/expression.rs
+++ b/src/ir/expression.rs
@@ -1,6 +1,7 @@
-//! Expressions are the building blocks of the tuple.
+//! Expression module.
 //!
-//! They provide us information about:
+//! Expressions are the building blocks of the tuple.
+//! They provide information about:
 //! - what input tuple's columns where used to build our tuple
 //! - the order of the columns (and we can get their types as well)
 //! - distribution of the data in the tuple
diff --git a/src/ir/helpers.rs b/src/ir/helpers.rs
index 08efcc423e..5f9ae993ed 100644
--- a/src/ir/helpers.rs
+++ b/src/ir/helpers.rs
@@ -1,6 +1,10 @@
+//! Helper module with functions and structures for the IR.
+
 use std::collections::hash_map::DefaultHasher;
 use std::hash::BuildHasher;
 
+/// A helper macro to build a hash map or set
+/// from the list of arguments.
 #[macro_export]
 macro_rules! collection {
     // map-like
diff --git a/src/ir/operator.rs b/src/ir/operator.rs
index 22060d829a..ee52c613a4 100644
--- a/src/ir/operator.rs
+++ b/src/ir/operator.rs
@@ -1,4 +1,6 @@
-//! Operators for expression transformations.
+//! Tuple operators module.
+//!
+//! Contains operator nodes that transform the tuples in the IR tree.
 
 use std::collections::HashMap;
 use std::fmt::{Display, Formatter};
diff --git a/src/ir/transformation/bool_in.rs b/src/ir/transformation/bool_in.rs
index 411fe7386b..67896d59cc 100644
--- a/src/ir/transformation/bool_in.rs
+++ b/src/ir/transformation/bool_in.rs
@@ -1,4 +1,4 @@
-//! Replace all boolean "IN: operators with a chian of equalities,
+//! Replace all boolean "IN" operators with a chain of equalities,
 //! combined by "OR" operator.
 //!
 //! For example, the following query:
diff --git a/src/ir/transformation/equality_propagation.rs b/src/ir/transformation/equality_propagation.rs
index e10b3796cd..ed216f600d 100644
--- a/src/ir/transformation/equality_propagation.rs
+++ b/src/ir/transformation/equality_propagation.rs
@@ -27,8 +27,8 @@
 //! and row(t1.a) = row(t2.b))
 //! where row(a) = 1 and row(b) = 1
 //! ```
-//! We don't produce (1) = (1) equality as it don't give any new information,
-//! but row(a) = row(b) can be pushed down to the join condition.
+//! We don't produce `(1) = (1)` equality as it doesn't give any new information,
+//! but `row(a) = row(b)` can be pushed down to the join condition.
 //!
 //! Currently implementation produces new equalities only for constants, references
 //! and rows that contain a single reference column.
diff --git a/src/ir/transformation/merge_tuples.rs b/src/ir/transformation/merge_tuples.rs
index f95ea0c23e..7bb12dec0f 100644
--- a/src/ir/transformation/merge_tuples.rs
+++ b/src/ir/transformation/merge_tuples.rs
@@ -1,3 +1,15 @@
+//! Merge tuples in a conjunction of boolean expressions
+//! into a single tuple.
+//!
+//! Example:
+//! ```sql
+//! select * from t where (a = 1) and (b = 2) and (c = 3)
+//! ```
+//! is converted to:
+//! ```sql
+//! select * from t where (a, b, c) = (1, 2, 3)
+//! ```
+
 use crate::errors::QueryPlannerError;
 use crate::ir::expression::Expression;
 use crate::ir::helpers::RepeatableState;
diff --git a/src/ir/transformation/redistribution.rs b/src/ir/transformation/redistribution.rs
index 754753bfea..dd0c518803 100644
--- a/src/ir/transformation/redistribution.rs
+++ b/src/ir/transformation/redistribution.rs
@@ -1,3 +1,5 @@
+//! Resolve distribution conflicts and insert motion nodes into the IR.
+
 use std::cmp::Ordering;
 use std::collections::{hash_map::Entry, HashMap, HashSet};
 
@@ -10,8 +12,7 @@ use crate::ir::expression::Expression;
 use crate::ir::operator::{Bool, Relational};
 use crate::ir::{Node, Plan};
 
-/// A motion policy determinate what portion of data to move
-/// between data nodes.
+/// Determines what portion of data to move between data nodes in the cluster.
 #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
 pub enum MotionPolicy {
     /// Move all data.
diff --git a/src/ir/tree.rs b/src/ir/tree.rs
index 4330e44a50..e3cd9c1f8b 100644
--- a/src/ir/tree.rs
+++ b/src/ir/tree.rs
@@ -1,3 +1,5 @@
+//! IR tree traversal module.
+
 use std::cell::RefCell;
 use std::cmp::Ordering;
 
@@ -27,6 +29,7 @@ pub struct ExpressionIterator<'n> {
     make_row_leaf: bool,
 }
 
+/// Expression and relational nodes iterator.
 #[derive(Debug)]
 pub struct SubtreeIterator<'n> {
     current: &'n usize,
diff --git a/src/lib.rs b/src/lib.rs
index 6a0144798a..80006c78a8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,9 +1,9 @@
-//! Tarantool planner for distributed SQL.
+//! Tarantool planner and executor for distributed SQL.
 #[macro_use]
 extern crate pest_derive;
 
 mod errors;
-mod executor;
-mod frontend;
+pub mod executor;
+pub mod frontend;
 pub mod ir;
 mod parser;
-- 
GitLab