diff --git a/src/executor.rs b/src/executor.rs index 91348ac49f2e4757c018f01a49cc0facd472db02..e673374aeb121a6aaefbb502cb3b9195fac39404 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -53,7 +53,7 @@ pub mod vtable; impl Plan { /// Apply optimization rules to the plan. - fn optimize(&mut self) -> Result<(), QueryPlannerError> { + pub(crate) fn optimize(&mut self) -> Result<(), QueryPlannerError> { self.replace_in_operator()?; self.split_columns()?; self.set_dnf()?; diff --git a/src/ir/explain.rs b/src/ir/explain.rs index f0a2f26e52362ccb0147776db60c5dd23e85413f..13e9ee5fdc6e311a53ea8ff847bb1d1701f86aa6 100644 --- a/src/ir/explain.rs +++ b/src/ir/explain.rs @@ -7,6 +7,9 @@ use traversal::DftPost; use crate::errors::QueryPlannerError; use crate::ir::expression::Expression; use crate::ir::operator::Relational; +use crate::ir::transformation::redistribution::{ + DataGeneration, MotionPolicy as IrMotionPolicy, Target as IrTarget, +}; use crate::ir::Plan; use super::operator::{Bool, Unary}; @@ -359,6 +362,78 @@ impl Display for SubQuery { } } +#[derive(Debug, Serialize)] +struct Motion { + policy: MotionPolicy, + generation: DataGeneration, +} + +impl Motion { + fn new(policy: MotionPolicy, generation: DataGeneration) -> Self { + Motion { policy, generation } + } +} + +impl Display for Motion { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "motion [policy: {}, generation: {}]", + &self.policy, &self.generation + ) + } +} + +#[derive(Debug, Serialize)] +enum MotionPolicy { + Full, + Segment(MotionKey), + Local, +} + +impl Display for MotionPolicy { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self { + MotionPolicy::Full => write!(f, "full"), + MotionPolicy::Segment(mk) => write!(f, "segment({})", mk), + MotionPolicy::Local => write!(f, "local"), + } + } +} + +#[derive(Debug, Serialize)] +struct MotionKey { + pub targets: Vec<Target>, +} + +impl Display for MotionKey { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let targets = &self + .targets + .iter() + .map(ToString::to_string) + .collect::<Vec<String>>() + .join(", "); + + write!(f, "[{}]", targets) + } +} + +#[derive(Debug, Serialize)] +pub enum Target { + Reference(String), + Value(Value), +} + +impl Display for Target { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self { + Target::Reference(s) => write!(f, "ref({})", s), + Target::Value(v) => write!(f, "value({})", v), + } + } +} + #[derive(Debug, Serialize)] #[allow(dead_code)] enum ExplainNode { @@ -368,6 +443,7 @@ enum ExplainNode { Selection(Selection), UnionAll, SubQuery(SubQuery), + Motion(Motion), } impl Display for ExplainNode { @@ -379,6 +455,7 @@ impl Display for ExplainNode { ExplainNode::Selection(s) => format!("selection {}", s), ExplainNode::UnionAll => "union all".to_string(), ExplainNode::SubQuery(s) => s.to_string(), + ExplainNode::Motion(m) => m.to_string(), }; write!(f, "{}", s) @@ -551,8 +628,61 @@ impl FullExplain { let s = SubQuery::new(alias.as_ref().map(ToString::to_string)); Some(ExplainNode::SubQuery(s)) } + Relational::Motion { + children, + policy, + generation, + .. + } => { + let child = stack.pop().ok_or_else(|| { + QueryPlannerError::CustomError( + "Motion node must have exactly one child".into(), + ) + })?; + current_node.children.push(child); + + let p = match policy { + IrMotionPolicy::Segment(s) => { + let child_id = children.first().ok_or_else(|| { + QueryPlannerError::CustomError( + "Current node should have exactly one child".to_string(), + ) + })?; + + let child_output_id = ir.get_relation_node(*child_id)?.output(); + let col_list = + ir.get_expression_node(child_output_id)?.get_row_list()?; + + let targets = (&s.targets) + .iter() + .map(|r| match r { + IrTarget::Reference(pos) => { + let col_id = *col_list.get(*pos).ok_or_else(|| { + QueryPlannerError::CustomError(String::from( + "Invalid position in list", + )) + })?; + let col_name = ir + .get_expression_node(col_id)? + .get_alias_name()? + .to_string(); + + Ok(Target::Reference(col_name)) + } + IrTarget::Value(v) => Ok(Target::Value(v.clone())), + }) + .collect::<Result<Vec<Target>, _>>()?; + + MotionPolicy::Segment(MotionKey { targets }) + } + IrMotionPolicy::Full => MotionPolicy::Full, + IrMotionPolicy::Local => MotionPolicy::Local, + }; + let m = Motion::new(p, generation.clone()); + + Some(ExplainNode::Motion(m)) + } Relational::InnerJoin { .. } - | Relational::Motion { .. } | Relational::Insert { .. } | Relational::Values { .. } | Relational::ValuesRow { .. } => { diff --git a/src/ir/explain/tests.rs b/src/ir/explain/tests.rs index 9164969e505cd52b322a668b184c1d189fd96cc5..685a975c7ee04a150837aad6199785f76bfec6cf 100644 --- a/src/ir/explain/tests.rs +++ b/src/ir/explain/tests.rs @@ -1,14 +1,14 @@ use pretty_assertions::assert_eq; use super::*; -use crate::ir::transformation::helpers::sql_to_ir; +use crate::ir::transformation::helpers::sql_to_optimized_ir; #[test] fn simple_query_without_cond_plan() { let query = r#"SELECT "t"."identification_number" as "c1", "product_code" FROM "hash_testing" as "t""#; - let plan = sql_to_ir(query, vec![]); + let plan = sql_to_optimized_ir(query, vec![]); let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); @@ -27,7 +27,7 @@ fn simple_query_without_cond_plan() { fn simple_query_with_cond_plan() { let query = r#"SELECT "t"."identification_number" as "c1", "product_code" FROM "hash_testing" as "t" WHERE "t"."identification_number" = 1 AND "t"."product_code" = '222'"#; - let plan = sql_to_ir(query, vec![]); + let plan = sql_to_optimized_ir(query, vec![]); let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); @@ -35,7 +35,7 @@ fn simple_query_with_cond_plan() { let mut actual_explain = String::new(); actual_explain.push_str( r#"projection ("t"."identification_number" -> "c1", "t"."product_code" -> "product_code") - selection ROW("t"."identification_number") = ROW(1) and ROW("t"."product_code") = ROW('222') + selection ROW("t"."identification_number", "t"."product_code") = ROW(1, '222') scan "hash_testing" -> "t" "#, ); @@ -49,7 +49,7 @@ fn union_query_plan() { UNION ALL SELECT "t2"."identification_number", "product_code" FROM "hash_testing_hist" as "t2""#; - let plan = sql_to_ir(query, vec![]); + let plan = sql_to_optimized_ir(query, vec![]); let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); @@ -74,7 +74,7 @@ SELECT "id", "FIRST_NAME" FROM "test_space_hist" WHERE "sys_op" < 0 ) as "t" WHERE "id" = 1"#; - let plan = sql_to_ir(query, vec![]); + let plan = sql_to_optimized_ir(query, vec![]); let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); @@ -85,7 +85,7 @@ WHERE "id" = 1"#; scan "t" union all projection ("test_space"."id" -> "id", "test_space"."FIRST_NAME" -> "FIRST_NAME") - selection ROW("test_space"."sys_op") > ROW(0) and ROW("test_space"."sysFrom") < ROW(0) + selection ROW("test_space"."sysFrom") < ROW(0) and ROW("test_space"."sys_op") > ROW(0) scan "test_space" projection ("test_space_hist"."id" -> "id", "test_space_hist"."FIRST_NAME" -> "FIRST_NAME") selection ROW("test_space_hist"."sys_op") < ROW(0) @@ -109,21 +109,20 @@ WHERE "id" IN (SELECT "id" SELECT "id", "FIRST_NAME" FROM "test_space_hist" WHERE "sys_op" < 0 ) as "t2" WHERE "t2"."id" = 4) - AND "FIRST_NAME" IN (SELECT "FIRST_NAME" FROM "test_space" WHERE "id" = 5) "#; - let plan = sql_to_ir(query, vec![]); + let plan = sql_to_optimized_ir(query, vec![]); let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); let mut actual_explain = String::new(); actual_explain.push_str(r#"projection ("t"."id" -> "id", "t"."FIRST_NAME" -> "FIRST_NAME") - selection ROW("t"."id") in ROW($0) and ROW("t"."FIRST_NAME") in ROW($1) + selection ROW("t"."id") in ROW($0) scan "t" union all projection ("test_space"."id" -> "id", "test_space"."FIRST_NAME" -> "FIRST_NAME") - selection ROW("test_space"."sys_op") > ROW(0) and ROW("test_space"."sysFrom") < ROW(0) + selection ROW("test_space"."sysFrom") < ROW(0) and ROW("test_space"."sys_op") > ROW(0) scan "test_space" projection ("test_space_hist"."id" -> "id", "test_space_hist"."FIRST_NAME" -> "FIRST_NAME") selection ROW("test_space_hist"."sys_op") < ROW(0) @@ -140,11 +139,6 @@ scan projection ("test_space_hist"."id" -> "id", "test_space_hist"."FIRST_NAME" -> "FIRST_NAME") selection ROW("test_space_hist"."sys_op") < ROW(0) scan "test_space_hist" -subquery $1: -scan - projection ("test_space"."FIRST_NAME" -> "FIRST_NAME") - selection ROW("test_space"."id") = ROW(5) - scan "test_space" "#); assert_eq!(actual_explain, explain_tree.to_string()) @@ -156,17 +150,74 @@ fn explain_except1() { EXCEPT DISTINCT SELECT "identification_number" FROM "hash_testing_hist""#; - let plan = sql_to_ir(query, vec![]); + let plan = sql_to_optimized_ir(query, vec![]); let top = &plan.get_top().unwrap(); let explain_tree = FullExplain::new(&plan, *top).unwrap(); let expected = format!( - "{}\n{}\n{}\n{}\n{}\n", + "{}\n{}\n{}\n{}\n{}\n{}\n", r#"except"#, r#" projection ("t"."product_code" -> "pc")"#, r#" scan "hash_testing" -> "t""#, - r#" projection ("hash_testing_hist"."identification_number" -> "identification_number")"#, - r#" scan "hash_testing_hist""#, + r#" motion [policy: full, generation: none]"#, + r#" projection ("hash_testing_hist"."identification_number" -> "identification_number")"#, + r#" scan "hash_testing_hist""#, ); assert_eq!(expected, explain_tree.to_string()) } + +#[test] +fn motion_subquery_plan() { + let query = r#"SELECT * FROM ( +SELECT "id", "FIRST_NAME" FROM "test_space" WHERE "sys_op" > 0 and "sysFrom" < 0 +UNION ALL +SELECT "id", "FIRST_NAME" FROM "test_space_hist" WHERE "sys_op" < 0 +) as "t" +WHERE "id" IN (SELECT "id" + FROM ( + SELECT "id", "FIRST_NAME" FROM "test_space" WHERE "sys_op" > 0 + UNION ALL + SELECT "id", "FIRST_NAME" FROM "test_space_hist" WHERE "sys_op" < 0 + ) as "t2" + WHERE "t2"."id" = 4) + OR "id" IN (SELECT "identification_number" FROM "hash_testing" WHERE "identification_number" = 5 AND "product_code" = '123') +"#; + + let plan = sql_to_optimized_ir(query, vec![]); + + let top = &plan.get_top().unwrap(); + let explain_tree = FullExplain::new(&plan, *top).unwrap(); + + let mut actual_explain = String::new(); + actual_explain.push_str(r#"projection ("t"."id" -> "id", "t"."FIRST_NAME" -> "FIRST_NAME") + selection ROW("t"."id") in ROW($0) or ROW("t"."id") in ROW($1) + scan "t" + union all + projection ("test_space"."id" -> "id", "test_space"."FIRST_NAME" -> "FIRST_NAME") + selection ROW("test_space"."sysFrom") < ROW(0) and ROW("test_space"."sys_op") > ROW(0) + scan "test_space" + projection ("test_space_hist"."id" -> "id", "test_space_hist"."FIRST_NAME" -> "FIRST_NAME") + selection ROW("test_space_hist"."sys_op") < ROW(0) + scan "test_space_hist" +subquery $0: +scan + projection ("t2"."id" -> "id") + selection ROW("t2"."id") = ROW(4) + scan "t2" + union all + projection ("test_space"."id" -> "id", "test_space"."FIRST_NAME" -> "FIRST_NAME") + selection ROW("test_space"."sys_op") > ROW(0) + scan "test_space" + projection ("test_space_hist"."id" -> "id", "test_space_hist"."FIRST_NAME" -> "FIRST_NAME") + selection ROW("test_space_hist"."sys_op") < ROW(0) + scan "test_space_hist" +subquery $1: +motion [policy: segment([ref("identification_number")]), generation: none] + scan + projection ("hash_testing"."identification_number" -> "identification_number") + selection ROW("hash_testing"."identification_number", "hash_testing"."product_code") = ROW(5, '123') + scan "hash_testing" +"#); + + assert_eq!(actual_explain, explain_tree.to_string()) +} diff --git a/src/ir/transformation/helpers.rs b/src/ir/transformation/helpers.rs index 89f101e9cdbb93079939f127e03b2455a90c15d2..b4d009282fb66ce0534c43473efbe1e3e0d05a3b 100644 --- a/src/ir/transformation/helpers.rs +++ b/src/ir/transformation/helpers.rs @@ -10,6 +10,14 @@ use crate::frontend::Ast; use crate::ir::value::Value; use crate::ir::Plan; +/// Compiles an SQL query to optimized IR plan. +#[allow(dead_code)] +pub fn sql_to_optimized_ir(query: &str, params: Vec<Value>) -> Plan { + let mut plan = sql_to_ir(query, params); + plan.optimize().unwrap(); + plan +} + /// Compiles an SQL query to IR plan. #[allow(dead_code)] pub fn sql_to_ir(query: &str, params: Vec<Value>) -> Plan { diff --git a/src/ir/transformation/redistribution.rs b/src/ir/transformation/redistribution.rs index 58d53f0efd8e0ba778e779b206c3bcae6a062fee..f1b3010fd2915519dafe15e42ca6934c199ac0b2 100644 --- a/src/ir/transformation/redistribution.rs +++ b/src/ir/transformation/redistribution.rs @@ -3,6 +3,7 @@ use ahash::RandomState; use std::cmp::Ordering; use std::collections::{hash_map::Entry, HashMap, HashSet}; +use std::fmt::{Display, Formatter}; use serde::{Deserialize, Serialize}; use traversal::{Bft, DftPost}; @@ -79,6 +80,17 @@ pub enum DataGeneration { ShardingColumn, } +impl Display for DataGeneration { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + let op = match &self { + DataGeneration::None => "none", + DataGeneration::ShardingColumn => "sharding_column", + }; + + write!(f, "{}", op) + } +} + struct BoolOp { left: usize, op: Bool, diff --git a/test_app/test/integration/api_test.lua b/test_app/test/integration/api_test.lua index a93b6d33a485bf042d2b202a8ed423407d6a7fb5..080fc950911f640efc2812816ff0a6f9d95587d7 100644 --- a/test_app/test/integration/api_test.lua +++ b/test_app/test/integration/api_test.lua @@ -777,13 +777,26 @@ g.test_bucket_id_in_join = function() }) end -g.test_invalid_explain = function() +g.test_motion_explain = function() local api = cluster:server("api-1").net_box - local _, err = api:call("sbroad.explain", { [[SELECT "id", "name" FROM "testing_space" + local r, err = api:call("sbroad.explain", { [[SELECT "id", "name" FROM "testing_space" WHERE "id" in (SELECT "id" FROM "space_simple_shard_key_hist" WHERE "sysOp" < 0)]] }) - - t.assert_str_contains(tostring(err), "Explain hasn't supported node Motion") + t.assert_equals(err, nil) + t.assert_equals( + r, + { + "projection (\"testing_space\".\"id\" -> \"id\", \"testing_space\".\"name\" -> \"name\")", + " selection ROW(\"testing_space\".\"id\") in ROW($0)", + " scan \"testing_space\"", + "subquery $0:", + "motion [policy: full, generation: none]", + " scan", + " projection (\"space_simple_shard_key_hist\".\"id\" -> \"id\")", + " selection ROW(\"space_simple_shard_key_hist\".\"sysOp\") < ROW(0)", + " scan \"space_simple_shard_key_hist\"", + } + ) end g.test_valid_explain = function()